toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +4 -4
- toil/batchSystems/options.py +1 -0
- toil/batchSystems/slurm.py +227 -83
- toil/common.py +161 -45
- toil/cwl/cwltoil.py +31 -10
- toil/job.py +47 -38
- toil/jobStores/aws/jobStore.py +46 -10
- toil/lib/aws/session.py +14 -3
- toil/lib/aws/utils.py +92 -35
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2nodes.py +3 -2
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/io.py +22 -1
- toil/lib/misc.py +18 -0
- toil/lib/retry.py +10 -10
- toil/lib/{integration.py → trs.py} +95 -46
- toil/lib/web.py +38 -0
- toil/options/common.py +17 -2
- toil/options/cwl.py +10 -0
- toil/provisioners/gceProvisioner.py +4 -4
- toil/server/cli/wes_cwl_runner.py +3 -3
- toil/server/utils.py +2 -3
- toil/statsAndLogging.py +35 -1
- toil/test/batchSystems/test_slurm.py +172 -2
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +105 -2
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_trs.py +161 -0
- toil/test/wdl/wdltoil_test.py +1 -1
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +23 -9
- toil/worker.py +113 -33
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/METADATA +9 -4
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/RECORD +40 -34
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil/test/lib/test_integration.py +0 -104
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/io.py
CHANGED
|
@@ -11,8 +11,29 @@ from contextlib import contextmanager
|
|
|
11
11
|
from io import BytesIO
|
|
12
12
|
from typing import IO, Any, Callable, Optional, Protocol, Union
|
|
13
13
|
|
|
14
|
+
from toil.lib.memoize import memoize
|
|
15
|
+
|
|
14
16
|
logger = logging.getLogger(__name__)
|
|
15
17
|
|
|
18
|
+
@memoize
|
|
19
|
+
def get_toil_home() -> str:
|
|
20
|
+
"""
|
|
21
|
+
Get the Toil home directory for storing configuration and global state.
|
|
22
|
+
|
|
23
|
+
Raises an error if it does not exist and cannot be created. Safe to run
|
|
24
|
+
simultaneously in multiple processes.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# TODO: should this use an XDG config directory or ~/.config to not clutter the
|
|
28
|
+
# base home directory?
|
|
29
|
+
toil_home_dir = os.path.join(os.path.expanduser("~"), ".toil")
|
|
30
|
+
|
|
31
|
+
dir_path = try_path(toil_home_dir)
|
|
32
|
+
if dir_path is None:
|
|
33
|
+
raise RuntimeError(
|
|
34
|
+
f"Cannot create or access Toil configuration directory {toil_home_dir}"
|
|
35
|
+
)
|
|
36
|
+
return dir_path
|
|
16
37
|
|
|
17
38
|
TOIL_URI_SCHEME = "toilfile:"
|
|
18
39
|
|
|
@@ -177,7 +198,7 @@ def atomic_install(tmp_path, final_path) -> None:
|
|
|
177
198
|
def AtomicFileCreate(final_path: str, keep: bool = False) -> Iterator[str]:
|
|
178
199
|
"""Context manager to create a temporary file. Entering returns path to
|
|
179
200
|
the temporary file in the same directory as finalPath. If the code in
|
|
180
|
-
context succeeds, the file renamed to its
|
|
201
|
+
context succeeds, the file is renamed to its actual name. If an error
|
|
181
202
|
occurs, the file is not installed and is removed unless keep is specified.
|
|
182
203
|
"""
|
|
183
204
|
tmp_path = atomic_tmp_file(final_path)
|
toil/lib/misc.py
CHANGED
|
@@ -63,6 +63,24 @@ def unix_now_ms() -> float:
|
|
|
63
63
|
"""Return the current time in milliseconds since the Unix epoch."""
|
|
64
64
|
return time.time() * 1000
|
|
65
65
|
|
|
66
|
+
def unix_seconds_to_timestamp(timestamp: float) -> str:
|
|
67
|
+
"""
|
|
68
|
+
Convert a time in seconds since the Unix epoch to an ISO 8601 string.
|
|
69
|
+
"""
|
|
70
|
+
return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
|
|
71
|
+
|
|
72
|
+
def unix_seconds_to_local_time(timestamp: float) -> datetime.datetime:
|
|
73
|
+
"""
|
|
74
|
+
Returns a local time corresponding to the given number of seconds since the Unix epoch.
|
|
75
|
+
"""
|
|
76
|
+
return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).astimezone()
|
|
77
|
+
|
|
78
|
+
def seconds_to_duration(time_difference: float) -> str:
|
|
79
|
+
"""
|
|
80
|
+
Convert a time difference in seconds to an ISO 8601 duration string.
|
|
81
|
+
"""
|
|
82
|
+
return f"PT{time_difference:.3f}S"
|
|
83
|
+
|
|
66
84
|
|
|
67
85
|
def slow_down(seconds: float) -> float:
|
|
68
86
|
"""
|
toil/lib/retry.py
CHANGED
|
@@ -24,21 +24,21 @@ objects wrapping Exceptions to include additional conditions.
|
|
|
24
24
|
|
|
25
25
|
For example, retrying on a single Exception (HTTPError)::
|
|
26
26
|
|
|
27
|
-
from
|
|
27
|
+
from toil.lib.web import web_session
|
|
28
28
|
from requests.exceptions import HTTPError
|
|
29
29
|
|
|
30
30
|
@retry(errors=[HTTPError])
|
|
31
31
|
def update_my_wallpaper():
|
|
32
|
-
return get('https://www.deviantart.com/')
|
|
32
|
+
return web_session.get('https://www.deviantart.com/')
|
|
33
33
|
|
|
34
34
|
Or::
|
|
35
35
|
|
|
36
|
-
from
|
|
36
|
+
from toil.lib.web import web_session
|
|
37
37
|
from requests.exceptions import HTTPError
|
|
38
38
|
|
|
39
39
|
@retry(errors=[HTTPError, ValueError])
|
|
40
40
|
def update_my_wallpaper():
|
|
41
|
-
return get('https://www.deviantart.com/')
|
|
41
|
+
return web_session.get('https://www.deviantart.com/')
|
|
42
42
|
|
|
43
43
|
The examples above will retry for the default interval on any errors specified in
|
|
44
44
|
the "errors=" arg list.
|
|
@@ -46,7 +46,7 @@ the "errors=" arg list.
|
|
|
46
46
|
To retry on specifically 500/502/503/504 errors, you could specify an ErrorCondition
|
|
47
47
|
object instead, for example::
|
|
48
48
|
|
|
49
|
-
from
|
|
49
|
+
from toil.lib.web import web_session
|
|
50
50
|
from requests.exceptions import HTTPError
|
|
51
51
|
|
|
52
52
|
@retry(errors=[
|
|
@@ -55,11 +55,11 @@ object instead, for example::
|
|
|
55
55
|
error_codes=[500, 502, 503, 504]
|
|
56
56
|
)])
|
|
57
57
|
def update_my_wallpaper():
|
|
58
|
-
return
|
|
58
|
+
return web_session.get('https://www.deviantart.com/')
|
|
59
59
|
|
|
60
60
|
To retry on specifically errors containing the phrase "NotFound"::
|
|
61
61
|
|
|
62
|
-
from
|
|
62
|
+
from toil.lib.web import web_session
|
|
63
63
|
from requests.exceptions import HTTPError
|
|
64
64
|
|
|
65
65
|
@retry(errors=[
|
|
@@ -68,11 +68,11 @@ To retry on specifically errors containing the phrase "NotFound"::
|
|
|
68
68
|
error_message_must_include="NotFound"
|
|
69
69
|
)])
|
|
70
70
|
def update_my_wallpaper():
|
|
71
|
-
return
|
|
71
|
+
return web_session.get('https://www.deviantart.com/')
|
|
72
72
|
|
|
73
73
|
To retry on all HTTPError errors EXCEPT an HTTPError containing the phrase "NotFound"::
|
|
74
74
|
|
|
75
|
-
from
|
|
75
|
+
from toil.lib.web import web_session
|
|
76
76
|
from requests.exceptions import HTTPError
|
|
77
77
|
|
|
78
78
|
@retry(errors=[
|
|
@@ -83,7 +83,7 @@ To retry on all HTTPError errors EXCEPT an HTTPError containing the phrase "NotF
|
|
|
83
83
|
retry_on_this_condition=False
|
|
84
84
|
)])
|
|
85
85
|
def update_my_wallpaper():
|
|
86
|
-
return
|
|
86
|
+
return web_session.get('https://www.deviantart.com/')
|
|
87
87
|
|
|
88
88
|
To retry on boto3's specific status errors, an example of the implementation is::
|
|
89
89
|
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
"""
|
|
16
|
-
Contains functions for integrating Toil with
|
|
17
|
-
|
|
16
|
+
Contains functions for integrating Toil with GA4GH Tool Registry Service
|
|
17
|
+
servers, for fetching workflows.
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
20
|
import hashlib
|
|
@@ -24,35 +24,29 @@ import shutil
|
|
|
24
24
|
import sys
|
|
25
25
|
import tempfile
|
|
26
26
|
import zipfile
|
|
27
|
-
from typing import Any,
|
|
27
|
+
from typing import Any, Literal, Optional, Union, TypedDict, cast
|
|
28
28
|
|
|
29
29
|
from urllib.parse import urlparse, unquote, quote
|
|
30
30
|
import requests
|
|
31
31
|
|
|
32
32
|
from toil.lib.retry import retry
|
|
33
33
|
from toil.lib.io import file_digest, robust_rmtree
|
|
34
|
-
from toil.
|
|
34
|
+
from toil.lib.web import web_session
|
|
35
35
|
|
|
36
36
|
logger = logging.getLogger(__name__)
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
# doing cookies, and to send a sensible user agent.
|
|
40
|
-
# We expect the Toil and Python version to not be personally identifiable even
|
|
41
|
-
# in theory (someone might make a new Toil version first, buit there's no way
|
|
42
|
-
# to know for sure that nobody else did the same thing).
|
|
43
|
-
session = requests.Session()
|
|
44
|
-
session.headers.update({"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"})
|
|
38
|
+
TRS_ROOT = "https://dockstore.org" if "TOIL_TRS_ROOT" not in os.environ else os.environ["TOIL_TRS_ROOT"]
|
|
45
39
|
|
|
46
|
-
def
|
|
40
|
+
def is_trs_workflow(workflow: str) -> bool:
|
|
47
41
|
"""
|
|
48
|
-
Returns True if a workflow string smells
|
|
42
|
+
Returns True if a workflow string smells like TRS.
|
|
49
43
|
|
|
50
44
|
Detects Dockstore page URLs and strings that could be Dockstore TRS IDs.
|
|
51
45
|
"""
|
|
52
46
|
|
|
53
|
-
return workflow.startswith("
|
|
47
|
+
return workflow.startswith(f"{TRS_ROOT}/workflows/") or workflow.startswith(f"{TRS_ROOT}/my-workflows/") or workflow.startswith("#workflow/")
|
|
54
48
|
|
|
55
|
-
def
|
|
49
|
+
def extract_trs_spec(workflow: str) -> str:
|
|
56
50
|
"""
|
|
57
51
|
Parse a Dockstore workflow URL or TRS ID to a string that is definitely a TRS ID.
|
|
58
52
|
"""
|
|
@@ -63,13 +57,16 @@ def find_trs_spec(workflow: str) -> str:
|
|
|
63
57
|
logger.debug("Workflow %s is a TRS specifier already", workflow)
|
|
64
58
|
trs_spec = workflow
|
|
65
59
|
else:
|
|
66
|
-
# We need to get the right TRS ID from the
|
|
60
|
+
# We need to get the right TRS ID from the Dockstore URL
|
|
67
61
|
parsed = urlparse(workflow)
|
|
68
|
-
# TODO: We assume the
|
|
62
|
+
# TODO: We assume the Dockstore page URL structure and the TRS IDs are basically the same.
|
|
69
63
|
page_path = unquote(parsed.path)
|
|
70
|
-
if
|
|
64
|
+
if page_path.startswith("/workflows/"):
|
|
65
|
+
trs_spec = "#workflow/" + page_path[len("/workflows/"):]
|
|
66
|
+
elif page_path.startswith("/my-workflows/"):
|
|
67
|
+
trs_spec = "#workflow/" + page_path[len("/my-workflows/"):]
|
|
68
|
+
else:
|
|
71
69
|
raise RuntimeError("Cannot parse Dockstore URL " + workflow)
|
|
72
|
-
trs_spec = "#workflow/" + page_path[len("/workflows/"):]
|
|
73
70
|
logger.debug("Translated %s to TRS: %s", workflow, trs_spec)
|
|
74
71
|
|
|
75
72
|
return trs_spec
|
|
@@ -88,27 +85,36 @@ def parse_trs_spec(trs_spec: str) -> tuple[str, Optional[str]]:
|
|
|
88
85
|
trs_version = None
|
|
89
86
|
return trs_workflow_id, trs_version
|
|
90
87
|
|
|
88
|
+
def compose_trs_spec(trs_workflow_id: str, trs_version: str) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Compose a TRS ID from a workflow ID and version ID.
|
|
91
|
+
"""
|
|
92
|
+
return f"{trs_workflow_id}:{trs_version}"
|
|
93
|
+
|
|
91
94
|
@retry(errors=[requests.exceptions.ConnectionError])
|
|
92
|
-
def
|
|
95
|
+
def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, str, str]:
|
|
93
96
|
"""
|
|
94
|
-
Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow.
|
|
97
|
+
Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow, along with the TRS workflow ID and version.
|
|
95
98
|
|
|
96
99
|
Accepts inputs like:
|
|
97
100
|
|
|
98
101
|
- https://dockstore.org/workflows/github.com/dockstore-testing/md5sum-checker:master?tab=info
|
|
99
102
|
- #workflow/github.com/dockstore-testing/md5sum-checker
|
|
100
103
|
|
|
101
|
-
Assumes the input is actually one of the supported formats. See
|
|
104
|
+
Assumes the input is actually one of the supported formats. See is_trs_workflow().
|
|
102
105
|
|
|
103
106
|
TODO: Needs to handle multi-workflow files if Dockstore can.
|
|
104
107
|
|
|
108
|
+
:raises FileNotFoundError: if the workflow or version doesn't exist.
|
|
109
|
+
:raises ValueError: if the version is not specified but cannot be
|
|
110
|
+
automatically determined.
|
|
105
111
|
"""
|
|
106
112
|
|
|
107
113
|
if supported_languages is not None and len(supported_languages) == 0:
|
|
108
114
|
raise ValueError("Set of supported languages must be nonempty if provided.")
|
|
109
115
|
|
|
110
116
|
# Get the TRS id[:version] string from what might be a Dockstore URL
|
|
111
|
-
trs_spec =
|
|
117
|
+
trs_spec = extract_trs_spec(workflow)
|
|
112
118
|
# Parse out workflow and possible version
|
|
113
119
|
trs_workflow_id, trs_version = parse_trs_spec(trs_spec)
|
|
114
120
|
|
|
@@ -116,8 +122,14 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
116
122
|
|
|
117
123
|
# Fetch the main TRS document.
|
|
118
124
|
# See e.g. https://dockstore.org/api/ga4gh/trs/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker
|
|
119
|
-
trs_workflow_url = f"
|
|
120
|
-
|
|
125
|
+
trs_workflow_url = f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
|
|
126
|
+
logger.debug("Get versions: %s", trs_workflow_url)
|
|
127
|
+
trs_workflow_response = web_session.get(trs_workflow_url)
|
|
128
|
+
if trs_workflow_response.status_code in (400, 404):
|
|
129
|
+
# If the workflow ID isn't in Dockstore's accepted format (and also thus doesn't exist), we can get a 400
|
|
130
|
+
raise FileNotFoundError(f"Workflow {trs_workflow_id} does not exist.")
|
|
131
|
+
trs_workflow_response.raise_for_status()
|
|
132
|
+
trs_workflow_document = trs_workflow_response.json()
|
|
121
133
|
|
|
122
134
|
# Make a map from version to version info. We will need the
|
|
123
135
|
# "descriptor_type" array to find eligible languages, and the "url" field
|
|
@@ -146,12 +158,10 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
146
158
|
continue
|
|
147
159
|
eligible_workflow_versions.add(version_name)
|
|
148
160
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
logger.debug("Defaulting to workflow version %s", default_version)
|
|
154
|
-
break
|
|
161
|
+
# TODO: Dockstore has a concept of a "default version", but doesn't expose
|
|
162
|
+
# it over TRS. To avoid defaulting to something that *isn't* the Dockstore
|
|
163
|
+
# default version, we refuse to choose a version when there are multiple
|
|
164
|
+
# possibilities.
|
|
155
165
|
|
|
156
166
|
if trs_version is None and len(eligible_workflow_versions) == 1:
|
|
157
167
|
# If there's just one version use that.
|
|
@@ -161,26 +171,31 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
161
171
|
|
|
162
172
|
# If we don't like what we found we compose a useful error message.
|
|
163
173
|
problems: list[str] = []
|
|
174
|
+
problem_type: type[Exception] = RuntimeError
|
|
164
175
|
if trs_version is None:
|
|
165
176
|
problems.append(f"Workflow {workflow} does not specify a version")
|
|
177
|
+
problem_type = ValueError
|
|
166
178
|
elif trs_version not in workflow_versions:
|
|
167
179
|
problems.append(f"Workflow version {trs_version} from {workflow} does not exist")
|
|
180
|
+
problem_type = FileNotFoundError
|
|
168
181
|
elif trs_version not in eligible_workflow_versions:
|
|
169
182
|
message = f"Workflow version {trs_version} from {workflow} is not available"
|
|
170
183
|
if supported_languages is not None:
|
|
171
184
|
message += f" in any of: {', '.join(supported_languages)}"
|
|
172
185
|
problems.append(message)
|
|
186
|
+
problem_type = FileNotFoundError
|
|
173
187
|
if len(problems) > 0:
|
|
174
188
|
if len(eligible_workflow_versions) == 0:
|
|
175
189
|
message = "No versions of the workflow are available"
|
|
176
190
|
if supported_languages is not None:
|
|
177
191
|
message += f" in any of: {', '.join(supported_languages)}"
|
|
178
192
|
problems.append(message)
|
|
193
|
+
problem_type = FileNotFoundError
|
|
179
194
|
elif trs_version is None:
|
|
180
195
|
problems.append(f"Add ':' and the name of a workflow version ({', '.join(eligible_workflow_versions)}) after '{trs_workflow_id}'")
|
|
181
196
|
else:
|
|
182
197
|
problems.append(f"Replace '{trs_version}' with one of ({', '.join(eligible_workflow_versions)})")
|
|
183
|
-
raise
|
|
198
|
+
raise problem_type("; ".join(problems))
|
|
184
199
|
|
|
185
200
|
# Tell MyPy we now have a version, or we would have raised
|
|
186
201
|
assert trs_version is not None
|
|
@@ -192,12 +207,35 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
192
207
|
language = candidate_language
|
|
193
208
|
|
|
194
209
|
logger.debug("Going to use %s version %s in %s", trs_workflow_id, trs_version, language)
|
|
195
|
-
|
|
210
|
+
|
|
211
|
+
return trs_workflow_id, trs_version, language
|
|
212
|
+
|
|
213
|
+
@retry(errors=[requests.exceptions.ConnectionError])
|
|
214
|
+
def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str:
|
|
215
|
+
"""
|
|
216
|
+
Returns a URL or local path to a workflow's primary descriptor file.
|
|
217
|
+
|
|
218
|
+
The file will be in context with its required files so it can actually run.
|
|
219
|
+
|
|
220
|
+
:raises FileNotFoundError: if the workflow or version doesn't exist.
|
|
221
|
+
"""
|
|
222
|
+
|
|
223
|
+
# TODO: We should probably use HATEOAS and pull this from the workflow
|
|
224
|
+
# document we probably already fetched but aren't passing.
|
|
225
|
+
trs_version_url = f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}/versions/{quote(trs_version, safe='')}"
|
|
196
226
|
|
|
197
227
|
# Fetch the list of all the files
|
|
198
228
|
trs_files_url = f"{trs_version_url}/{language}/files"
|
|
199
229
|
logger.debug("Workflow files URL: %s", trs_files_url)
|
|
200
|
-
|
|
230
|
+
trs_files_response = web_session.get(trs_files_url)
|
|
231
|
+
if trs_files_response.status_code in (204, 400, 404):
|
|
232
|
+
# We can get a 204 No Content response if the version doesn't exist.
|
|
233
|
+
# That's successful, so we need to handle it specifically. See
|
|
234
|
+
# <https://github.com/dockstore/dockstore/issues/6048>
|
|
235
|
+
# We can also get a 400 if the workflow ID is not in Dockstore's expected format (3 slash-separated segments).
|
|
236
|
+
raise FileNotFoundError(f"Workflow {trs_workflow_id} version {trs_version} in language {language} does not exist.")
|
|
237
|
+
trs_files_response.raise_for_status()
|
|
238
|
+
trs_files_document = trs_files_response.json()
|
|
201
239
|
|
|
202
240
|
# Find the information we need to ID the primary descriptor file
|
|
203
241
|
primary_descriptor_path: Optional[str] = None
|
|
@@ -210,7 +248,7 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
210
248
|
primary_descriptor_hash = file_info["checksum"]["checksum"]
|
|
211
249
|
break
|
|
212
250
|
if primary_descriptor_path is None or primary_descriptor_hash is None or primary_descriptor_hash_algorithm is None:
|
|
213
|
-
raise RuntimeError("Could not find a primary descriptor file for
|
|
251
|
+
raise RuntimeError(f"Could not find a primary descriptor file for workflow {trs_workflow_id} version {trs_version} in language {language}")
|
|
214
252
|
primary_descriptor_basename = os.path.basename(primary_descriptor_path)
|
|
215
253
|
|
|
216
254
|
# Work out how to compute the hash we are looking for. See
|
|
@@ -263,7 +301,7 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
263
301
|
}
|
|
264
302
|
# If we don't set stream=True, we can't actually read anything from the
|
|
265
303
|
# raw stream, since Requests will have done it already.
|
|
266
|
-
with
|
|
304
|
+
with web_session.get(trs_zip_file_url, headers=headers, stream=True) as response:
|
|
267
305
|
response_content_length = response.headers.get("Content-Length")
|
|
268
306
|
logger.debug("Server reports content length: %s", response_content_length)
|
|
269
307
|
shutil.copyfileobj(response.raw, zip_file)
|
|
@@ -308,27 +346,38 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
|
|
|
308
346
|
logger.debug("Rejected %s because its %s hash %s is not %s", file_path, python_hash_name, file_hash, primary_descriptor_hash)
|
|
309
347
|
if found_path is None:
|
|
310
348
|
# We couldn't find the promised primary descriptor
|
|
311
|
-
raise RuntimeError(f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash}")
|
|
349
|
+
raise RuntimeError(f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash} for workflow {trs_workflow_id} version {trs_version} in language {language}")
|
|
312
350
|
|
|
313
351
|
return found_path
|
|
314
352
|
|
|
315
|
-
def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> str:
|
|
353
|
+
def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, Optional[str]]:
|
|
316
354
|
"""
|
|
317
355
|
Find the real workflow URL or filename from a command line argument.
|
|
318
356
|
|
|
319
357
|
Transform a workflow URL or path that might actually be a Dockstore page
|
|
320
|
-
URL or TRS specifier to an actual URL or path to a workflow document
|
|
358
|
+
URL or TRS specifier to an actual URL or path to a workflow document, and
|
|
359
|
+
optional TRS specifier.
|
|
360
|
+
|
|
361
|
+
Accepts inputs like
|
|
362
|
+
|
|
363
|
+
- https://dockstore.org/workflows/github.com/dockstore-testing/md5sum-checker:master?tab=info
|
|
364
|
+
- #workflow/github.com/dockstore-testing/md5sum-checker
|
|
365
|
+
- ./local.cwl
|
|
366
|
+
- https://example.com/~myuser/workflow/main.cwl
|
|
367
|
+
|
|
368
|
+
:raises FileNotFoundError: if the workflow or version should be in Dockstore but doesn't seem to exist.
|
|
321
369
|
"""
|
|
322
370
|
|
|
323
|
-
if
|
|
324
|
-
# Ask
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
371
|
+
if is_trs_workflow(workflow):
|
|
372
|
+
# Ask TRS host where to find TRS-looking things
|
|
373
|
+
trs_workflow_id, trs_version, language = find_workflow(workflow, supported_languages)
|
|
374
|
+
resolved = fetch_workflow(trs_workflow_id, trs_version, language)
|
|
375
|
+
logger.info("Resolved TRS workflow %s to %s", workflow, resolved)
|
|
376
|
+
return resolved, compose_trs_spec(trs_workflow_id, trs_version)
|
|
328
377
|
else:
|
|
329
378
|
# Pass other things through.
|
|
330
|
-
|
|
331
|
-
|
|
379
|
+
# TODO: Find out if they have TRS names.
|
|
380
|
+
return workflow, None
|
|
332
381
|
|
|
333
382
|
|
|
334
383
|
|
toil/lib/web.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright (C) 2024 Regents of the University of California
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
Contains functions for making web requests with Toil.
|
|
17
|
+
|
|
18
|
+
All web requests should go through this module, to make sure they use the right
|
|
19
|
+
user agent.
|
|
20
|
+
|
|
21
|
+
>>> from toil.lib.web import web_session
|
|
22
|
+
>>> web_session.get("https://example.com")
|
|
23
|
+
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import requests
|
|
28
|
+
import sys
|
|
29
|
+
|
|
30
|
+
from toil.version import baseVersion
|
|
31
|
+
|
|
32
|
+
# We manage a Requests session at the module level in case we're supposed to be
|
|
33
|
+
# doing cookies, and to send a sensible user agent.
|
|
34
|
+
# We expect the Toil and Python version to not be personally identifiable even
|
|
35
|
+
# in theory (someone might make a new Toil version first, but there's no way
|
|
36
|
+
# to know for sure that nobody else did the same thing).
|
|
37
|
+
web_session = requests.Session()
|
|
38
|
+
web_session.headers.update({"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"})
|
toil/options/common.py
CHANGED
|
@@ -362,12 +362,13 @@ def add_base_toil_options(
|
|
|
362
362
|
action="store_true",
|
|
363
363
|
help="Do not capture standard output and error from batch system jobs.",
|
|
364
364
|
)
|
|
365
|
+
# TODO: Should this be deprecated since we always save stats now for history tracking?
|
|
365
366
|
core_options.add_argument(
|
|
366
367
|
"--stats",
|
|
367
368
|
dest="stats",
|
|
368
369
|
default=False,
|
|
369
370
|
action="store_true",
|
|
370
|
-
help="
|
|
371
|
+
help="Keep statistics about the toil workflow to be used by 'toil stats'.",
|
|
371
372
|
)
|
|
372
373
|
clean_choices = ["always", "onError", "never", "onSuccess"]
|
|
373
374
|
core_options.add_argument(
|
|
@@ -466,7 +467,8 @@ def add_base_toil_options(
|
|
|
466
467
|
)
|
|
467
468
|
|
|
468
469
|
caching = file_store_options.add_mutually_exclusive_group()
|
|
469
|
-
caching_help = "Enable or disable caching for your workflow, specifying this overrides default from
|
|
470
|
+
caching_help = ("Enable or disable worker level file caching for your workflow, specifying this overrides default from batch system. "
|
|
471
|
+
"Does not affect CWL or WDL task caching.")
|
|
470
472
|
caching.add_argument(
|
|
471
473
|
"--caching",
|
|
472
474
|
dest="caching",
|
|
@@ -1102,6 +1104,19 @@ def add_base_toil_options(
|
|
|
1102
1104
|
default=False,
|
|
1103
1105
|
help="Disables the progress bar shown when standard error is a terminal.",
|
|
1104
1106
|
)
|
|
1107
|
+
misc_options.add_argument(
|
|
1108
|
+
"--publishWorkflowMetrics",
|
|
1109
|
+
dest="publish_workflow_metrics",
|
|
1110
|
+
choices=["all", "current", "no"],
|
|
1111
|
+
default=None,
|
|
1112
|
+
help="Whether to publish workflow metrics reports (including unique workflow "
|
|
1113
|
+
"and task run IDs, job names, and version and Toil feature use information) to "
|
|
1114
|
+
"Dockstore when a workflow completes. Selecting \"current\" will publish metrics "
|
|
1115
|
+
"for the current workflow. Selecting \"all\" will also publish prior workflow "
|
|
1116
|
+
"runs from the Toil history database, even if they themselves were run with \"no\". "
|
|
1117
|
+
"Note that once published, workflow metrics CANNOT be deleted or un-published; they "
|
|
1118
|
+
"will stay published forever!"
|
|
1119
|
+
)
|
|
1105
1120
|
|
|
1106
1121
|
# Debug options
|
|
1107
1122
|
debug_options = parser.add_argument_group(
|
toil/options/cwl.py
CHANGED
|
@@ -419,3 +419,13 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
|
|
|
419
419
|
type=str,
|
|
420
420
|
help=suppress_help or "Specify a cloud bucket endpoint for output files.",
|
|
421
421
|
)
|
|
422
|
+
parser.add_argument(
|
|
423
|
+
"--cachedir",
|
|
424
|
+
type=str,
|
|
425
|
+
help=suppress_help
|
|
426
|
+
or "Directory to cache intermediate workflow outputs to avoid "
|
|
427
|
+
"recomputing steps. Can be very helpful in the development and "
|
|
428
|
+
"troubleshooting of CWL documents. This automatically bypasses the file store."
|
|
429
|
+
" Not to be confused with --caching.",
|
|
430
|
+
dest="cachedir"
|
|
431
|
+
)
|
|
@@ -19,7 +19,6 @@ import time
|
|
|
19
19
|
import uuid
|
|
20
20
|
from typing import Optional
|
|
21
21
|
|
|
22
|
-
import requests
|
|
23
22
|
from libcloud.compute.drivers.gce import GCEFailedNode
|
|
24
23
|
from libcloud.compute.providers import get_driver
|
|
25
24
|
from libcloud.compute.types import Provider
|
|
@@ -27,6 +26,7 @@ from libcloud.compute.types import Provider
|
|
|
27
26
|
from toil.jobStores.googleJobStore import GoogleJobStore
|
|
28
27
|
from toil.lib.compatibility import compat_bytes_recursive
|
|
29
28
|
from toil.lib.conversions import human2bytes
|
|
29
|
+
from toil.lib.web import web_session
|
|
30
30
|
from toil.provisioners import NoSuchClusterException
|
|
31
31
|
from toil.provisioners.abstractProvisioner import AbstractProvisioner, Shape
|
|
32
32
|
from toil.provisioners.node import Node
|
|
@@ -83,11 +83,11 @@ class GCEProvisioner(AbstractProvisioner):
|
|
|
83
83
|
"""
|
|
84
84
|
metadata_server = "http://metadata/computeMetadata/v1/instance/"
|
|
85
85
|
metadata_flavor = {"Metadata-Flavor": "Google"}
|
|
86
|
-
zone =
|
|
86
|
+
zone = web_session.get(metadata_server + "zone", headers=metadata_flavor).text
|
|
87
87
|
self._zone = zone.split("/")[-1]
|
|
88
88
|
|
|
89
89
|
project_metadata_server = "http://metadata/computeMetadata/v1/project/"
|
|
90
|
-
self._projectId =
|
|
90
|
+
self._projectId = web_session.get(
|
|
91
91
|
project_metadata_server + "project-id", headers=metadata_flavor
|
|
92
92
|
).text
|
|
93
93
|
|
|
@@ -95,7 +95,7 @@ class GCEProvisioner(AbstractProvisioner):
|
|
|
95
95
|
self._googleJson = ""
|
|
96
96
|
self._clientEmail = ""
|
|
97
97
|
|
|
98
|
-
self._tags =
|
|
98
|
+
self._tags = web_session.get(
|
|
99
99
|
metadata_server + "description", headers=metadata_flavor
|
|
100
100
|
).text
|
|
101
101
|
tags = json.loads(self._tags)
|
|
@@ -10,13 +10,13 @@ from io import BytesIO
|
|
|
10
10
|
from typing import Any, Optional, cast
|
|
11
11
|
from urllib.parse import urldefrag, urljoin, urlparse
|
|
12
12
|
|
|
13
|
-
import requests
|
|
14
13
|
import ruamel.yaml
|
|
15
14
|
import schema_salad
|
|
16
15
|
from configargparse import ArgumentParser
|
|
17
16
|
from wes_client.util import WESClient # type: ignore
|
|
18
17
|
from wes_client.util import wes_reponse as wes_response
|
|
19
18
|
|
|
19
|
+
from toil.lib.web import web_session
|
|
20
20
|
from toil.wdl.utils import get_version as get_wdl_version
|
|
21
21
|
|
|
22
22
|
"""
|
|
@@ -117,7 +117,7 @@ class WESClientWithWorkflowEngineParameters(WESClient): # type: ignore
|
|
|
117
117
|
proto, host = endpoint.split("://")
|
|
118
118
|
super().__init__(
|
|
119
119
|
{
|
|
120
|
-
# TODO: use the auth argument in requests
|
|
120
|
+
# TODO: use the auth argument in requests' post so we don't need to encode it ourselves
|
|
121
121
|
"auth": (
|
|
122
122
|
{
|
|
123
123
|
"Authorization": "Basic "
|
|
@@ -314,7 +314,7 @@ class WESClientWithWorkflowEngineParameters(WESClient): # type: ignore
|
|
|
314
314
|
data, files = self.build_wes_request(
|
|
315
315
|
workflow_file, workflow_params_file, attachments, workflow_engine_parameters
|
|
316
316
|
)
|
|
317
|
-
post_result =
|
|
317
|
+
post_result = web_session.post(
|
|
318
318
|
urljoin(f"{self.proto}://{self.host}", "/ga4gh/wes/v1/runs"),
|
|
319
319
|
data=data,
|
|
320
320
|
files=files,
|
toil/server/utils.py
CHANGED
|
@@ -19,10 +19,9 @@ from datetime import datetime
|
|
|
19
19
|
from typing import Optional
|
|
20
20
|
from urllib.parse import urlparse
|
|
21
21
|
|
|
22
|
-
import requests
|
|
23
|
-
|
|
24
22
|
from toil.lib.io import AtomicFileCreate
|
|
25
23
|
from toil.lib.retry import retry
|
|
24
|
+
from toil.lib.web import web_session
|
|
26
25
|
|
|
27
26
|
try:
|
|
28
27
|
from toil.lib.aws import get_current_aws_region
|
|
@@ -63,7 +62,7 @@ def download_file_from_internet(
|
|
|
63
62
|
"""
|
|
64
63
|
Download a file from the Internet and write it to dest.
|
|
65
64
|
"""
|
|
66
|
-
response =
|
|
65
|
+
response = web_session.get(src)
|
|
67
66
|
|
|
68
67
|
if not response.ok:
|
|
69
68
|
raise RuntimeError("Request failed with a client error or a server error.")
|