toil 8.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. toil/__init__.py +4 -4
  2. toil/batchSystems/options.py +1 -0
  3. toil/batchSystems/slurm.py +227 -83
  4. toil/common.py +161 -45
  5. toil/cwl/cwltoil.py +31 -10
  6. toil/job.py +47 -38
  7. toil/jobStores/aws/jobStore.py +46 -10
  8. toil/lib/aws/session.py +14 -3
  9. toil/lib/aws/utils.py +92 -35
  10. toil/lib/dockstore.py +379 -0
  11. toil/lib/ec2nodes.py +3 -2
  12. toil/lib/history.py +1271 -0
  13. toil/lib/history_submission.py +681 -0
  14. toil/lib/io.py +22 -1
  15. toil/lib/misc.py +18 -0
  16. toil/lib/retry.py +10 -10
  17. toil/lib/{integration.py → trs.py} +95 -46
  18. toil/lib/web.py +38 -0
  19. toil/options/common.py +17 -2
  20. toil/options/cwl.py +10 -0
  21. toil/provisioners/gceProvisioner.py +4 -4
  22. toil/server/cli/wes_cwl_runner.py +3 -3
  23. toil/server/utils.py +2 -3
  24. toil/statsAndLogging.py +35 -1
  25. toil/test/batchSystems/test_slurm.py +172 -2
  26. toil/test/cwl/conftest.py +39 -0
  27. toil/test/cwl/cwlTest.py +105 -2
  28. toil/test/cwl/optional-file.cwl +18 -0
  29. toil/test/lib/test_history.py +212 -0
  30. toil/test/lib/test_trs.py +161 -0
  31. toil/test/wdl/wdltoil_test.py +1 -1
  32. toil/version.py +10 -10
  33. toil/wdl/wdltoil.py +23 -9
  34. toil/worker.py +113 -33
  35. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/METADATA +9 -4
  36. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/RECORD +40 -34
  37. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  38. toil/test/lib/test_integration.py +0 -104
  39. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  40. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  41. {toil-8.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/io.py CHANGED
@@ -11,8 +11,29 @@ from contextlib import contextmanager
11
11
  from io import BytesIO
12
12
  from typing import IO, Any, Callable, Optional, Protocol, Union
13
13
 
14
+ from toil.lib.memoize import memoize
15
+
14
16
  logger = logging.getLogger(__name__)
15
17
 
18
+ @memoize
19
+ def get_toil_home() -> str:
20
+ """
21
+ Get the Toil home directory for storing configuration and global state.
22
+
23
+ Raises an error if it does not exist and cannot be created. Safe to run
24
+ simultaneously in multiple processes.
25
+ """
26
+
27
+ # TODO: should this use an XDG config directory or ~/.config to not clutter the
28
+ # base home directory?
29
+ toil_home_dir = os.path.join(os.path.expanduser("~"), ".toil")
30
+
31
+ dir_path = try_path(toil_home_dir)
32
+ if dir_path is None:
33
+ raise RuntimeError(
34
+ f"Cannot create or access Toil configuration directory {toil_home_dir}"
35
+ )
36
+ return dir_path
16
37
 
17
38
  TOIL_URI_SCHEME = "toilfile:"
18
39
 
@@ -177,7 +198,7 @@ def atomic_install(tmp_path, final_path) -> None:
177
198
  def AtomicFileCreate(final_path: str, keep: bool = False) -> Iterator[str]:
178
199
  """Context manager to create a temporary file. Entering returns path to
179
200
  the temporary file in the same directory as finalPath. If the code in
180
- context succeeds, the file renamed to its actually name. If an error
201
+ context succeeds, the file is renamed to its actual name. If an error
181
202
  occurs, the file is not installed and is removed unless keep is specified.
182
203
  """
183
204
  tmp_path = atomic_tmp_file(final_path)
toil/lib/misc.py CHANGED
@@ -63,6 +63,24 @@ def unix_now_ms() -> float:
63
63
  """Return the current time in milliseconds since the Unix epoch."""
64
64
  return time.time() * 1000
65
65
 
66
+ def unix_seconds_to_timestamp(timestamp: float) -> str:
67
+ """
68
+ Convert a time in seconds since the Unix epoch to an ISO 8601 string.
69
+ """
70
+ return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
71
+
72
+ def unix_seconds_to_local_time(timestamp: float) -> datetime.datetime:
73
+ """
74
+ Returns a local time corresponding to the given number of seconds since the Unix epoch.
75
+ """
76
+ return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).astimezone()
77
+
78
+ def seconds_to_duration(time_difference: float) -> str:
79
+ """
80
+ Convert a time difference in seconds to an ISO 8601 duration string.
81
+ """
82
+ return f"PT{time_difference:.3f}S"
83
+
66
84
 
67
85
  def slow_down(seconds: float) -> float:
68
86
  """
toil/lib/retry.py CHANGED
@@ -24,21 +24,21 @@ objects wrapping Exceptions to include additional conditions.
24
24
 
25
25
  For example, retrying on one Exception (HTTPError)::
26
26
 
27
- from requests import get
27
+ from toil.lib.web import web_session
28
28
  from requests.exceptions import HTTPError
29
29
 
30
30
  @retry(errors=[HTTPError])
31
31
  def update_my_wallpaper():
32
- return get('https://www.deviantart.com/')
32
+ return web_session.get('https://www.deviantart.com/')
33
33
 
34
34
  Or::
35
35
 
36
- from requests import get
36
+ from toil.lib.web import web_session
37
37
  from requests.exceptions import HTTPError
38
38
 
39
39
  @retry(errors=[HTTPError, ValueError])
40
40
  def update_my_wallpaper():
41
- return get('https://www.deviantart.com/')
41
+ return web_session.get('https://www.deviantart.com/')
42
42
 
43
43
  The examples above will retry for the default interval on any errors specified
44
44
  in the "errors=" arg list.
@@ -46,7 +46,7 @@ the "errors=" arg list.
46
46
  To retry on specifically 500/502/503/504 errors, you could specify an ErrorCondition
47
47
  object instead, for example::
48
48
 
49
- from requests import get
49
+ from toil.lib.web import web_session
50
50
  from requests.exceptions import HTTPError
51
51
 
52
52
  @retry(errors=[
@@ -55,11 +55,11 @@ object instead, for example::
55
55
  error_codes=[500, 502, 503, 504]
56
56
  )])
57
57
  def update_my_wallpaper():
58
- return requests.get('https://www.deviantart.com/')
58
+ return web_session.get('https://www.deviantart.com/')
59
59
 
60
60
  To retry on specifically errors containing the phrase "NotFound"::
61
61
 
62
- from requests import get
62
+ from toil.lib.web import web_session
63
63
  from requests.exceptions import HTTPError
64
64
 
65
65
  @retry(errors=[
@@ -68,11 +68,11 @@ To retry on specifically errors containing the phrase "NotFound"::
68
68
  error_message_must_include="NotFound"
69
69
  )])
70
70
  def update_my_wallpaper():
71
- return requests.get('https://www.deviantart.com/')
71
+ return web_session.get('https://www.deviantart.com/')
72
72
 
73
73
  To retry on all HTTPError errors EXCEPT an HTTPError containing the phrase "NotFound"::
74
74
 
75
- from requests import get
75
+ from toil.lib.web import web_session
76
76
  from requests.exceptions import HTTPError
77
77
 
78
78
  @retry(errors=[
@@ -83,7 +83,7 @@ To retry on all HTTPError errors EXCEPT an HTTPError containing the phrase "NotF
83
83
  retry_on_this_condition=False
84
84
  )])
85
85
  def update_my_wallpaper():
86
- return requests.get('https://www.deviantart.com/')
86
+ return web_session.get('https://www.deviantart.com/')
87
87
 
88
88
  To retry on boto3's specific status errors, an example of the implementation is::
89
89
 
@@ -13,8 +13,8 @@
13
13
  # limitations under the License.
14
14
 
15
15
  """
16
- Contains functions for integrating Toil with external services such as
17
- Dockstore.
16
+ Contains functions for integrating Toil with GA4GH Tool Registry Service
17
+ servers, for fetching workflows.
18
18
  """
19
19
 
20
20
  import hashlib
@@ -24,35 +24,29 @@ import shutil
24
24
  import sys
25
25
  import tempfile
26
26
  import zipfile
27
- from typing import Any, Dict, List, Optional, Set, Tuple, cast
27
+ from typing import Any, Literal, Optional, Union, TypedDict, cast
28
28
 
29
29
  from urllib.parse import urlparse, unquote, quote
30
30
  import requests
31
31
 
32
32
  from toil.lib.retry import retry
33
33
  from toil.lib.io import file_digest, robust_rmtree
34
- from toil.version import baseVersion
34
+ from toil.lib.web import web_session
35
35
 
36
36
  logger = logging.getLogger(__name__)
37
37
 
38
- # We manage a Requests session at the module level in case we're supposed to be
39
- # doing cookies, and to send a sensible user agent.
40
- # We expect the Toil and Python version to not be personally identifiable even
41
- # in theory (someone might make a new Toil version first, buit there's no way
42
- # to know for sure that nobody else did the same thing).
43
- session = requests.Session()
44
- session.headers.update({"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"})
38
+ TRS_ROOT = "https://dockstore.org" if "TOIL_TRS_ROOT" not in os.environ else os.environ["TOIL_TRS_ROOT"]
45
39
 
46
- def is_dockstore_workflow(workflow: str) -> bool:
40
+ def is_trs_workflow(workflow: str) -> bool:
47
41
  """
48
- Returns True if a workflow string smells Dockstore-y.
42
+ Returns True if a workflow string smells like TRS.
49
43
 
50
44
  Detects Dockstore page URLs and strings that could be Dockstore TRS IDs.
51
45
  """
52
46
 
53
- return workflow.startswith("https://dockstore.org/workflows/") or workflow.startswith("#workflow/")
47
+ return workflow.startswith(f"{TRS_ROOT}/workflows/") or workflow.startswith(f"{TRS_ROOT}/my-workflows/") or workflow.startswith("#workflow/")
54
48
 
55
- def find_trs_spec(workflow: str) -> str:
49
+ def extract_trs_spec(workflow: str) -> str:
56
50
  """
57
51
  Parse a Dockstore workflow URL or TRS ID to a string that is definitely a TRS ID.
58
52
  """
@@ -63,13 +57,16 @@ def find_trs_spec(workflow: str) -> str:
63
57
  logger.debug("Workflow %s is a TRS specifier already", workflow)
64
58
  trs_spec = workflow
65
59
  else:
66
- # We need to get the right TRS ID from the Docstore URL
60
+ # We need to get the right TRS ID from the Dockstore URL
67
61
  parsed = urlparse(workflow)
68
- # TODO: We assume the Docksotre page URL structure and the TRS IDs are basically the same.
62
+ # TODO: We assume the Dockstore page URL structure and the TRS IDs are basically the same.
69
63
  page_path = unquote(parsed.path)
70
- if not page_path.startswith("/workflows/"):
64
+ if page_path.startswith("/workflows/"):
65
+ trs_spec = "#workflow/" + page_path[len("/workflows/"):]
66
+ elif page_path.startswith("/my-workflows/"):
67
+ trs_spec = "#workflow/" + page_path[len("/my-workflows/"):]
68
+ else:
71
69
  raise RuntimeError("Cannot parse Dockstore URL " + workflow)
72
- trs_spec = "#workflow/" + page_path[len("/workflows/"):]
73
70
  logger.debug("Translated %s to TRS: %s", workflow, trs_spec)
74
71
 
75
72
  return trs_spec
@@ -88,27 +85,36 @@ def parse_trs_spec(trs_spec: str) -> tuple[str, Optional[str]]:
88
85
  trs_version = None
89
86
  return trs_workflow_id, trs_version
90
87
 
88
+ def compose_trs_spec(trs_workflow_id: str, trs_version: str) -> str:
89
+ """
90
+ Compose a TRS ID from a workflow ID and version ID.
91
+ """
92
+ return f"{trs_workflow_id}:{trs_version}"
93
+
91
94
  @retry(errors=[requests.exceptions.ConnectionError])
92
- def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optional[set[str]] = None) -> str:
95
+ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, str, str]:
93
96
  """
94
- Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow.
97
+ Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow, along with the TRS workflow ID and version.
95
98
 
96
99
  Accepts inputs like:
97
100
 
98
101
  - https://dockstore.org/workflows/github.com/dockstore-testing/md5sum-checker:master?tab=info
99
102
  - #workflow/github.com/dockstore-testing/md5sum-checker
100
103
 
101
- Assumes the input is actually one of the supported formats. See is_dockstore_workflow().
104
+ Assumes the input is actually one of the supported formats. See is_trs_workflow().
102
105
 
103
106
  TODO: Needs to handle multi-workflow files if Dockstore can.
104
107
 
108
+ :raises FileNotFoundError: if the workflow or version doesn't exist.
109
+ :raises ValueError: if the version is not specified but cannot be
110
+ automatically determined.
105
111
  """
106
112
 
107
113
  if supported_languages is not None and len(supported_languages) == 0:
108
114
  raise ValueError("Set of supported languages must be nonempty if provided.")
109
115
 
110
116
  # Get the TRS id[:version] string from what might be a Dockstore URL
111
- trs_spec = find_trs_spec(workflow)
117
+ trs_spec = extract_trs_spec(workflow)
112
118
  # Parse out workflow and possible version
113
119
  trs_workflow_id, trs_version = parse_trs_spec(trs_spec)
114
120
 
@@ -116,8 +122,14 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
116
122
 
117
123
  # Fetch the main TRS document.
118
124
  # See e.g. https://dockstore.org/api/ga4gh/trs/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker
119
- trs_workflow_url = f"https://dockstore.org/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
120
- trs_workflow_document = session.get(trs_workflow_url).json()
125
+ trs_workflow_url = f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
126
+ logger.debug("Get versions: %s", trs_workflow_url)
127
+ trs_workflow_response = web_session.get(trs_workflow_url)
128
+ if trs_workflow_response.status_code in (400, 404):
129
+ # If the workflow ID isn't in Dockstore's accepted format (and also thus doesn't exist), we can get a 400
130
+ raise FileNotFoundError(f"Workflow {trs_workflow_id} does not exist.")
131
+ trs_workflow_response.raise_for_status()
132
+ trs_workflow_document = trs_workflow_response.json()
121
133
 
122
134
  # Make a map from version to version info. We will need the
123
135
  # "descriptor_type" array to find eligible languages, and the "url" field
@@ -146,12 +158,10 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
146
158
  continue
147
159
  eligible_workflow_versions.add(version_name)
148
160
 
149
- for default_version in ['main', 'master']:
150
- if trs_version is None and default_version in eligible_workflow_versions:
151
- # Fill in a version if the user didn't provide one.
152
- trs_version = default_version
153
- logger.debug("Defaulting to workflow version %s", default_version)
154
- break
161
+ # TODO: Dockstore has a concept of a "default version", but doesn't expose
162
+ # it over TRS. To avoid defaulting to something that *isn't* the Dockstore
163
+ # default version, we refuse to choose a version when there are multiple
164
+ # possibilities.
155
165
 
156
166
  if trs_version is None and len(eligible_workflow_versions) == 1:
157
167
  # If there's just one version use that.
@@ -161,26 +171,31 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
161
171
 
162
172
  # If we don't like what we found we compose a useful error message.
163
173
  problems: list[str] = []
174
+ problem_type: type[Exception] = RuntimeError
164
175
  if trs_version is None:
165
176
  problems.append(f"Workflow {workflow} does not specify a version")
177
+ problem_type = ValueError
166
178
  elif trs_version not in workflow_versions:
167
179
  problems.append(f"Workflow version {trs_version} from {workflow} does not exist")
180
+ problem_type = FileNotFoundError
168
181
  elif trs_version not in eligible_workflow_versions:
169
182
  message = f"Workflow version {trs_version} from {workflow} is not available"
170
183
  if supported_languages is not None:
171
184
  message += f" in any of: {', '.join(supported_languages)}"
172
185
  problems.append(message)
186
+ problem_type = FileNotFoundError
173
187
  if len(problems) > 0:
174
188
  if len(eligible_workflow_versions) == 0:
175
189
  message = "No versions of the workflow are available"
176
190
  if supported_languages is not None:
177
191
  message += f" in any of: {', '.join(supported_languages)}"
178
192
  problems.append(message)
193
+ problem_type = FileNotFoundError
179
194
  elif trs_version is None:
180
195
  problems.append(f"Add ':' and the name of a workflow version ({', '.join(eligible_workflow_versions)}) after '{trs_workflow_id}'")
181
196
  else:
182
197
  problems.append(f"Replace '{trs_version}' with one of ({', '.join(eligible_workflow_versions)})")
183
- raise RuntimeError("; ".join(problems))
198
+ raise problem_type("; ".join(problems))
184
199
 
185
200
  # Tell MyPy we now have a version, or we would have raised
186
201
  assert trs_version is not None
@@ -192,12 +207,35 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
192
207
  language = candidate_language
193
208
 
194
209
  logger.debug("Going to use %s version %s in %s", trs_workflow_id, trs_version, language)
195
- trs_version_url = workflow_versions[trs_version]["url"]
210
+
211
+ return trs_workflow_id, trs_version, language
212
+
213
+ @retry(errors=[requests.exceptions.ConnectionError])
214
+ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str:
215
+ """
216
+ Returns a URL or local path to a workflow's primary descriptor file.
217
+
218
+ The file will be in context with its required files so it can actually run.
219
+
220
+ :raises FileNotFoundError: if the workflow or version doesn't exist.
221
+ """
222
+
223
+ # TODO: We should probably use HATEOAS and pull this from the workflow
224
+ # document we probably already fetched but aren't passing.
225
+ trs_version_url = f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}/versions/{quote(trs_version, safe='')}"
196
226
 
197
227
  # Fetch the list of all the files
198
228
  trs_files_url = f"{trs_version_url}/{language}/files"
199
229
  logger.debug("Workflow files URL: %s", trs_files_url)
200
- trs_files_document = session.get(trs_files_url).json()
230
+ trs_files_response = web_session.get(trs_files_url)
231
+ if trs_files_response.status_code in (204, 400, 404):
232
+ # We can get a 204 No Content response if the version doesn't exist.
233
+ # That's successful, so we need to handle it specifically. See
234
+ # <https://github.com/dockstore/dockstore/issues/6048>
235
+ # We can also get a 400 if the workflow ID is not in Dockstore's expected format (3 slash-separated segments).
236
+ raise FileNotFoundError(f"Workflow {trs_workflow_id} version {trs_version} in language {language} does not exist.")
237
+ trs_files_response.raise_for_status()
238
+ trs_files_document = trs_files_response.json()
201
239
 
202
240
  # Find the information we need to ID the primary descriptor file
203
241
  primary_descriptor_path: Optional[str] = None
@@ -210,7 +248,7 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
210
248
  primary_descriptor_hash = file_info["checksum"]["checksum"]
211
249
  break
212
250
  if primary_descriptor_path is None or primary_descriptor_hash is None or primary_descriptor_hash_algorithm is None:
213
- raise RuntimeError("Could not find a primary descriptor file for the workflow")
251
+ raise RuntimeError(f"Could not find a primary descriptor file for workflow {trs_workflow_id} version {trs_version} in language {language}")
214
252
  primary_descriptor_basename = os.path.basename(primary_descriptor_path)
215
253
 
216
254
  # Work out how to compute the hash we are looking for. See
@@ -263,7 +301,7 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
263
301
  }
264
302
  # If we don't set stream=True, we can't actually read anything from the
265
303
  # raw stream, since Requests will have done it already.
266
- with session.get(trs_zip_file_url, headers=headers, stream=True) as response:
304
+ with web_session.get(trs_zip_file_url, headers=headers, stream=True) as response:
267
305
  response_content_length = response.headers.get("Content-Length")
268
306
  logger.debug("Server reports content length: %s", response_content_length)
269
307
  shutil.copyfileobj(response.raw, zip_file)
@@ -308,27 +346,38 @@ def get_workflow_root_from_dockstore(workflow: str, supported_languages: Optiona
308
346
  logger.debug("Rejected %s because its %s hash %s is not %s", file_path, python_hash_name, file_hash, primary_descriptor_hash)
309
347
  if found_path is None:
310
348
  # We couldn't find the promised primary descriptor
311
- raise RuntimeError(f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash}")
349
+ raise RuntimeError(f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash} for workflow {trs_workflow_id} version {trs_version} in language {language}")
312
350
 
313
351
  return found_path
314
352
 
315
- def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> str:
353
+ def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, Optional[str]]:
316
354
  """
317
355
  Find the real workflow URL or filename from a command line argument.
318
356
 
319
357
  Transform a workflow URL or path that might actually be a Dockstore page
320
- URL or TRS specifier to an actual URL or path to a workflow document.
358
+ URL or TRS specifier to an actual URL or path to a workflow document, and
359
+ optional TRS specifier.
360
+
361
+ Accepts inputs like
362
+
363
+ - https://dockstore.org/workflows/github.com/dockstore-testing/md5sum-checker:master?tab=info
364
+ - #workflow/github.com/dockstore-testing/md5sum-checker
365
+ - ./local.cwl
366
+ - https://example.com/~myuser/workflow/main.cwl
367
+
368
+ :raises FileNotFoundError: if the workflow or version should be in Dockstore but doesn't seem to exist.
321
369
  """
322
370
 
323
- if is_dockstore_workflow(workflow):
324
- # Ask Dockstore where to find Dockstore-y things
325
- resolved = get_workflow_root_from_dockstore(workflow, supported_languages=supported_languages)
326
- logger.info("Resolved Dockstore workflow %s to %s", workflow, resolved)
327
- return resolved
371
+ if is_trs_workflow(workflow):
372
+ # Ask TRS host where to find TRS-looking things
373
+ trs_workflow_id, trs_version, language = find_workflow(workflow, supported_languages)
374
+ resolved = fetch_workflow(trs_workflow_id, trs_version, language)
375
+ logger.info("Resolved TRS workflow %s to %s", workflow, resolved)
376
+ return resolved, compose_trs_spec(trs_workflow_id, trs_version)
328
377
  else:
329
378
  # Pass other things through.
330
- return workflow
331
-
379
+ # TODO: Find out if they have TRS names.
380
+ return workflow, None
332
381
 
333
382
 
334
383
 
toil/lib/web.py ADDED
@@ -0,0 +1,38 @@
1
+ # Copyright (C) 2024 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Contains functions for making web requests with Toil.
17
+
18
+ All web requests should go through this module, to make sure they use the right
19
+ user agent.
20
+
21
+ >>> from toil.lib.web import web_session
22
+ >>> web_session.get("https://example.com")
23
+
24
+ """
25
+
26
+ import logging
27
+ import requests
28
+ import sys
29
+
30
+ from toil.version import baseVersion
31
+
32
+ # We manage a Requests session at the module level in case we're supposed to be
33
+ # doing cookies, and to send a sensible user agent.
34
+ # We expect the Toil and Python version to not be personally identifiable even
35
+ # in theory (someone might make a new Toil version first, but there's no way
36
+ # to know for sure that nobody else did the same thing).
37
+ web_session = requests.Session()
38
+ web_session.headers.update({"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"})
toil/options/common.py CHANGED
@@ -362,12 +362,13 @@ def add_base_toil_options(
362
362
  action="store_true",
363
363
  help="Do not capture standard output and error from batch system jobs.",
364
364
  )
365
+ # TODO: Should this be deprecated since we always save stats now for history tracking?
365
366
  core_options.add_argument(
366
367
  "--stats",
367
368
  dest="stats",
368
369
  default=False,
369
370
  action="store_true",
370
- help="Records statistics about the toil workflow to be used by 'toil stats'.",
371
+ help="Keep statistics about the toil workflow to be used by 'toil stats'.",
371
372
  )
372
373
  clean_choices = ["always", "onError", "never", "onSuccess"]
373
374
  core_options.add_argument(
@@ -466,7 +467,8 @@ def add_base_toil_options(
466
467
  )
467
468
 
468
469
  caching = file_store_options.add_mutually_exclusive_group()
469
- caching_help = "Enable or disable caching for your workflow, specifying this overrides default from job store"
470
+ caching_help = ("Enable or disable worker level file caching for your workflow, specifying this overrides default from batch system. "
471
+ "Does not affect CWL or WDL task caching.")
470
472
  caching.add_argument(
471
473
  "--caching",
472
474
  dest="caching",
@@ -1102,6 +1104,19 @@ def add_base_toil_options(
1102
1104
  default=False,
1103
1105
  help="Disables the progress bar shown when standard error is a terminal.",
1104
1106
  )
1107
+ misc_options.add_argument(
1108
+ "--publishWorkflowMetrics",
1109
+ dest="publish_workflow_metrics",
1110
+ choices=["all", "current", "no"],
1111
+ default=None,
1112
+ help="Whether to publish workflow metrics reports (including unique workflow "
1113
+ "and task run IDs, job names, and version and Toil feature use information) to "
1114
+ "Dockstore when a workflow completes. Selecting \"current\" will publish metrics "
1115
+ "for the current workflow. Selecting \"all\" will also publish prior workflow "
1116
+ "runs from the Toil history database, even if they themselves were run with \"no\". "
1117
+ "Note that once published, workflow metrics CANNOT be deleted or un-published; they "
1118
+ "will stay published forever!"
1119
+ )
1105
1120
 
1106
1121
  # Debug options
1107
1122
  debug_options = parser.add_argument_group(
toil/options/cwl.py CHANGED
@@ -419,3 +419,13 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
419
419
  type=str,
420
420
  help=suppress_help or "Specify a cloud bucket endpoint for output files.",
421
421
  )
422
+ parser.add_argument(
423
+ "--cachedir",
424
+ type=str,
425
+ help=suppress_help
426
+ or "Directory to cache intermediate workflow outputs to avoid "
427
+ "recomputing steps. Can be very helpful in the development and "
428
+ "troubleshooting of CWL documents. This automatically bypasses the file store."
429
+ " Not to be confused with --caching.",
430
+ dest="cachedir"
431
+ )
@@ -19,7 +19,6 @@ import time
19
19
  import uuid
20
20
  from typing import Optional
21
21
 
22
- import requests
23
22
  from libcloud.compute.drivers.gce import GCEFailedNode
24
23
  from libcloud.compute.providers import get_driver
25
24
  from libcloud.compute.types import Provider
@@ -27,6 +26,7 @@ from libcloud.compute.types import Provider
27
26
  from toil.jobStores.googleJobStore import GoogleJobStore
28
27
  from toil.lib.compatibility import compat_bytes_recursive
29
28
  from toil.lib.conversions import human2bytes
29
+ from toil.lib.web import web_session
30
30
  from toil.provisioners import NoSuchClusterException
31
31
  from toil.provisioners.abstractProvisioner import AbstractProvisioner, Shape
32
32
  from toil.provisioners.node import Node
@@ -83,11 +83,11 @@ class GCEProvisioner(AbstractProvisioner):
83
83
  """
84
84
  metadata_server = "http://metadata/computeMetadata/v1/instance/"
85
85
  metadata_flavor = {"Metadata-Flavor": "Google"}
86
- zone = requests.get(metadata_server + "zone", headers=metadata_flavor).text
86
+ zone = web_session.get(metadata_server + "zone", headers=metadata_flavor).text
87
87
  self._zone = zone.split("/")[-1]
88
88
 
89
89
  project_metadata_server = "http://metadata/computeMetadata/v1/project/"
90
- self._projectId = requests.get(
90
+ self._projectId = web_session.get(
91
91
  project_metadata_server + "project-id", headers=metadata_flavor
92
92
  ).text
93
93
 
@@ -95,7 +95,7 @@ class GCEProvisioner(AbstractProvisioner):
95
95
  self._googleJson = ""
96
96
  self._clientEmail = ""
97
97
 
98
- self._tags = requests.get(
98
+ self._tags = web_session.get(
99
99
  metadata_server + "description", headers=metadata_flavor
100
100
  ).text
101
101
  tags = json.loads(self._tags)
@@ -10,13 +10,13 @@ from io import BytesIO
10
10
  from typing import Any, Optional, cast
11
11
  from urllib.parse import urldefrag, urljoin, urlparse
12
12
 
13
- import requests
14
13
  import ruamel.yaml
15
14
  import schema_salad
16
15
  from configargparse import ArgumentParser
17
16
  from wes_client.util import WESClient # type: ignore
18
17
  from wes_client.util import wes_reponse as wes_response
19
18
 
19
+ from toil.lib.web import web_session
20
20
  from toil.wdl.utils import get_version as get_wdl_version
21
21
 
22
22
  """
@@ -117,7 +117,7 @@ class WESClientWithWorkflowEngineParameters(WESClient): # type: ignore
117
117
  proto, host = endpoint.split("://")
118
118
  super().__init__(
119
119
  {
120
- # TODO: use the auth argument in requests.post so we don't need to encode it ourselves
120
+ # TODO: use the auth argument in requests' post so we don't need to encode it ourselves
121
121
  "auth": (
122
122
  {
123
123
  "Authorization": "Basic "
@@ -314,7 +314,7 @@ class WESClientWithWorkflowEngineParameters(WESClient): # type: ignore
314
314
  data, files = self.build_wes_request(
315
315
  workflow_file, workflow_params_file, attachments, workflow_engine_parameters
316
316
  )
317
- post_result = requests.post(
317
+ post_result = web_session.post(
318
318
  urljoin(f"{self.proto}://{self.host}", "/ga4gh/wes/v1/runs"),
319
319
  data=data,
320
320
  files=files,
toil/server/utils.py CHANGED
@@ -19,10 +19,9 @@ from datetime import datetime
19
19
  from typing import Optional
20
20
  from urllib.parse import urlparse
21
21
 
22
- import requests
23
-
24
22
  from toil.lib.io import AtomicFileCreate
25
23
  from toil.lib.retry import retry
24
+ from toil.lib.web import web_session
26
25
 
27
26
  try:
28
27
  from toil.lib.aws import get_current_aws_region
@@ -63,7 +62,7 @@ def download_file_from_internet(
63
62
  """
64
63
  Download a file from the Internet and write it to dest.
65
64
  """
66
- response = requests.get(src)
65
+ response = web_session.get(src)
67
66
 
68
67
  if not response.ok:
69
68
  raise RuntimeError("Request failed with a client error or a server error.")