toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
Files changed (155)
  1. toil/__init__.py +5 -9
  2. toil/batchSystems/abstractBatchSystem.py +23 -22
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +4 -4
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/gridengine.py +3 -4
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +65 -63
  10. toil/batchSystems/local_support.py +2 -3
  11. toil/batchSystems/lsf.py +6 -7
  12. toil/batchSystems/mesos/batchSystem.py +11 -7
  13. toil/batchSystems/mesos/test/__init__.py +1 -2
  14. toil/batchSystems/options.py +9 -10
  15. toil/batchSystems/registry.py +3 -7
  16. toil/batchSystems/singleMachine.py +8 -11
  17. toil/batchSystems/slurm.py +49 -38
  18. toil/batchSystems/torque.py +3 -4
  19. toil/bus.py +36 -34
  20. toil/common.py +129 -89
  21. toil/cwl/cwltoil.py +857 -729
  22. toil/cwl/utils.py +44 -35
  23. toil/fileStores/__init__.py +3 -1
  24. toil/fileStores/abstractFileStore.py +28 -30
  25. toil/fileStores/cachingFileStore.py +8 -8
  26. toil/fileStores/nonCachingFileStore.py +10 -21
  27. toil/job.py +159 -158
  28. toil/jobStores/abstractJobStore.py +68 -69
  29. toil/jobStores/aws/jobStore.py +249 -213
  30. toil/jobStores/aws/utils.py +13 -24
  31. toil/jobStores/fileJobStore.py +28 -22
  32. toil/jobStores/googleJobStore.py +21 -17
  33. toil/jobStores/utils.py +3 -7
  34. toil/leader.py +17 -22
  35. toil/lib/accelerators.py +6 -4
  36. toil/lib/aws/__init__.py +9 -10
  37. toil/lib/aws/ami.py +33 -19
  38. toil/lib/aws/iam.py +6 -6
  39. toil/lib/aws/s3.py +259 -157
  40. toil/lib/aws/session.py +76 -76
  41. toil/lib/aws/utils.py +51 -43
  42. toil/lib/checksum.py +19 -15
  43. toil/lib/compatibility.py +3 -2
  44. toil/lib/conversions.py +45 -18
  45. toil/lib/directory.py +29 -26
  46. toil/lib/docker.py +93 -99
  47. toil/lib/dockstore.py +77 -50
  48. toil/lib/ec2.py +39 -38
  49. toil/lib/ec2nodes.py +11 -4
  50. toil/lib/exceptions.py +8 -5
  51. toil/lib/ftp_utils.py +9 -14
  52. toil/lib/generatedEC2Lists.py +161 -20
  53. toil/lib/history.py +141 -97
  54. toil/lib/history_submission.py +163 -72
  55. toil/lib/io.py +27 -17
  56. toil/lib/memoize.py +2 -1
  57. toil/lib/misc.py +15 -11
  58. toil/lib/pipes.py +40 -25
  59. toil/lib/plugins.py +12 -8
  60. toil/lib/resources.py +1 -0
  61. toil/lib/retry.py +32 -38
  62. toil/lib/threading.py +12 -12
  63. toil/lib/throttle.py +1 -2
  64. toil/lib/trs.py +113 -51
  65. toil/lib/url.py +14 -23
  66. toil/lib/web.py +7 -2
  67. toil/options/common.py +18 -15
  68. toil/options/cwl.py +2 -2
  69. toil/options/runner.py +9 -5
  70. toil/options/wdl.py +1 -3
  71. toil/provisioners/__init__.py +9 -9
  72. toil/provisioners/abstractProvisioner.py +22 -20
  73. toil/provisioners/aws/__init__.py +20 -14
  74. toil/provisioners/aws/awsProvisioner.py +10 -8
  75. toil/provisioners/clusterScaler.py +19 -18
  76. toil/provisioners/gceProvisioner.py +2 -3
  77. toil/provisioners/node.py +11 -13
  78. toil/realtimeLogger.py +4 -4
  79. toil/resource.py +5 -5
  80. toil/server/app.py +2 -2
  81. toil/server/cli/wes_cwl_runner.py +11 -11
  82. toil/server/utils.py +18 -21
  83. toil/server/wes/abstract_backend.py +9 -8
  84. toil/server/wes/amazon_wes_utils.py +3 -3
  85. toil/server/wes/tasks.py +3 -5
  86. toil/server/wes/toil_backend.py +17 -21
  87. toil/server/wsgi_app.py +3 -3
  88. toil/serviceManager.py +3 -4
  89. toil/statsAndLogging.py +12 -13
  90. toil/test/__init__.py +33 -24
  91. toil/test/batchSystems/batchSystemTest.py +12 -11
  92. toil/test/batchSystems/batch_system_plugin_test.py +3 -5
  93. toil/test/batchSystems/test_slurm.py +38 -24
  94. toil/test/cwl/conftest.py +5 -6
  95. toil/test/cwl/cwlTest.py +194 -78
  96. toil/test/cwl/download_file_uri.json +6 -0
  97. toil/test/cwl/download_file_uri_no_hostname.json +6 -0
  98. toil/test/docs/scripts/tutorial_staging.py +1 -0
  99. toil/test/jobStores/jobStoreTest.py +9 -7
  100. toil/test/lib/aws/test_iam.py +1 -3
  101. toil/test/lib/aws/test_s3.py +1 -1
  102. toil/test/lib/dockerTest.py +9 -9
  103. toil/test/lib/test_ec2.py +12 -11
  104. toil/test/lib/test_history.py +4 -4
  105. toil/test/lib/test_trs.py +16 -14
  106. toil/test/lib/test_url.py +7 -6
  107. toil/test/lib/url_plugin_test.py +12 -18
  108. toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
  109. toil/test/provisioners/clusterScalerTest.py +2 -5
  110. toil/test/provisioners/clusterTest.py +1 -3
  111. toil/test/server/serverTest.py +13 -4
  112. toil/test/sort/restart_sort.py +2 -6
  113. toil/test/sort/sort.py +3 -8
  114. toil/test/src/deferredFunctionTest.py +7 -7
  115. toil/test/src/environmentTest.py +1 -2
  116. toil/test/src/fileStoreTest.py +5 -5
  117. toil/test/src/importExportFileTest.py +5 -6
  118. toil/test/src/jobServiceTest.py +22 -14
  119. toil/test/src/jobTest.py +121 -25
  120. toil/test/src/miscTests.py +5 -7
  121. toil/test/src/promisedRequirementTest.py +8 -7
  122. toil/test/src/regularLogTest.py +2 -3
  123. toil/test/src/resourceTest.py +5 -8
  124. toil/test/src/restartDAGTest.py +5 -6
  125. toil/test/src/resumabilityTest.py +2 -2
  126. toil/test/src/retainTempDirTest.py +3 -3
  127. toil/test/src/systemTest.py +3 -3
  128. toil/test/src/threadingTest.py +1 -1
  129. toil/test/src/workerTest.py +1 -2
  130. toil/test/utils/toilDebugTest.py +6 -4
  131. toil/test/utils/toilKillTest.py +1 -1
  132. toil/test/utils/utilsTest.py +15 -14
  133. toil/test/wdl/wdltoil_test.py +247 -124
  134. toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
  135. toil/toilState.py +2 -3
  136. toil/utils/toilDebugFile.py +3 -8
  137. toil/utils/toilDebugJob.py +1 -2
  138. toil/utils/toilLaunchCluster.py +1 -2
  139. toil/utils/toilSshCluster.py +2 -0
  140. toil/utils/toilStats.py +19 -24
  141. toil/utils/toilStatus.py +11 -14
  142. toil/version.py +10 -10
  143. toil/wdl/wdltoil.py +313 -209
  144. toil/worker.py +18 -12
  145. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
  146. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
  147. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
  148. toil/test/cwl/staging_cat.cwl +0 -27
  149. toil/test/cwl/staging_make_file.cwl +0 -25
  150. toil/test/cwl/staging_workflow.cwl +0 -43
  151. toil/test/cwl/zero_default.cwl +0 -61
  152. toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
  153. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
  154. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
  155. {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/lib/threading.py CHANGED
@@ -30,7 +30,7 @@ import time
 import traceback
 from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import Optional, Union, cast
+from typing import cast

 import psutil

@@ -42,7 +42,7 @@ logger = logging.getLogger(__name__)


 def ensure_filesystem_lockable(
-    path: StrPath, timeout: float = 30, hint: Optional[str] = None
+    path: StrPath, timeout: float = 30, hint: str | None = None
 ) -> None:
     """
     Make sure that the filesystem used at the given path is one where locks are safe to use.
@@ -221,12 +221,12 @@ class ExceptionalThread(threading.Thread):
     def tryRun(self) -> None:
         super().run()

-    def join(self, *args: Optional[float], **kwargs: Optional[float]) -> None:
+    def join(self, *args: float | None, **kwargs: float | None) -> None:
         super().join(*args, **kwargs)
         if not self.is_alive() and self.exc_info is not None:
             exc_type, exc_value, traceback = self.exc_info
             self.exc_info = None
-            raise_(exc_type, exc_value, traceback)
+            raise_(exc_type, exc_value, traceback)


 def cpu_count() -> int:
@@ -257,18 +257,18 @@ def cpu_count() -> int:
     if psutil_cpu_count is None:
         logger.debug("Could not retrieve the logical CPU count.")

-    total_machine_size: Union[float, int] = (
+    total_machine_size: float | int = (
         psutil_cpu_count if psutil_cpu_count is not None else float("inf")
     )
     logger.debug("Total machine size: %s core(s)", total_machine_size)

     # cgroups may limit the size
-    cgroup_size: Union[float, int] = float("inf")
+    cgroup_size: float | int = float("inf")

     try:
         # See if we can fetch these and use them
-        quota: Optional[int] = None
-        period: Optional[int] = None
+        quota: int | None = None
+        period: int | None = None

         # CGroups v1 keeps quota and period separate
         CGROUP1_QUOTA_FILE = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
@@ -312,7 +312,7 @@ def cpu_count() -> int:
         logger.debug("Could not inspect cgroup: %s", traceback.format_exc())

     # CPU affinity may limit the size
-    affinity_size: Union[float, int] = float("inf")
+    affinity_size: float | int = float("inf")
     if hasattr(os, "sched_getaffinity"):
         try:
             logger.debug("CPU affinity available")
@@ -326,7 +326,7 @@ def cpu_count() -> int:
     else:
         logger.debug("CPU affinity not available")

-    limit: Union[float, int] = float("inf")
+    limit: float | int = float("inf")
     # Apply all the limits to take the smallest
     limit = min(limit, total_machine_size)
     limit = min(limit, cgroup_size)
@@ -582,7 +582,7 @@ def global_mutex(base_dir: StrPath, mutex: str) -> Iterator[None]:

     try:
         # And get the stats for the name in the directory
-        path_stats: Optional[os.stat_result] = os.stat(lock_filename)
+        path_stats: os.stat_result | None = os.stat(lock_filename)
     except FileNotFoundError:
         path_stats = None

@@ -786,7 +786,7 @@ class LastProcessStandingArena:

         try:
             fd = os.open(full_path, os.O_RDONLY)
-        except OSError as e:
+        except OSError:
            # suddenly file doesnt exist on network file system?
            continue

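Most of these hunks swap typing.Optional/typing.Union annotations for PEP 604 "X | None" / "X | Y" syntax without changing behavior. The cpu_count() hunks also show the shape of the calculation: start from an unlimited ceiling and take the minimum of the machine's logical core count, the cgroup CPU quota, and the scheduler affinity mask. Below is a minimal sketch of that combination; the helper name and hard-coded parameters are hypothetical stand-ins for the /sys/fs/cgroup reads and the psutil call, and the rounding choice is part of the sketch, not a claim about Toil's exact arithmetic.

    import math
    import os


    def effective_cpu_count(
        logical_cores: int | None, quota: int | None, period: int | None
    ) -> int | float:
        # Start unlimited; each known ceiling can only lower the result.
        limit: int | float = float("inf")

        # Ceiling 1: the machine's logical core count (None if it could
        # not be determined).
        if logical_cores is not None:
            limit = min(limit, logical_cores)

        # Ceiling 2: the cgroup CPU quota. In cgroups v1 a quota of -1
        # means "no limit"; otherwise quota/period cores are allowed
        # (e.g. 150000/100000 -> 1.5, rounded up here so a fractional
        # quota still yields a whole schedulable core).
        if quota is not None and period is not None and quota != -1:
            limit = min(limit, math.ceil(quota / period))

        # Ceiling 3: the CPU affinity mask, on platforms that have one.
        if hasattr(os, "sched_getaffinity"):
            limit = min(limit, len(os.sched_getaffinity(0)))

        return limit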
toil/lib/throttle.py CHANGED
@@ -15,7 +15,6 @@
 # 5.14.2018: copied into Toil from https://github.com/BD2KGenomics/bd2k-python-lib
 import threading
 import time
-from typing import Union


 class LocalThrottle:
@@ -143,7 +142,7 @@ class throttle:
     True
     """

-    def __init__(self, min_interval: Union[int, float]) -> None:
+    def __init__(self, min_interval: int | float) -> None:
        self.min_interval = min_interval

    def __enter__(self):
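The only change here is the annotation on min_interval; throttle is still used as a context manager whose body takes at least min_interval seconds, per the class's doctest. A short usage sketch under that assumption:

    import time

    from toil.lib.throttle import throttle

    # Each iteration takes at least half a second: if the body finishes
    # sooner, leaving the context sleeps out the rest of min_interval.
    for attempt in range(3):
        with throttle(0.5):
            print(f"poll {attempt} at {time.monotonic():.2f}")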
toil/lib/trs.py CHANGED
@@ -21,21 +21,25 @@ import hashlib
 import logging
 import os
 import shutil
-import sys
 import tempfile
 import zipfile
-from typing import Any, Literal, Optional, Union, TypedDict, cast
+from typing import Any
+from urllib.parse import quote, unquote, urlparse

-from urllib.parse import urlparse, unquote, quote
 import requests

-from toil.lib.retry import retry
 from toil.lib.io import file_digest, robust_rmtree
+from toil.lib.retry import retry
 from toil.lib.web import web_session

 logger = logging.getLogger(__name__)

-TRS_ROOT = "https://dockstore.org" if "TOIL_TRS_ROOT" not in os.environ else os.environ["TOIL_TRS_ROOT"]
+TRS_ROOT = (
+    "https://dockstore.org"
+    if "TOIL_TRS_ROOT" not in os.environ
+    else os.environ["TOIL_TRS_ROOT"]
+)
+

 def is_trs_workflow(workflow: str) -> bool:
     """
@@ -44,7 +48,12 @@ def is_trs_workflow(workflow: str) -> bool:
     Detects Dockstore page URLs and strings that could be Dockstore TRS IDs.
     """

-    return workflow.startswith(f"{TRS_ROOT}/workflows/") or workflow.startswith(f"{TRS_ROOT}/my-workflows/") or workflow.startswith("#workflow/")
+    return (
+        workflow.startswith(f"{TRS_ROOT}/workflows/")
+        or workflow.startswith(f"{TRS_ROOT}/my-workflows/")
+        or workflow.startswith("#workflow/")
+    )
+


 def extract_trs_spec(workflow: str) -> str:
@@ -62,20 +71,21 @@ def extract_trs_spec(workflow: str) -> str:
     # TODO: We assume the Dockstore page URL structure and the TRS IDs are basically the same.
     page_path = unquote(parsed.path)
     if page_path.startswith("/workflows/"):
-        trs_spec = "#workflow/" + page_path[len("/workflows/"):]
+        trs_spec = "#workflow/" + page_path[len("/workflows/") :]
     elif page_path.startswith("/my-workflows/"):
-        trs_spec = "#workflow/" + page_path[len("/my-workflows/"):]
+        trs_spec = "#workflow/" + page_path[len("/my-workflows/") :]
     else:
         raise RuntimeError("Cannot parse Dockstore URL " + workflow)
     logger.debug("Translated %s to TRS: %s", workflow, trs_spec)

     return trs_spec

-def parse_trs_spec(trs_spec: str) -> tuple[str, Optional[str]]:
+
+def parse_trs_spec(trs_spec: str) -> tuple[str, str | None]:
     """
     Parse a TRS ID to workflow and optional version.
     """
-    parts = trs_spec.split(':', 1)
+    parts = trs_spec.split(":", 1)
     trs_workflow_id = parts[0]
     if len(parts) > 1:
         # The ID has the version we want after a colon
@@ -85,14 +95,18 @@ def parse_trs_spec(trs_spec: str) -> tuple[str, Optional[str]]:
         trs_version = None
     return trs_workflow_id, trs_version

+
 def compose_trs_spec(trs_workflow_id: str, trs_version: str) -> str:
     """
     Compose a TRS ID from a workflow ID and version ID.
     """
     return f"{trs_workflow_id}:{trs_version}"

+
 @retry(errors=[requests.exceptions.ConnectionError])
-def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, str, str]:
+def find_workflow(
+    workflow: str, supported_languages: set[str] | None = None
+) -> tuple[str, str, str]:
     """
     Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow, along with the TRS workflow ID and version.

@@ -107,7 +121,7 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)

     :raises FileNotFoundError: if the workflow or version doesn't exist.
     :raises ValueError: if the version is not specified but cannot be
-            automatically determined.
+        automatically determined.
     """

     if supported_languages is not None and len(supported_languages) == 0:
@@ -118,11 +132,18 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
     # Parse out workflow and possible version
     trs_workflow_id, trs_version = parse_trs_spec(trs_spec)

-    logger.debug("TRS %s parses to workflow %s and version %s", trs_spec, trs_workflow_id, trs_version)
+    logger.debug(
+        "TRS %s parses to workflow %s and version %s",
+        trs_spec,
+        trs_workflow_id,
+        trs_version,
+    )

     # Fetch the main TRS document.
     # See e.g. https://dockstore.org/api/ga4gh/trs/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker
-    trs_workflow_url = f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
+    trs_workflow_url = (
+        f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
+    )
     logger.debug("Get versions: %s", trs_workflow_url)
     trs_workflow_response = web_session.get(trs_workflow_url)
     if trs_workflow_response.status_code in (400, 404):
@@ -168,7 +189,6 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
         trs_version = next(iter(eligible_workflow_versions))
         logger.debug("Defaulting to only eligible workflow version %s", trs_version)

-
     # If we don't like what we found we compose a useful error message.
     problems: list[str] = []
     problem_type: type[Exception] = RuntimeError
@@ -176,7 +196,9 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
            problems.append(f"Workflow {workflow} does not specify a version")
            problem_type = ValueError
        elif trs_version not in workflow_versions:
-            problems.append(f"Workflow version {trs_version} from {workflow} does not exist")
+            problems.append(
+                f"Workflow version {trs_version} from {workflow} does not exist"
+            )
            problem_type = FileNotFoundError
        elif trs_version not in eligible_workflow_versions:
            message = f"Workflow version {trs_version} from {workflow} is not available"
@@ -192,24 +214,33 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
            problems.append(message)
            problem_type = FileNotFoundError
        elif trs_version is None:
-            problems.append(f"Add ':' and the name of a workflow version ({', '.join(eligible_workflow_versions)}) after '{trs_workflow_id}'")
+            problems.append(
+                f"Add ':' and the name of a workflow version ({', '.join(eligible_workflow_versions)}) after '{trs_workflow_id}'"
+            )
        else:
-            problems.append(f"Replace '{trs_version}' with one of ({', '.join(eligible_workflow_versions)})")
+            problems.append(
+                f"Replace '{trs_version}' with one of ({', '.join(eligible_workflow_versions)})"
+            )
        raise problem_type("; ".join(problems))

     # Tell MyPy we now have a version, or we would have raised
     assert trs_version is not None

     # Select the language we will actually run
-    chosen_version_languages: list[str] = workflow_versions[trs_version]["descriptor_type"]
+    chosen_version_languages: list[str] = workflow_versions[trs_version][
+        "descriptor_type"
+    ]
     for candidate_language in chosen_version_languages:
         if supported_languages is None or candidate_language in supported_languages:
             language = candidate_language

-    logger.debug("Going to use %s version %s in %s", trs_workflow_id, trs_version, language)
+    logger.debug(
+        "Going to use %s version %s in %s", trs_workflow_id, trs_version, language
+    )

     return trs_workflow_id, trs_version, language

-
+
+
 @retry(errors=[requests.exceptions.ConnectionError])
 def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str:
     """
@@ -233,22 +264,30 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
         # That's successful, so we need to handle it specifically. See
         # <https://github.com/dockstore/dockstore/issues/6048>
         # We can also get a 400 if the workflow ID is not in Dockstore's expected format (3 slash-separated segments).
-        raise FileNotFoundError(f"Workflow {trs_workflow_id} version {trs_version} in language {language} does not exist.")
+        raise FileNotFoundError(
+            f"Workflow {trs_workflow_id} version {trs_version} in language {language} does not exist."
+        )
     trs_files_response.raise_for_status()
     trs_files_document = trs_files_response.json()

     # Find the information we need to ID the primary descriptor file
-    primary_descriptor_path: Optional[str] = None
-    primary_descriptor_hash_algorithm: Optional[str] = None
-    primary_descriptor_hash: Optional[str] = None
+    primary_descriptor_path: str | None = None
+    primary_descriptor_hash_algorithm: str | None = None
+    primary_descriptor_hash: str | None = None
     for file_info in trs_files_document:
         if file_info["file_type"] == "PRIMARY_DESCRIPTOR":
             primary_descriptor_path = file_info["path"]
             primary_descriptor_hash_algorithm = file_info["checksum"]["type"]
             primary_descriptor_hash = file_info["checksum"]["checksum"]
             break
-    if primary_descriptor_path is None or primary_descriptor_hash is None or primary_descriptor_hash_algorithm is None:
-        raise RuntimeError(f"Could not find a primary descriptor file for workflow {trs_workflow_id} version {trs_version} in language {language}")
+    if (
+        primary_descriptor_path is None
+        or primary_descriptor_hash is None
+        or primary_descriptor_hash_algorithm is None
+    ):
+        raise RuntimeError(
+            f"Could not find a primary descriptor file for workflow {trs_workflow_id} version {trs_version} in language {language}"
+        )
     primary_descriptor_basename = os.path.basename(primary_descriptor_path)

     # Work out how to compute the hash we are looking for. See
@@ -257,16 +296,25 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
     # for the Python names.
     #
     # TODO: We don't support the various truncated hash flavors or the other checksums not in hashlib.
-    python_hash_name = primary_descriptor_hash_algorithm.replace("sha-", "sha").replace("blake2b-512", "blake2b").replace("-", "_")
+    python_hash_name = (
+        primary_descriptor_hash_algorithm.replace("sha-", "sha")
+        .replace("blake2b-512", "blake2b")
+        .replace("-", "_")
+    )
     if python_hash_name not in hashlib.algorithms_available:
-        raise RuntimeError(f"Primary descriptor is identified by a {primary_descriptor_hash_algorithm} hash but {python_hash_name} is not available in hashlib")
+        raise RuntimeError(
+            f"Primary descriptor is identified by a {primary_descriptor_hash_algorithm} hash but {python_hash_name} is not available in hashlib"
+        )

     # Figure out where to store the workflow. We don't want to deal with temp
     # dir cleanup since we don't want to run the whole workflow setup and
     # execution in a context manager. So we declare a cache.
     # Note that it's still not safe to symlink out of this cache since XDG
     # cache directories aren't guaranteed to be on shared storage.
-    cache_base_dir = os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "toil/workflows")
+    cache_base_dir = os.path.join(
+        os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")),
+        "toil/workflows",
+    )

     # Hash the workflow file list.
     hasher = hashlib.sha256()
@@ -297,13 +345,17 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
            "Accept-Encoding": "identity",
            # Help Dockstore avoid serving ZIP with a JSON content type. See
            # <https://github.com/dockstore/dockstore/issues/6010>.
-            "Accept": "application/zip"
+            "Accept": "application/zip",
        }
        # If we don't set stream=True, we can't actually read anything from the
        # raw stream, since Requests will have done it already.
-        with web_session.get(trs_zip_file_url, headers=headers, stream=True) as response:
+        with web_session.get(
+            trs_zip_file_url, headers=headers, stream=True
+        ) as response:
            response_content_length = response.headers.get("Content-Length")
-            logger.debug("Server reports content length: %s", response_content_length)
+            logger.debug(
+                "Server reports content length: %s", response_content_length
+            )
            shutil.copyfileobj(response.raw, zip_file)
        zip_file.flush()

@@ -323,34 +375,52 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
     except OSError:
         # Collision. Someone else installed the workflow before we could.
         robust_rmtree(workflow_temp_dir)
-        logger.debug("Workflow cached at %s by someone else while we were donwloading it", cache_workflow_dir)
+        logger.debug(
+            "Workflow cached at %s by someone else while we were donwloading it",
+            cache_workflow_dir,
+        )

     # Hunt throught he directory for a file with the right basename and hash
-    found_path: Optional[str] = None
+    found_path: str | None = None
     for containing_dir, subdirectories, files in os.walk(cache_workflow_dir):
         for filename in files:
             if filename == primary_descriptor_basename:
                 # This could be it. Open the file off disk and hash it with the right algorithm.
                 file_path = os.path.join(containing_dir, filename)
-                file_hash = file_digest(open(file_path, "rb"), python_hash_name).hexdigest()
+                file_hash = file_digest(
+                    open(file_path, "rb"), python_hash_name
+                ).hexdigest()
                 if file_hash == primary_descriptor_hash:
                     # This looks like the right file
                     logger.debug("Found candidate primary descriptor %s", file_path)
                     if found_path is not None:
                         # But there are multiple instances of it so we can't know which to run.
                         # TODO: Find out the right path from Dockstore somehow!
-                        raise RuntimeError(f"Workflow contains multiple files named {primary_descriptor_basename} with {python_hash_name} hash {file_hash}: {found_path} and {file_path}")
+                        raise RuntimeError(
+                            f"Workflow contains multiple files named {primary_descriptor_basename} with {python_hash_name} hash {file_hash}: {found_path} and {file_path}"
+                        )
                     # This is the first file with the right name and hash
                     found_path = file_path
                 else:
-                    logger.debug("Rejected %s because its %s hash %s is not %s", file_path, python_hash_name, file_hash, primary_descriptor_hash)
+                    logger.debug(
+                        "Rejected %s because its %s hash %s is not %s",
+                        file_path,
+                        python_hash_name,
+                        file_hash,
+                        primary_descriptor_hash,
+                    )
     if found_path is None:
         # We couldn't find the promised primary descriptor
-        raise RuntimeError(f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash} for workflow {trs_workflow_id} version {trs_version} in language {language}")
+        raise RuntimeError(
+            f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash} for workflow {trs_workflow_id} version {trs_version} in language {language}"
+        )

     return found_path

-def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = None) -> tuple[str, Optional[str]]:
+
+def resolve_workflow(
+    workflow: str, supported_languages: set[str] | None = None
+) -> tuple[str, str | None]:
     """
     Find the real workflow URL or filename from a command line argument.

@@ -370,7 +440,9 @@ def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = No

     if is_trs_workflow(workflow):
         # Ask TRS host where to find TRS-looking things
-        trs_workflow_id, trs_version, language = find_workflow(workflow, supported_languages)
+        trs_workflow_id, trs_version, language = find_workflow(
+            workflow, supported_languages
+        )
         resolved = fetch_workflow(trs_workflow_id, trs_version, language)
         logger.info("Resolved TRS workflow %s to %s", workflow, resolved)
         return resolved, compose_trs_spec(trs_workflow_id, trs_version)
@@ -378,13 +450,3 @@ def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = No
     # Pass other things through.
     # TODO: Find out if they have TRS names.
     return workflow, None
-
-
-
-
-
-
-
-
-
-
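Beyond the black-style reflowing and the Optional-to-"| None" migration, the hunks above pin down the TRS ID round trip: parse_trs_spec splits on the first ":" and compose_trs_spec joins with one. A short usage sketch, using the md5sum-checker ID that appears in the module's own comments:

    from toil.lib.trs import compose_trs_spec, parse_trs_spec

    workflow_id, version = parse_trs_spec(
        "#workflow/github.com/dockstore-testing/md5sum-checker:master"
    )
    assert workflow_id == "#workflow/github.com/dockstore-testing/md5sum-checker"
    assert version == "master"

    # Without a ':' the version comes back as None; find_workflow() then
    # defaults to the only eligible version or raises ValueError.
    workflow_id, version = parse_trs_spec(workflow_id)
    assert version is None

    assert compose_trs_spec(workflow_id, "master").endswith(":master")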
toil/lib/url.py CHANGED
@@ -12,26 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import os
-from abc import ABC, ABCMeta, abstractmethod
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    ContextManager,
-    Literal,
-    Optional,
-    Union,
-    cast,
-    overload,
-    Type,
-)
+from abc import abstractmethod
+from typing import IO, cast
 from urllib.parse import ParseResult, urlparse

 from toil.lib.exceptions import UnimplementedURLException
-from toil.lib.memoize import memoize
-from toil.lib.plugins import register_plugin, get_plugin
+from toil.lib.plugins import get_plugin, register_plugin

 try:
     from botocore.exceptions import ProxyConnectionError
@@ -40,8 +26,10 @@ except ImportError:
     class ProxyConnectionError(BaseException):  # type: ignore
         """Dummy class."""

+
 logger = logging.getLogger(__name__)

+
 class URLAccess:
     """
     Widget for accessing external storage (URLs).
@@ -62,7 +50,7 @@
         return otherCls._url_exists(parseResult)

     @classmethod
-    def get_size(cls, src_uri: str) -> Optional[int]:
+    def get_size(cls, src_uri: str) -> int | None:
         """
         Get the size in bytes of the file at the given URL, or None if it cannot be obtained.

@@ -147,7 +135,7 @@

     @classmethod
     @abstractmethod
-    def _get_size(cls, url: ParseResult) -> Optional[int]:
+    def _get_size(cls, url: ParseResult) -> int | None:
         """
         Get the size of the object at the given URL, or None if it cannot be obtained.
         """
@@ -217,7 +205,7 @@
     @abstractmethod
     def _write_to_url(
         cls,
-        readable: Union[IO[bytes], IO[str]],
+        readable: IO[bytes] | IO[str],
         url: ParseResult,
         executable: bool = False,
     ) -> None:
@@ -264,9 +252,9 @@
             implementation_factory = get_plugin("url_access", url.scheme.lower())
         except KeyError:
             raise UnimplementedURLException(url, "export" if export else "import")
-
+
         try:
-            implementation = cast(Type[URLAccess], implementation_factory())
+            implementation = cast(type[URLAccess], implementation_factory())
         except (ImportError, ProxyConnectionError):
             logger.debug(
                 "Unable to import implementation for scheme '%s', as is expected if the corresponding extra was "
@@ -279,10 +267,12 @@
                 return implementation
         raise UnimplementedURLException(url, "export" if export else "import")

+
 #####
 # Built-in url access
 #####

+
 def file_job_store_factory() -> type[URLAccess]:
     from toil.jobStores.fileJobStore import FileJobStore

@@ -306,7 +296,8 @@

     return JobStoreSupport

-#make sure my py still works and the tests work
+
+# make sure my py still works and the tests work
 # can then get rid of _url_access_classes method

 #####
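The trimmed imports make the plugin indirection easier to follow: URLAccess looks up a factory with get_plugin("url_access", scheme) and only then instantiates the backing class, so optional extras fail lazily. A hypothetical registration sketch for a custom scheme, assuming register_plugin takes the same (category, name) key as get_plugin plus the factory; the module and class names here are invented for illustration:

    from toil.lib.plugins import register_plugin
    from toil.lib.url import URLAccess


    def demo_access_factory() -> type[URLAccess]:
        # Hypothetical module: the import is deferred so that a missing
        # optional dependency only fails when a demo:// URL is actually used.
        from mypackage.demo_access import DemoURLAccess

        return DemoURLAccess


    register_plugin("url_access", "demo", demo_access_factory)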
toil/lib/web.py CHANGED
@@ -23,9 +23,10 @@ user agent.
 >>> web_session.get(httpserver.url_for("/path"))
 <Response [200]>
 """
-import requests
 import sys

+import requests
+
 from toil.version import baseVersion

 # We manage a Requests session at the module level in case we're supposed to be
@@ -34,4 +35,8 @@ from toil.version import baseVersion
 # in theory (someone might make a new Toil version first, but there's no way
 # to know for sure that nobody else did the same thing).
 web_session = requests.Session()
-web_session.headers.update({"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"})
+web_session.headers.update(
+    {
+        "User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"
+    }
+)