toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/__init__.py CHANGED
@@ -11,22 +11,19 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- import errno
15
14
  import logging
16
15
  import os
17
16
  import re
18
17
  import socket
19
18
  import sys
20
- import time
21
19
  from datetime import datetime
22
- from typing import TYPE_CHECKING, Optional, Tuple
20
+ from typing import TYPE_CHECKING, Optional
23
21
 
24
22
  import requests
25
- from pytz import timezone
26
23
 
27
24
  from docker.errors import ImageNotFound
28
25
  from toil.lib.memoize import memoize
29
- from toil.lib.retry import retry
26
+ from toil.lib.retry import retry as retry
30
27
  from toil.version import currentCommit
31
28
 
32
29
  if TYPE_CHECKING:
@@ -44,15 +41,15 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None) -> Optional[str]:
44
41
  `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
45
42
  of os.environ.get("PATH"), or can be overridden with a custom search
46
43
  path.
47
-
44
+
48
45
  :returns: The path found, or None.
49
46
  """
47
+
50
48
  # Check that a given file can be accessed with the correct mode.
51
49
  # Additionally check that `file` is not a directory, as on Windows
52
50
  # directories pass the os.access check.
53
51
  def _access_check(fn, mode):
54
- return (os.path.exists(fn) and os.access(fn, mode)
55
- and not os.path.isdir(fn))
52
+ return os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)
56
53
 
57
54
  # If we're given a path with a directory part, look it up directly rather
58
55
  # than referring to PATH directories. This includes checking relative to the
@@ -107,17 +104,19 @@ def toilPackageDirPath() -> str:
107
104
  The return value is guaranteed to end in '/toil'.
108
105
  """
109
106
  result = os.path.dirname(os.path.realpath(__file__))
110
- if not result.endswith('/toil'):
107
+ if not result.endswith("/toil"):
111
108
  raise RuntimeError("The top-level toil package is not named Toil.")
112
109
  return result
113
110
 
114
111
 
115
112
  def inVirtualEnv() -> bool:
116
113
  """Test if we are inside a virtualenv or Conda virtual environment."""
117
- return ('VIRTUAL_ENV' in os.environ or
118
- 'CONDA_DEFAULT_ENV' in os.environ or
119
- hasattr(sys, 'real_prefix') or
120
- (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix))
114
+ return (
115
+ "VIRTUAL_ENV" in os.environ
116
+ or "CONDA_DEFAULT_ENV" in os.environ
117
+ or hasattr(sys, "real_prefix")
118
+ or (hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix)
119
+ )
121
120
 
122
121
 
123
122
  def resolveEntryPoint(entryPoint: str) -> str:
@@ -126,7 +125,7 @@ def resolveEntryPoint(entryPoint: str) -> str:
126
125
 
127
126
  :returns: The path found, which may be an absolute or a relative path.
128
127
  """
129
- if os.environ.get("TOIL_CHECK_ENV", None) == 'True' and inVirtualEnv():
128
+ if os.environ.get("TOIL_CHECK_ENV", None) == "True" and inVirtualEnv():
130
129
  path = os.path.join(os.path.dirname(sys.executable), entryPoint)
131
130
  # Inside a virtualenv we try to use absolute paths to the entrypoints.
132
131
  if os.path.isfile(path):
@@ -135,7 +134,9 @@ def resolveEntryPoint(entryPoint: str) -> str:
135
134
  # if Toil is installed in a virtualenv on the leader, it must be installed in
136
135
  # a virtualenv located at the same path on each worker as well.
137
136
  if not os.access(path, os.X_OK):
138
- raise RuntimeError("Cannot access the Toil virtualenv. If installed in a virtualenv on a cluster, make sure that the virtualenv path is the same for the leader and workers.")
137
+ raise RuntimeError(
138
+ "Cannot access the Toil virtualenv. If installed in a virtualenv on a cluster, make sure that the virtualenv path is the same for the leader and workers."
139
+ )
139
140
  return path
140
141
  # Otherwise, we aren't in a virtualenv, or we're in a virtualenv but Toil
141
142
  # came in via --system-site-packages, or we think the virtualenv might not
@@ -155,10 +156,15 @@ def physicalMemory() -> int:
155
156
  True
156
157
  """
157
158
  try:
158
- return os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
159
+ return os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
159
160
  except ValueError:
160
161
  import subprocess
161
- return int(subprocess.check_output(['sysctl', '-n', 'hw.memsize']).decode('utf-8').strip())
162
+
163
+ return int(
164
+ subprocess.check_output(["sysctl", "-n", "hw.memsize"])
165
+ .decode("utf-8")
166
+ .strip()
167
+ )
162
168
 
163
169
 
164
170
  def physicalDisk(directory: str) -> int:
@@ -182,15 +188,22 @@ def applianceSelf(forceDockerAppliance: bool = False) -> str:
182
188
  Setting TOIL_APPLIANCE_SELF will not be necessary in most cases.
183
189
  """
184
190
  import toil.version
185
- registry = lookupEnvVar(name='docker registry',
186
- envName='TOIL_DOCKER_REGISTRY',
187
- defaultValue=toil.version.dockerRegistry)
188
- name = lookupEnvVar(name='docker name',
189
- envName='TOIL_DOCKER_NAME',
190
- defaultValue=toil.version.dockerName)
191
- appliance = lookupEnvVar(name='docker appliance',
192
- envName='TOIL_APPLIANCE_SELF',
193
- defaultValue=registry + '/' + name + ':' + toil.version.dockerTag)
191
+
192
+ registry = lookupEnvVar(
193
+ name="docker registry",
194
+ envName="TOIL_DOCKER_REGISTRY",
195
+ defaultValue=toil.version.dockerRegistry,
196
+ )
197
+ name = lookupEnvVar(
198
+ name="docker name",
199
+ envName="TOIL_DOCKER_NAME",
200
+ defaultValue=toil.version.dockerName,
201
+ )
202
+ appliance = lookupEnvVar(
203
+ name="docker appliance",
204
+ envName="TOIL_APPLIANCE_SELF",
205
+ defaultValue=registry + "/" + name + ":" + toil.version.dockerTag,
206
+ )
194
207
 
195
208
  checkDockerSchema(appliance)
196
209
 
@@ -210,11 +223,13 @@ def customDockerInitCmd() -> str:
210
223
  private docker registries). Any single quotes are escaped and the command cannot contain a
211
224
  set of blacklisted chars (newline or tab).
212
225
 
213
- :returns: The custom commmand, or an empty string is returned if the environment variable is not set.
226
+ :returns: The custom command, or an empty string is returned if the environment variable is not set.
214
227
  """
215
- command = lookupEnvVar(name='user-defined custom docker init command',
216
- envName='TOIL_CUSTOM_DOCKER_INIT_COMMAND',
217
- defaultValue='')
228
+ command = lookupEnvVar(
229
+ name="user-defined custom docker init command",
230
+ envName="TOIL_CUSTOM_DOCKER_INIT_COMMAND",
231
+ defaultValue="",
232
+ )
218
233
  _check_custom_bash_cmd(command)
219
234
  return command.replace("'", "'\\''") # Ensure any single quotes are escaped.
220
235
 
@@ -225,24 +240,28 @@ def customInitCmd() -> str:
225
240
 
226
241
  The custom init command is run prior to running Toil appliance itself in workers and/or the
227
242
  primary node (i.e. this is run one stage before ``TOIL_CUSTOM_DOCKER_INIT_COMMAND``).
228
-
243
+
229
244
  This can be useful for doing any custom initialization on instances (e.g. authenticating to
230
245
  private docker registries). Any single quotes are escaped and the command cannot contain a
231
246
  set of blacklisted chars (newline or tab).
232
247
 
233
248
  returns: the custom command or an empty string is returned if the environment variable is not set.
234
249
  """
235
- command = lookupEnvVar(name='user-defined custom init command',
236
- envName='TOIL_CUSTOM_INIT_COMMAND',
237
- defaultValue='')
250
+ command = lookupEnvVar(
251
+ name="user-defined custom init command",
252
+ envName="TOIL_CUSTOM_INIT_COMMAND",
253
+ defaultValue="",
254
+ )
238
255
  _check_custom_bash_cmd(command)
239
256
  return command.replace("'", "'\\''") # Ensure any single quotes are escaped.
240
257
 
241
258
 
242
259
  def _check_custom_bash_cmd(cmd_str):
243
260
  """Ensure that the Bash command doesn't contain invalid characters."""
244
- if re.search(r'[\n\r\t]', cmd_str):
245
- raise RuntimeError(f'"{cmd_str}" contains invalid characters (newline and/or tab).')
261
+ if re.search(r"[\n\r\t]", cmd_str):
262
+ raise RuntimeError(
263
+ f'"{cmd_str}" contains invalid characters (newline and/or tab).'
264
+ )
246
265
 
247
266
 
248
267
  def lookupEnvVar(name: str, envName: str, defaultValue: str) -> str:
@@ -257,10 +276,14 @@ def lookupEnvVar(name: str, envName: str, defaultValue: str) -> str:
257
276
  try:
258
277
  value = os.environ[envName]
259
278
  except KeyError:
260
- log.info('Using default %s of %s as %s is not set.', name, defaultValue, envName)
279
+ log.info(
280
+ "Using default %s of %s as %s is not set.", name, defaultValue, envName
281
+ )
261
282
  return defaultValue
262
283
  else:
263
- log.info('Overriding %s of %s with %s from %s.', name, defaultValue, value, envName)
284
+ log.info(
285
+ "Overriding %s of %s with %s from %s.", name, defaultValue, value, envName
286
+ )
264
287
  return value
265
288
 
266
289
 
@@ -279,14 +302,20 @@ def checkDockerImageExists(appliance: str) -> str:
279
302
  return appliance
280
303
  registryName, imageName, tag = parseDockerAppliance(appliance)
281
304
 
282
- if registryName == 'docker.io':
283
- return requestCheckDockerIo(origAppliance=appliance, imageName=imageName, tag=tag)
305
+ if registryName == "docker.io":
306
+ return requestCheckDockerIo(
307
+ origAppliance=appliance, imageName=imageName, tag=tag
308
+ )
284
309
  else:
285
- return requestCheckRegularDocker(origAppliance=appliance, registryName=registryName, imageName=imageName,
286
- tag=tag)
310
+ return requestCheckRegularDocker(
311
+ origAppliance=appliance,
312
+ registryName=registryName,
313
+ imageName=imageName,
314
+ tag=tag,
315
+ )
287
316
 
288
317
 
289
- def parseDockerAppliance(appliance: str) -> Tuple[str, str, str]:
318
+ def parseDockerAppliance(appliance: str) -> tuple[str, str, str]:
290
319
  """
291
320
  Derive parsed registry, image reference, and tag from a docker image string.
292
321
 
@@ -304,21 +333,21 @@ def parseDockerAppliance(appliance: str) -> Tuple[str, str, str]:
304
333
  appliance = appliance.lower()
305
334
 
306
335
  # get the tag
307
- if ':' in appliance:
308
- tag = appliance.split(':')[-1]
309
- appliance = appliance[:-(len(':' + tag))] # remove only the tag
336
+ if ":" in appliance:
337
+ tag = appliance.split(":")[-1]
338
+ appliance = appliance[: -(len(":" + tag))] # remove only the tag
310
339
  else:
311
340
  # default to 'latest' if no tag is specified
312
- tag = 'latest'
341
+ tag = "latest"
313
342
 
314
343
  # get the registry and image
315
- registryName = 'docker.io' # default if not specified
344
+ registryName = "docker.io" # default if not specified
316
345
  imageName = appliance # will be true if not specified
317
- if '/' in appliance and '.' in appliance.split('/')[0]:
318
- registryName = appliance.split('/')[0]
319
- imageName = appliance[len(registryName):]
320
- registryName = registryName.strip('/')
321
- imageName = imageName.strip('/')
346
+ if "/" in appliance and "." in appliance.split("/")[0]:
347
+ registryName = appliance.split("/")[0]
348
+ imageName = appliance[len(registryName) :]
349
+ registryName = registryName.strip("/")
350
+ imageName = imageName.strip("/")
322
351
 
323
352
  return registryName, imageName, tag
324
353
 
@@ -326,12 +355,14 @@ def parseDockerAppliance(appliance: str) -> Tuple[str, str, str]:
326
355
  def checkDockerSchema(appliance):
327
356
  if not appliance:
328
357
  raise ImageNotFound("No docker image specified.")
329
- elif '://' in appliance:
330
- raise ImageNotFound("Docker images cannot contain a schema (such as '://'): %s"
331
- "" % appliance)
358
+ elif "://" in appliance:
359
+ raise ImageNotFound(
360
+ "Docker images cannot contain a schema (such as '://'): %s" "" % appliance
361
+ )
332
362
  elif len(appliance) > 256:
333
- raise ImageNotFound("Docker image must be less than 256 chars: %s"
334
- "" % appliance)
363
+ raise ImageNotFound(
364
+ "Docker image must be less than 256 chars: %s" "" % appliance
365
+ )
335
366
 
336
367
 
337
368
  class ApplianceImageNotFound(ImageNotFound):
@@ -346,22 +377,28 @@ class ApplianceImageNotFound(ImageNotFound):
346
377
  """
347
378
 
348
379
  def __init__(self, origAppliance, url, statusCode):
349
- msg = ("The docker image that TOIL_APPLIANCE_SELF specifies (%s) produced "
350
- "a nonfunctional manifest URL (%s). The HTTP status returned was %s. "
351
- "The specifier is most likely unsupported or malformed. "
352
- "Please supply a docker image with the format: "
353
- "'<websitehost>.io/<repo_path>:<tag>' or '<repo_path>:<tag>' "
354
- "(for official docker.io images). Examples: "
355
- "'quay.io/ucsc_cgl/toil:latest', 'ubuntu:latest', or "
356
- "'broadinstitute/genomes-in-the-cloud:2.0.0'."
357
- "" % (origAppliance, url, str(statusCode)))
380
+ msg = (
381
+ "The docker image that TOIL_APPLIANCE_SELF specifies (%s) produced "
382
+ "a nonfunctional manifest URL (%s). The HTTP status returned was %s. "
383
+ "The specifier is most likely unsupported or malformed. "
384
+ "Please supply a docker image with the format: "
385
+ "'<websitehost>.io/<repo_path>:<tag>' or '<repo_path>:<tag>' "
386
+ "(for official docker.io images). Examples: "
387
+ "'quay.io/ucsc_cgl/toil:latest', 'ubuntu:latest', or "
388
+ "'broadinstitute/genomes-in-the-cloud:2.0.0'."
389
+ "" % (origAppliance, url, str(statusCode))
390
+ )
358
391
  super().__init__(msg)
359
392
 
393
+
360
394
  # Cache images we know exist so we don't have to ask the registry about them
361
395
  # all the time.
362
396
  KNOWN_EXTANT_IMAGES = set()
363
397
 
364
- def requestCheckRegularDocker(origAppliance: str, registryName: str, imageName: str, tag: str) -> bool:
398
+
399
+ def requestCheckRegularDocker(
400
+ origAppliance: str, registryName: str, imageName: str, tag: str
401
+ ) -> bool:
365
402
  """
366
403
  Check if an image exists using the requests library.
367
404
 
@@ -385,8 +422,9 @@ def requestCheckRegularDocker(origAppliance: str, registryName: str, imageName:
385
422
  # Check the cache first
386
423
  return origAppliance
387
424
 
388
- ioURL = 'https://{webhost}/v2/{pathName}/manifests/{tag}' \
389
- ''.format(webhost=registryName, pathName=imageName, tag=tag)
425
+ ioURL = "https://{webhost}/v2/{pathName}/manifests/{tag}" "".format(
426
+ webhost=registryName, pathName=imageName, tag=tag
427
+ )
390
428
  response = requests.head(ioURL)
391
429
  if not response.ok:
392
430
  raise ApplianceImageNotFound(origAppliance, ioURL, response.status_code)
@@ -413,17 +451,20 @@ def requestCheckDockerIo(origAppliance: str, imageName: str, tag: str) -> bool:
413
451
  return origAppliance
414
452
 
415
453
  # only official images like 'busybox' or 'ubuntu'
416
- if '/' not in imageName:
417
- imageName = 'library/' + imageName
454
+ if "/" not in imageName:
455
+ imageName = "library/" + imageName
418
456
 
419
- token_url = 'https://auth.docker.io/token?service=registry.docker.io&scope=repository:{repo}:pull'.format(
420
- repo=imageName)
421
- requests_url = f'https://registry-1.docker.io/v2/{imageName}/manifests/{tag}'
457
+ token_url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:{repo}:pull".format(
458
+ repo=imageName
459
+ )
460
+ requests_url = f"https://registry-1.docker.io/v2/{imageName}/manifests/{tag}"
422
461
 
423
462
  token = requests.get(token_url)
424
463
  jsonToken = token.json()
425
464
  bearer = jsonToken["token"]
426
- response = requests.head(requests_url, headers={'Authorization': f'Bearer {bearer}'})
465
+ response = requests.head(
466
+ requests_url, headers={"Authorization": f"Bearer {bearer}"}
467
+ )
427
468
  if not response.ok:
428
469
  raise ApplianceImageNotFound(origAppliance, requests_url, response.status_code)
429
470
  else:
@@ -435,22 +476,18 @@ def logProcessContext(config: "Config") -> None:
435
476
  # toil.version.version (string) cannot be imported at top level because it conflicts with
436
477
  # toil.version (module) and Sphinx doesn't like that.
437
478
  from toil.version import version
479
+
438
480
  log.info("Running Toil version %s on host %s.", version, socket.gethostname())
439
481
  log.debug("Configuration: %s", config.__dict__)
440
482
 
441
483
 
442
484
  try:
443
- from boto import provider
444
- from botocore.credentials import (JSONFileCache,
445
- RefreshableCredentials,
446
- create_credential_resolver)
447
- from botocore.session import Session
448
-
449
- cache_path = '~/.cache/aws/cached_temporary_credentials'
450
- datetime_format = "%Y-%m-%dT%H:%M:%SZ" # incidentally the same as the format used by AWS
485
+ cache_path = "~/.cache/aws/cached_temporary_credentials"
486
+ datetime_format = (
487
+ "%Y-%m-%dT%H:%M:%SZ" # incidentally the same as the format used by AWS
488
+ )
451
489
  log = logging.getLogger(__name__)
452
490
 
453
-
454
491
  # But in addition to our manual cache, we also are going to turn on boto3's
455
492
  # new built-in caching layer.
456
493
 
@@ -463,7 +500,6 @@ try:
463
500
  """
464
501
  return dt.strftime(datetime_format)
465
502
 
466
-
467
503
  def str_to_datetime(s):
468
504
  """
469
505
  Convert a string, explicitly UTC into a naive (implicitly UTC) datetime object.
@@ -477,234 +513,5 @@ try:
477
513
  """
478
514
  return datetime.strptime(s, datetime_format)
479
515
 
480
-
481
- class BotoCredentialAdapter(provider.Provider):
482
- """
483
- Boto 2 Adapter to use AWS credentials obtained via Boto 3's credential finding logic.
484
-
485
- This allows for automatic role assumption
486
- respecting the Boto 3 config files, even when parts of the app still use
487
- Boto 2.
488
-
489
- This class also handles caching credentials in multi-process environments
490
- to avoid loads of processes swamping the EC2 metadata service.
491
- """
492
-
493
- # TODO: We take kwargs because new boto2 versions have an 'anon'
494
- # argument and we want to be future proof
495
-
496
- def __init__(self, name, access_key=None, secret_key=None,
497
- security_token=None, profile_name=None, **kwargs):
498
- """Create a new BotoCredentialAdapter."""
499
- # TODO: We take kwargs because new boto2 versions have an 'anon'
500
- # argument and we want to be future proof
501
-
502
- if (name == 'aws' or name is None) and access_key is None and not kwargs.get('anon', False):
503
- # We are on AWS and we don't have credentials passed along and we aren't anonymous.
504
- # We will backend into a boto3 resolver for getting credentials.
505
- # Make sure to enable boto3's own caching, so we can share that
506
- # cache with pure boto3 code elsewhere in Toil.
507
- # Keep synced with toil.lib.aws.session.establish_boto3_session
508
- self._boto3_resolver = create_credential_resolver(Session(profile=profile_name), cache=JSONFileCache())
509
- else:
510
- # We will use the normal flow
511
- self._boto3_resolver = None
512
-
513
- # Pass along all the arguments
514
- super().__init__(name, access_key=access_key,
515
- secret_key=secret_key, security_token=security_token,
516
- profile_name=profile_name, **kwargs)
517
-
518
- def get_credentials(self, access_key=None, secret_key=None, security_token=None, profile_name=None):
519
- """
520
- Make sure our credential fields are populated.
521
-
522
- Called by the base class constructor.
523
- """
524
- if self._boto3_resolver is not None:
525
- # Go get the credentials from the cache, or from boto3 if not cached.
526
- # We need to be eager here; having the default None
527
- # _credential_expiry_time makes the accessors never try to refresh.
528
- self._obtain_credentials_from_cache_or_boto3()
529
- else:
530
- # We're not on AWS, or they passed a key, or we're anonymous.
531
- # Use the normal route; our credentials shouldn't expire.
532
- super().get_credentials(access_key=access_key,
533
- secret_key=secret_key, security_token=security_token,
534
- profile_name=profile_name)
535
-
536
- def _populate_keys_from_metadata_server(self):
537
- """
538
- Hack to catch _credential_expiry_time being too soon and refresh the credentials.
539
-
540
- This override is misnamed; it's actually the only hook we have to catch
541
- _credential_expiry_time being too soon and refresh the credentials. We
542
- actually just go back and poke the cache to see if it feels like
543
- getting us new credentials.
544
-
545
- Boto 2 hardcodes a refresh within 5 minutes of expiry:
546
- https://github.com/boto/boto/blob/591911db1029f2fbb8ba1842bfcc514159b37b32/boto/provider.py#L247
547
-
548
- Boto 3 wants to refresh 15 or 10 minutes before expiry:
549
- https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L279
550
-
551
- So if we ever want to refresh, Boto 3 wants to refresh too.
552
- """
553
- # This should only happen if we have expiring credentials, which we should only get from boto3
554
- if self._boto3_resolver is None:
555
- raise RuntimeError("The Boto3 resolver should not be None.")
556
-
557
- self._obtain_credentials_from_cache_or_boto3()
558
-
559
- @retry()
560
- def _obtain_credentials_from_boto3(self):
561
- """
562
- Fill our credential fields from Boto 3.
563
-
564
- We know the current cached credentials are not good, and that we
565
- need to get them from Boto 3. Fill in our credential fields
566
- (_access_key, _secret_key, _security_token,
567
- _credential_expiry_time) from Boto 3.
568
- """
569
- # We get a Credentials object
570
- # <https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L227>
571
- # or a RefreshableCredentials, or None on failure.
572
- creds = self._boto3_resolver.load_credentials()
573
-
574
- if creds is None:
575
- try:
576
- resolvers = str(self._boto3_resolver.providers)
577
- except:
578
- resolvers = "(Resolvers unavailable)"
579
- raise RuntimeError("Could not obtain AWS credentials from Boto3. Resolvers tried: " + resolvers)
580
-
581
- # Make sure the credentials actually has some credentials if it is lazy
582
- creds.get_frozen_credentials()
583
-
584
- # Get when the credentials will expire, if ever
585
- if isinstance(creds, RefreshableCredentials):
586
- # Credentials may expire.
587
- # Get a naive UTC datetime like boto 2 uses from the boto 3 time.
588
- self._credential_expiry_time = creds._expiry_time.astimezone(timezone('UTC')).replace(tzinfo=None)
589
- else:
590
- # Credentials never expire
591
- self._credential_expiry_time = None
592
-
593
- # Then, atomically get all the credentials bits. They may be newer than we think they are, but never older.
594
- frozen = creds.get_frozen_credentials()
595
-
596
- # Copy them into us
597
- self._access_key = frozen.access_key
598
- self._secret_key = frozen.secret_key
599
- self._security_token = frozen.token
600
-
601
- def _obtain_credentials_from_cache_or_boto3(self):
602
- """
603
- Get the cached credentials.
604
-
605
- Or retrieve them from Boto 3 and cache them
606
- (or wait for another cooperating process to do so) if they are missing
607
- or not fresh enough.
608
- """
609
- cache_path = '~/.cache/aws/cached_temporary_credentials'
610
- path = os.path.expanduser(cache_path)
611
- tmp_path = path + '.tmp'
612
- while True:
613
- log.debug('Attempting to read cached credentials from %s.', path)
614
- try:
615
- with open(path) as f:
616
- content = f.read()
617
- if content:
618
- record = content.split('\n')
619
- if len(record) != 4:
620
- raise RuntimeError("Number of cached credentials is not 4.")
621
- self._access_key = record[0]
622
- self._secret_key = record[1]
623
- self._security_token = record[2]
624
- self._credential_expiry_time = str_to_datetime(record[3])
625
- else:
626
- log.debug('%s is empty. Credentials are not temporary.', path)
627
- self._obtain_credentials_from_boto3()
628
- return
629
- except OSError as e:
630
- if e.errno == errno.ENOENT:
631
- log.debug('Cached credentials are missing.')
632
- dir_path = os.path.dirname(path)
633
- if not os.path.exists(dir_path):
634
- log.debug('Creating parent directory %s', dir_path)
635
- try:
636
- # A race would be ok at this point
637
- os.makedirs(dir_path, exist_ok=True)
638
- except OSError as e2:
639
- if e2.errno == errno.EROFS:
640
- # Sometimes we don't actually have write access to ~.
641
- # We may be running in a non-writable Toil container.
642
- # We should just go get our own credentials
643
- log.debug('Cannot use the credentials cache because we are working on a read-only filesystem.')
644
- self._obtain_credentials_from_boto3()
645
- else:
646
- raise
647
- else:
648
- raise
649
- else:
650
- if self._credentials_need_refresh():
651
- log.debug('Cached credentials are expired.')
652
- else:
653
- log.debug('Cached credentials exist and are still fresh.')
654
- return
655
- # We get here if credentials are missing or expired
656
- log.debug('Racing to create %s.', tmp_path)
657
- # Only one process, the winner, will succeed
658
- try:
659
- fd = os.open(tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
660
- except OSError as e:
661
- if e.errno == errno.EEXIST:
662
- log.debug('Lost the race to create %s. Waiting on winner to remove it.', tmp_path)
663
- while os.path.exists(tmp_path):
664
- time.sleep(0.1)
665
- log.debug('Winner removed %s. Trying from the top.', tmp_path)
666
- else:
667
- raise
668
- else:
669
- try:
670
- log.debug('Won the race to create %s. Requesting credentials from backend.', tmp_path)
671
- self._obtain_credentials_from_boto3()
672
- except:
673
- os.close(fd)
674
- fd = None
675
- log.debug('Failed to obtain credentials, removing %s.', tmp_path)
676
- # This unblocks the losers.
677
- os.unlink(tmp_path)
678
- # Bail out. It's too likely to happen repeatedly
679
- raise
680
- else:
681
- if self._credential_expiry_time is None:
682
- os.close(fd)
683
- fd = None
684
- log.debug('Credentials are not temporary. Leaving %s empty and renaming it to %s.',
685
- tmp_path, path)
686
- # No need to actually cache permanent credentials,
687
- # because we know we aren't getting them from the
688
- # metadata server or by assuming a role. Those both
689
- # give temporary credentials.
690
- else:
691
- log.debug('Writing credentials to %s.', tmp_path)
692
- with os.fdopen(fd, 'w') as fh:
693
- fd = None
694
- fh.write('\n'.join([
695
- self._access_key,
696
- self._secret_key,
697
- self._security_token,
698
- datetime_to_str(self._credential_expiry_time)]))
699
- log.debug('Wrote credentials to %s. Renaming to %s.', tmp_path, path)
700
- os.rename(tmp_path, path)
701
- return
702
- finally:
703
- if fd is not None:
704
- os.close(fd)
705
-
706
-
707
- provider.Provider = BotoCredentialAdapter
708
-
709
516
  except ImportError:
710
517
  pass
toil/batchSystems/__init__.py CHANGED
@@ -18,6 +18,7 @@ class DeadlockException(Exception):
18
18
  Exception thrown by the Leader or BatchSystem when a deadlock is encountered due to insufficient
19
19
  resources to run the workflow
20
20
  """
21
+
21
22
  def __init__(self, msg):
22
23
  self.msg = f"Deadlock encountered: {msg}"
23
24
  super().__init__()