toil 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +17 -22
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.1.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/lib/dockstore.py
CHANGED
|
@@ -18,23 +18,19 @@ Contains functions for integrating Toil with UCSC Dockstore, for reporting metri
|
|
|
18
18
|
For basic TRS functionality for fetching workflows, see trs.py.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
import datetime
|
|
22
21
|
import logging
|
|
23
22
|
import math
|
|
24
23
|
import os
|
|
25
24
|
import re
|
|
26
25
|
import sys
|
|
27
|
-
import
|
|
28
|
-
from
|
|
26
|
+
from typing import Any, Literal, TypedDict, Union
|
|
27
|
+
from urllib.parse import quote
|
|
29
28
|
|
|
30
|
-
from urllib.parse import urlparse, unquote, quote
|
|
31
29
|
import requests
|
|
32
30
|
|
|
33
|
-
from toil.lib.misc import
|
|
31
|
+
from toil.lib.misc import seconds_to_duration, unix_seconds_to_timestamp
|
|
34
32
|
from toil.lib.trs import TRS_ROOT
|
|
35
|
-
from toil.lib.retry import retry
|
|
36
33
|
from toil.lib.web import web_session
|
|
37
|
-
from toil.version import baseVersion
|
|
38
34
|
|
|
39
35
|
if sys.version_info < (3, 11):
|
|
40
36
|
from typing_extensions import NotRequired
|
|
@@ -47,7 +43,9 @@ logger = logging.getLogger(__name__)
|
|
|
47
43
|
|
|
48
44
|
# This is a publish-able token for production Dockstore for Toil to use.
|
|
49
45
|
# This is NOT a secret value.
|
|
50
|
-
DEFAULT_DOCKSTORE_TOKEN =
|
|
46
|
+
DEFAULT_DOCKSTORE_TOKEN = (
|
|
47
|
+
"2bff46294daddef6df185452b04db6143ea8a59f52ee3c325d3e1df418511b7d"
|
|
48
|
+
)
|
|
51
49
|
|
|
52
50
|
# How should we authenticate our Dockstore requests?
|
|
53
51
|
DOCKSTORE_TOKEN = os.environ.get("TOIL_DOCKSTORE_TOKEN", DEFAULT_DOCKSTORE_TOKEN)
|
|
@@ -59,7 +57,15 @@ DOCKSTORE_PLATFORM = "TOIL"
|
|
|
59
57
|
# This is a https://schema.org/CompletedActionStatus
|
|
60
58
|
# The values here are from expanding the type info in the Docksotre docs at
|
|
61
59
|
# <https://dockstore.org/api/static/swagger-ui/index.html#/extendedGA4GH/executionMetricsPost>
|
|
62
|
-
ExecutionStatus = Union[
|
|
60
|
+
ExecutionStatus = Union[
|
|
61
|
+
Literal["ALL"],
|
|
62
|
+
Literal["SUCCESSFUL"],
|
|
63
|
+
Literal["FAILED"],
|
|
64
|
+
Literal["FAILED_SEMANTIC_INVALID"],
|
|
65
|
+
Literal["FAILED_RUNTIME_INVALID"],
|
|
66
|
+
Literal["ABORTED"],
|
|
67
|
+
]
|
|
68
|
+
|
|
63
69
|
|
|
64
70
|
class Cost(TypedDict):
|
|
65
71
|
"""
|
|
@@ -71,6 +77,7 @@ class Cost(TypedDict):
|
|
|
71
77
|
Cost in US Dollars.
|
|
72
78
|
"""
|
|
73
79
|
|
|
80
|
+
|
|
74
81
|
class RunExecution(TypedDict):
|
|
75
82
|
"""
|
|
76
83
|
Dockstore metrics data for a workflow or task run.
|
|
@@ -83,7 +90,7 @@ class RunExecution(TypedDict):
|
|
|
83
90
|
|
|
84
91
|
dateExecuted: str
|
|
85
92
|
"""
|
|
86
|
-
ISO 8601 UTC timestamp when the execution
|
|
93
|
+
ISO 8601 UTC timestamp when the execution happened.
|
|
87
94
|
"""
|
|
88
95
|
|
|
89
96
|
executionStatus: ExecutionStatus
|
|
@@ -125,6 +132,7 @@ class RunExecution(TypedDict):
|
|
|
125
132
|
Dockstore can take any JSON-able structured data.
|
|
126
133
|
"""
|
|
127
134
|
|
|
135
|
+
|
|
128
136
|
class TaskExecutions(TypedDict):
|
|
129
137
|
"""
|
|
130
138
|
Dockstore metrics data for all the tasks in a workflow.
|
|
@@ -140,7 +148,7 @@ class TaskExecutions(TypedDict):
|
|
|
140
148
|
|
|
141
149
|
dateExecuted: str
|
|
142
150
|
"""
|
|
143
|
-
ISO 8601 UTC timestamp when the execution
|
|
151
|
+
ISO 8601 UTC timestamp when the execution happened.
|
|
144
152
|
"""
|
|
145
153
|
|
|
146
154
|
taskExecutions: list[RunExecution]
|
|
@@ -155,6 +163,7 @@ class TaskExecutions(TypedDict):
|
|
|
155
163
|
Dockstore can take any JSON-able structured data.
|
|
156
164
|
"""
|
|
157
165
|
|
|
166
|
+
|
|
158
167
|
def ensure_valid_id(execution_id: str) -> None:
|
|
159
168
|
"""
|
|
160
169
|
Make sure the given execution ID is in Dockstore format and will be accepted by Dockstore.
|
|
@@ -173,29 +182,30 @@ def ensure_valid_id(execution_id: str) -> None:
|
|
|
173
182
|
if not re.fullmatch("[a-zA-Z0-9_]+", execution_id):
|
|
174
183
|
raise ValueError("Execution ID must be alphanumeric with internal underscores")
|
|
175
184
|
|
|
185
|
+
|
|
176
186
|
def pack_workflow_metrics(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
execution_id: str,
|
|
188
|
+
start_time: float,
|
|
189
|
+
runtime: float,
|
|
190
|
+
succeeded: bool,
|
|
191
|
+
job_store_type: str | None = None,
|
|
192
|
+
batch_system: str | None = None,
|
|
193
|
+
caching: bool | None = None,
|
|
194
|
+
toil_version: str | None = None,
|
|
195
|
+
python_version: str | None = None,
|
|
196
|
+
platform_system: str | None = None,
|
|
197
|
+
platform_machine: str | None = None,
|
|
198
|
+
) -> RunExecution:
|
|
189
199
|
"""
|
|
190
200
|
Pack up per-workflow metrics into a format that can be submitted to Dockstore.
|
|
191
201
|
|
|
192
202
|
:param execution_id: Unique ID for the workflow execution. Must be in
|
|
193
|
-
Dockstore format.
|
|
203
|
+
Dockstore format.
|
|
194
204
|
:param start_time: Execution start time in seconds since the Unix epoch.
|
|
195
205
|
:param rutime: Execution duration in seconds.
|
|
196
206
|
:param jobstore_type: Kind of job store used, like "file" or "aws".
|
|
197
207
|
:param batch_system: Python class name implementing the batch system used.
|
|
198
|
-
:param caching: Whether Toil filestore-level
|
|
208
|
+
:param caching: Whether Toil filestore-level caching was used.
|
|
199
209
|
:param toil_version: Version of Toil used (without any Git hash).
|
|
200
210
|
:param python_version: Version of Python used.
|
|
201
211
|
:param platform_system: Operating system type (like "Darwin" or "Linux").
|
|
@@ -210,7 +220,7 @@ def pack_workflow_metrics(
|
|
|
210
220
|
executionId=execution_id,
|
|
211
221
|
dateExecuted=unix_seconds_to_timestamp(start_time),
|
|
212
222
|
executionTime=seconds_to_duration(runtime),
|
|
213
|
-
executionStatus="SUCCESSFUL" if succeeded else "FAILED"
|
|
223
|
+
executionStatus="SUCCESSFUL" if succeeded else "FAILED",
|
|
214
224
|
)
|
|
215
225
|
|
|
216
226
|
# TODO: Just use kwargs here?
|
|
@@ -242,22 +252,23 @@ def pack_workflow_metrics(
|
|
|
242
252
|
|
|
243
253
|
return result
|
|
244
254
|
|
|
255
|
+
|
|
245
256
|
def pack_single_task_metrics(
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
257
|
+
execution_id: str,
|
|
258
|
+
start_time: float,
|
|
259
|
+
runtime: float,
|
|
260
|
+
succeeded: bool,
|
|
261
|
+
job_name: str | None = None,
|
|
262
|
+
cores: float | None = None,
|
|
263
|
+
cpu_seconds: float | None = None,
|
|
264
|
+
memory_bytes: int | None = None,
|
|
265
|
+
disk_bytes: int | None = None,
|
|
266
|
+
) -> RunExecution:
|
|
256
267
|
"""
|
|
257
268
|
Pack up metrics for a single task execution in a format that can be used in a Dockstore submission.
|
|
258
269
|
|
|
259
270
|
:param execution_id: Unique ID for the workflow execution. Must be in
|
|
260
|
-
Dockstore format.
|
|
271
|
+
Dockstore format.
|
|
261
272
|
:param start_time: Execution start time in seconds since the Unix epoch.
|
|
262
273
|
:param rutime: Execution duration in seconds.
|
|
263
274
|
:param succeeded: Whether the execution succeeded.
|
|
@@ -278,7 +289,7 @@ def pack_single_task_metrics(
|
|
|
278
289
|
executionId=execution_id,
|
|
279
290
|
dateExecuted=unix_seconds_to_timestamp(start_time),
|
|
280
291
|
executionTime=seconds_to_duration(runtime),
|
|
281
|
-
executionStatus="SUCCESSFUL" if succeeded else "FAILED"
|
|
292
|
+
executionStatus="SUCCESSFUL" if succeeded else "FAILED",
|
|
282
293
|
)
|
|
283
294
|
|
|
284
295
|
if memory_bytes is not None:
|
|
@@ -310,12 +321,14 @@ def pack_single_task_metrics(
|
|
|
310
321
|
return result
|
|
311
322
|
|
|
312
323
|
|
|
313
|
-
def pack_workflow_task_set_metrics(
|
|
324
|
+
def pack_workflow_task_set_metrics(
|
|
325
|
+
execution_id: str, start_time: float, tasks: list[RunExecution]
|
|
326
|
+
) -> TaskExecutions:
|
|
314
327
|
"""
|
|
315
328
|
Pack up metrics for all the tasks in a workflow execution into a format that can be submitted to Dockstore.
|
|
316
|
-
|
|
329
|
+
|
|
317
330
|
:param execution_id: Unique ID for the workflow execution. Must be in
|
|
318
|
-
Dockstore format.
|
|
331
|
+
Dockstore format.
|
|
319
332
|
:param start_time: Execution start time for the overall workflow execution
|
|
320
333
|
in seconds since the Unix epoch.
|
|
321
334
|
:param tasks: Packed tasks from pack_single_task_metrics()
|
|
@@ -327,10 +340,16 @@ def pack_workflow_task_set_metrics(execution_id: str, start_time: float, tasks:
|
|
|
327
340
|
return TaskExecutions(
|
|
328
341
|
executionId=execution_id,
|
|
329
342
|
dateExecuted=unix_seconds_to_timestamp(start_time),
|
|
330
|
-
taskExecutions=tasks
|
|
343
|
+
taskExecutions=tasks,
|
|
331
344
|
)
|
|
332
345
|
|
|
333
|
-
|
|
346
|
+
|
|
347
|
+
def send_metrics(
|
|
348
|
+
trs_workflow_id: str,
|
|
349
|
+
trs_version: str,
|
|
350
|
+
workflow_runs: list[RunExecution],
|
|
351
|
+
workflow_task_sets: list[TaskExecutions],
|
|
352
|
+
) -> None:
|
|
334
353
|
"""
|
|
335
354
|
Send packed workflow and/or task metrics to Dockstore.
|
|
336
355
|
|
|
@@ -340,7 +359,7 @@ def send_metrics(trs_workflow_id: str, trs_version: str, workflow_runs: list[Run
|
|
|
340
359
|
each workflow. Each workflow should have one entry containing all its
|
|
341
360
|
tasks. Does not have to be the same order/set of workflows as
|
|
342
361
|
workflow_runs.
|
|
343
|
-
|
|
362
|
+
|
|
344
363
|
:raises requests.HTTPError: if Dockstore does not accept the metrics.
|
|
345
364
|
"""
|
|
346
365
|
|
|
@@ -348,13 +367,13 @@ def send_metrics(trs_workflow_id: str, trs_version: str, workflow_runs: list[Run
|
|
|
348
367
|
to_post = {
|
|
349
368
|
"runExecutions": workflow_runs,
|
|
350
369
|
"taskExecutions": workflow_task_sets,
|
|
351
|
-
"validationExecutions": []
|
|
370
|
+
"validationExecutions": [],
|
|
352
371
|
}
|
|
353
372
|
|
|
354
373
|
# Set the submission query string metadata
|
|
355
374
|
submission_params = {
|
|
356
375
|
"platform": DOCKSTORE_PLATFORM,
|
|
357
|
-
"description": "Workflow status from Toil"
|
|
376
|
+
"description": "Workflow status from Toil",
|
|
358
377
|
}
|
|
359
378
|
|
|
360
379
|
# Set the headers. Even though user agent isn't in here, it still gets
|
|
@@ -371,17 +390,25 @@ def send_metrics(trs_workflow_id: str, trs_version: str, workflow_runs: list[Run
|
|
|
371
390
|
logger.debug("With headers: %s", headers)
|
|
372
391
|
|
|
373
392
|
try:
|
|
374
|
-
result = web_session.post(
|
|
393
|
+
result = web_session.post(
|
|
394
|
+
endpoint_url, params=submission_params, json=to_post, headers=headers
|
|
395
|
+
)
|
|
375
396
|
result.raise_for_status()
|
|
376
|
-
logger.debug(
|
|
397
|
+
logger.debug(
|
|
398
|
+
"Workflow metrics were accepted by Dockstore. Dockstore response code: %s",
|
|
399
|
+
result.status_code,
|
|
400
|
+
)
|
|
377
401
|
except requests.HTTPError as e:
|
|
378
|
-
logger.warning(
|
|
402
|
+
logger.warning(
|
|
403
|
+
"Workflow metrics were not accepted by Dockstore. Dockstore complained: %s",
|
|
404
|
+
e.response.text,
|
|
405
|
+
)
|
|
379
406
|
raise
|
|
380
407
|
|
|
408
|
+
|
|
381
409
|
def get_metrics_url(trs_workflow_id: str, trs_version: str, execution_id: str) -> str:
|
|
382
410
|
"""
|
|
383
411
|
Get the URL where a workflow metrics object (for a workflow, or for a set of tasks) can be fetched back from.
|
|
384
412
|
"""
|
|
385
413
|
|
|
386
414
|
return f"{TRS_ROOT}/api/api/ga4gh/v2/extended/{quote(trs_workflow_id, safe='')}/versions/{quote(trs_version, safe='')}/execution?platform={DOCKSTORE_PLATFORM}&executionId={quote(execution_id, safe='')}"
|
|
387
|
-
|
toil/lib/ec2.py
CHANGED
|
@@ -1,19 +1,12 @@
|
|
|
1
|
+
import binascii
|
|
1
2
|
import logging
|
|
2
3
|
import time
|
|
3
|
-
from base64 import
|
|
4
|
-
import
|
|
5
|
-
from
|
|
6
|
-
from typing import (
|
|
7
|
-
TYPE_CHECKING,
|
|
8
|
-
Any,
|
|
9
|
-
Callable,
|
|
10
|
-
Literal,
|
|
11
|
-
Optional,
|
|
12
|
-
Union,
|
|
13
|
-
)
|
|
4
|
+
from base64 import b64decode, b64encode
|
|
5
|
+
from collections.abc import Callable, Generator, Iterable, Mapping
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
14
7
|
|
|
15
8
|
from toil.lib.aws.session import establish_boto3_session
|
|
16
|
-
from toil.lib.aws.utils import
|
|
9
|
+
from toil.lib.aws.utils import boto3_pager, flatten_tags
|
|
17
10
|
from toil.lib.exceptions import panic
|
|
18
11
|
from toil.lib.retry import (
|
|
19
12
|
ErrorCondition,
|
|
@@ -37,15 +30,13 @@ a_short_time = 5
|
|
|
37
30
|
a_long_time = 60 * 60
|
|
38
31
|
logger = logging.getLogger(__name__)
|
|
39
32
|
|
|
33
|
+
|
|
40
34
|
def is_base64(value: str) -> bool:
|
|
41
35
|
"""
|
|
42
36
|
Return True if value is base64-decodeable, and False otherwise.
|
|
43
37
|
"""
|
|
44
38
|
try:
|
|
45
|
-
b64decode(
|
|
46
|
-
value.encode("utf-8"),
|
|
47
|
-
validate=True
|
|
48
|
-
)
|
|
39
|
+
b64decode(value.encode("utf-8"), validate=True)
|
|
49
40
|
return True
|
|
50
41
|
except binascii.Error:
|
|
51
42
|
return False
|
|
@@ -91,7 +82,9 @@ def retry_ec2(t=a_short_time, retry_for=10 * a_short_time, retry_while=not_found
|
|
|
91
82
|
class UnexpectedResourceState(Exception):
|
|
92
83
|
def __init__(self, resource, to_state, state):
|
|
93
84
|
super().__init__(
|
|
94
|
-
"Expected state of
|
|
85
|
+
"Expected state of {} to be '{}' but got '{}'".format(
|
|
86
|
+
resource, to_state, state
|
|
87
|
+
)
|
|
95
88
|
)
|
|
96
89
|
|
|
97
90
|
|
|
@@ -176,7 +169,7 @@ def wait_instances_running(
|
|
|
176
169
|
reservations = boto3_pager(
|
|
177
170
|
boto3_ec2.describe_instances,
|
|
178
171
|
"Reservations",
|
|
179
|
-
InstanceIds=list(pending_ids)
|
|
172
|
+
InstanceIds=list(pending_ids),
|
|
180
173
|
)
|
|
181
174
|
instances = [
|
|
182
175
|
instance
|
|
@@ -228,7 +221,9 @@ def wait_spot_requests_active(
|
|
|
228
221
|
batch = []
|
|
229
222
|
for r in requests:
|
|
230
223
|
r: "SpotInstanceRequestTypeDef" # pycharm thinks it is a string
|
|
231
|
-
assert isinstance(
|
|
224
|
+
assert isinstance(
|
|
225
|
+
r, dict
|
|
226
|
+
), f"Found garbage posing as a spot request: {r}"
|
|
232
227
|
if r["State"] == "open":
|
|
233
228
|
open_ids.add(r["SpotInstanceRequestId"])
|
|
234
229
|
if r["Status"]["Code"] == "pending-evaluation":
|
|
@@ -320,7 +315,9 @@ def create_spot_instances(
|
|
|
320
315
|
) # boto3 image id is in the launch specification
|
|
321
316
|
|
|
322
317
|
user_data = spec["LaunchSpecification"].get("UserData", "")
|
|
323
|
-
assert is_base64(
|
|
318
|
+
assert is_base64(
|
|
319
|
+
user_data
|
|
320
|
+
), f"Spot user data needs to be base64-encoded: {user_data}"
|
|
324
321
|
|
|
325
322
|
for attempt in retry_ec2(
|
|
326
323
|
retry_for=a_long_time, retry_while=inconsistencies_detected
|
|
@@ -375,7 +372,9 @@ def create_spot_instances(
|
|
|
375
372
|
page = boto3_ec2.describe_instances(InstanceIds=instance_ids)
|
|
376
373
|
while page.get("NextToken") is not None:
|
|
377
374
|
yield page
|
|
378
|
-
page = boto3_ec2.describe_instances(
|
|
375
|
+
page = boto3_ec2.describe_instances(
|
|
376
|
+
InstanceIds=instance_ids, NextToken=page["NextToken"]
|
|
377
|
+
)
|
|
379
378
|
yield page
|
|
380
379
|
if not num_active:
|
|
381
380
|
message = "None of the spot requests entered the active state"
|
|
@@ -410,7 +409,9 @@ def create_ondemand_instances(
|
|
|
410
409
|
user_data: str = spec.get("UserData", "")
|
|
411
410
|
if user_data:
|
|
412
411
|
# Hope any real user data contains some characters not allowed in base64
|
|
413
|
-
assert not is_base64(
|
|
412
|
+
assert not is_base64(
|
|
413
|
+
user_data
|
|
414
|
+
), f"On-demand user data needs to not be base64-encoded: {user_data}"
|
|
414
415
|
|
|
415
416
|
instance_type = spec["InstanceType"]
|
|
416
417
|
logger.info("Creating %s instance(s) ... ", instance_type)
|
|
@@ -485,13 +486,13 @@ def create_instances(
|
|
|
485
486
|
key_name: str,
|
|
486
487
|
instance_type: str,
|
|
487
488
|
num_instances: int = 1,
|
|
488
|
-
security_group_ids:
|
|
489
|
-
user_data:
|
|
490
|
-
block_device_map:
|
|
491
|
-
instance_profile_arn:
|
|
492
|
-
placement_az:
|
|
489
|
+
security_group_ids: list | None = None,
|
|
490
|
+
user_data: str | bytes | None = None,
|
|
491
|
+
block_device_map: list[dict] | None = None,
|
|
492
|
+
instance_profile_arn: str | None = None,
|
|
493
|
+
placement_az: str | None = None,
|
|
493
494
|
subnet_id: str = None,
|
|
494
|
-
tags:
|
|
495
|
+
tags: dict[str, str] | None = None,
|
|
495
496
|
) -> list["Instance"]:
|
|
496
497
|
"""
|
|
497
498
|
Replaces create_ondemand_instances. Uses boto3 and returns a list of Boto3 instance dicts.
|
|
@@ -553,13 +554,13 @@ def create_launch_template(
|
|
|
553
554
|
image_id: str,
|
|
554
555
|
key_name: str,
|
|
555
556
|
instance_type: str,
|
|
556
|
-
security_group_ids:
|
|
557
|
-
user_data:
|
|
558
|
-
block_device_map:
|
|
559
|
-
instance_profile_arn:
|
|
560
|
-
placement_az:
|
|
561
|
-
subnet_id:
|
|
562
|
-
tags:
|
|
557
|
+
security_group_ids: list | None = None,
|
|
558
|
+
user_data: str | bytes | None = None,
|
|
559
|
+
block_device_map: list[dict] | None = None,
|
|
560
|
+
instance_profile_arn: str | None = None,
|
|
561
|
+
placement_az: str | None = None,
|
|
562
|
+
subnet_id: str | None = None,
|
|
563
|
+
tags: dict[str, str] | None = None,
|
|
563
564
|
) -> str:
|
|
564
565
|
"""
|
|
565
566
|
Creates a launch template with the given name for launching instances with the given parameters.
|
|
@@ -640,10 +641,10 @@ def create_auto_scaling_group(
|
|
|
640
641
|
vpc_subnets: list[str],
|
|
641
642
|
min_size: int,
|
|
642
643
|
max_size: int,
|
|
643
|
-
instance_types:
|
|
644
|
-
spot_bid:
|
|
644
|
+
instance_types: Iterable[str] | None = None,
|
|
645
|
+
spot_bid: float | None = None,
|
|
645
646
|
spot_cheapest: bool = False,
|
|
646
|
-
tags:
|
|
647
|
+
tags: dict[str, str] | None = None,
|
|
647
648
|
) -> None:
|
|
648
649
|
"""
|
|
649
650
|
Create a new Auto Scaling Group with the given name (which is also its
|
toil/lib/ec2nodes.py
CHANGED
|
@@ -18,7 +18,7 @@ import os
|
|
|
18
18
|
import re
|
|
19
19
|
import shutil
|
|
20
20
|
import textwrap
|
|
21
|
-
from typing import Any
|
|
21
|
+
from typing import Any
|
|
22
22
|
|
|
23
23
|
import enlighten # type: ignore
|
|
24
24
|
|
|
@@ -130,7 +130,7 @@ def is_number(s: str) -> bool:
|
|
|
130
130
|
|
|
131
131
|
def parse_storage(
|
|
132
132
|
storage_info: str,
|
|
133
|
-
) ->
|
|
133
|
+
) -> list[int] | tuple[int | float, float]:
|
|
134
134
|
"""
|
|
135
135
|
Parses EC2 JSON storage param string into a number.
|
|
136
136
|
|
|
@@ -158,8 +158,15 @@ def parse_storage(
|
|
|
158
158
|
and specs[3] == "SSD"
|
|
159
159
|
):
|
|
160
160
|
return 1, float(specs[0].replace(",", ""))
|
|
161
|
-
elif
|
|
162
|
-
|
|
161
|
+
elif (
|
|
162
|
+
is_number(specs[0])
|
|
163
|
+
and specs[1].lower() == "x"
|
|
164
|
+
and is_number(specs[2][:-2])
|
|
165
|
+
and specs[2][-2:] == "GB"
|
|
166
|
+
):
|
|
167
|
+
return float(specs[0].replace(",", "")), float(
|
|
168
|
+
specs[2][:-2].replace(",", "")
|
|
169
|
+
)
|
|
163
170
|
else:
|
|
164
171
|
raise RuntimeError(
|
|
165
172
|
f"EC2 JSON format has likely changed. Error parsing disk specs : {storage_info.strip()}"
|
toil/lib/exceptions.py
CHANGED
|
@@ -14,11 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
# 5.14.2018: copied into Toil from https://github.com/BD2KGenomics/bd2k-python-lib
|
|
16
16
|
|
|
17
|
-
import sys
|
|
18
|
-
from typing import Optional
|
|
19
17
|
import logging
|
|
20
|
-
|
|
18
|
+
import sys
|
|
21
19
|
from types import TracebackType
|
|
20
|
+
from urllib.parse import ParseResult
|
|
22
21
|
|
|
23
22
|
|
|
24
23
|
# TODO: isn't this built in to Python 3 now?
|
|
@@ -42,7 +41,7 @@ class panic:
|
|
|
42
41
|
the primary exception will be reraised.
|
|
43
42
|
"""
|
|
44
43
|
|
|
45
|
-
def __init__(self, log:
|
|
44
|
+
def __init__(self, log: logging.Logger | None = None) -> None:
|
|
46
45
|
super().__init__()
|
|
47
46
|
self.log = log
|
|
48
47
|
self.exc_info = None
|
|
@@ -57,7 +56,11 @@ class panic:
|
|
|
57
56
|
raise_(exc_type, exc_value, traceback)
|
|
58
57
|
|
|
59
58
|
|
|
60
|
-
def raise_(
|
|
59
|
+
def raise_(
|
|
60
|
+
exc_type: type[BaseException] | None,
|
|
61
|
+
exc_value: BaseException | None,
|
|
62
|
+
traceback: TracebackType | None,
|
|
63
|
+
) -> None:
|
|
61
64
|
if exc_value is not None:
|
|
62
65
|
exc = exc_value
|
|
63
66
|
else:
|
toil/lib/ftp_utils.py
CHANGED
|
@@ -20,7 +20,7 @@ import logging
|
|
|
20
20
|
import netrc
|
|
21
21
|
import os
|
|
22
22
|
from contextlib import closing
|
|
23
|
-
from typing import
|
|
23
|
+
from typing import IO, Any, cast
|
|
24
24
|
from urllib.parse import urlparse
|
|
25
25
|
from urllib.request import urlopen
|
|
26
26
|
|
|
@@ -33,11 +33,10 @@ class FtpFsAccess:
|
|
|
33
33
|
|
|
34
34
|
Taken and modified from https://github.com/ohsu-comp-bio/cwl-tes/blob/03f0096f9fae8acd527687d3460a726e09190c3a/cwl_tes/ftp.py#L37-L251
|
|
35
35
|
"""
|
|
36
|
+
|
|
36
37
|
# TODO: Properly support FTP over SSL
|
|
37
38
|
|
|
38
|
-
def __init__(
|
|
39
|
-
self, cache: Optional[dict[Any, ftplib.FTP]] = None
|
|
40
|
-
):
|
|
39
|
+
def __init__(self, cache: dict[Any, ftplib.FTP] | None = None):
|
|
41
40
|
"""
|
|
42
41
|
FTP object to handle FTP connections. By default, connect over FTP with TLS.
|
|
43
42
|
|
|
@@ -107,14 +106,12 @@ class FtpFsAccess:
|
|
|
107
106
|
"""
|
|
108
107
|
if "r" in mode:
|
|
109
108
|
host, port, user, passwd, path = self._parse_url(fn)
|
|
110
|
-
handle = urlopen("ftp://{}:{}@{}:{}/{}"
|
|
109
|
+
handle = urlopen(f"ftp://{user}:{passwd}@{host}:{port}/{path}")
|
|
111
110
|
return cast(IO[bytes], closing(handle))
|
|
112
111
|
# TODO: support write mode
|
|
113
112
|
raise Exception("Write mode FTP not implemented")
|
|
114
113
|
|
|
115
|
-
def _parse_url(
|
|
116
|
-
self, url: str
|
|
117
|
-
) -> tuple[str, int, Optional[str], Optional[str], str]:
|
|
114
|
+
def _parse_url(self, url: str) -> tuple[str, int, str | None, str | None, str]:
|
|
118
115
|
"""
|
|
119
116
|
Parse an FTP url into hostname, username, password, and path
|
|
120
117
|
:param url:
|
|
@@ -147,7 +144,7 @@ class FtpFsAccess:
|
|
|
147
144
|
user = "anonymous"
|
|
148
145
|
return host, port, user, passwd, path
|
|
149
146
|
|
|
150
|
-
def _connect(self, url: str) ->
|
|
147
|
+
def _connect(self, url: str) -> ftplib.FTP | None:
|
|
151
148
|
"""
|
|
152
149
|
Connect to an FTP server. Handles authentication.
|
|
153
150
|
:param url: FTP url
|
|
@@ -177,9 +174,7 @@ class FtpFsAccess:
|
|
|
177
174
|
return ftp
|
|
178
175
|
return None
|
|
179
176
|
|
|
180
|
-
def _recall_credentials(
|
|
181
|
-
self, desired_host: str
|
|
182
|
-
) -> tuple[Optional[str], Optional[str]]:
|
|
177
|
+
def _recall_credentials(self, desired_host: str) -> tuple[str | None, str | None]:
|
|
183
178
|
"""
|
|
184
179
|
Grab the cached credentials
|
|
185
180
|
:param desired_host: FTP hostname
|
|
@@ -190,7 +185,7 @@ class FtpFsAccess:
|
|
|
190
185
|
return user, passwd
|
|
191
186
|
return None, None
|
|
192
187
|
|
|
193
|
-
def size(self, fn: str) ->
|
|
188
|
+
def size(self, fn: str) -> int | None:
|
|
194
189
|
"""
|
|
195
190
|
Get the size of an FTP object
|
|
196
191
|
:param fn: FTP url
|
|
@@ -207,7 +202,7 @@ class FtpFsAccess:
|
|
|
207
202
|
# https://stackoverflow.com/questions/22090001/get-folder-size-using-ftplib/22093848#22093848
|
|
208
203
|
ftp.voidcmd("TYPE I")
|
|
209
204
|
return ftp.size(path)
|
|
210
|
-
handle = urlopen("ftp://{}:{}@{}:{}/{}"
|
|
205
|
+
handle = urlopen(f"ftp://{user}:{passwd}@{host}:{port}/{path}")
|
|
211
206
|
info = handle.info()
|
|
212
207
|
handle.close()
|
|
213
208
|
if "Content-length" in info:
|