toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/lib/conversions.py
CHANGED
|
@@ -46,8 +46,10 @@ def convert_units(num: float,
|
|
|
46
46
|
src_unit: str,
|
|
47
47
|
dst_unit: str = 'B') -> float:
|
|
48
48
|
"""Returns a float representing the converted input in dst_units."""
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
if not src_unit.lower() in VALID_PREFIXES:
|
|
50
|
+
raise RuntimeError(f"{src_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
|
|
51
|
+
if not dst_unit.lower() in VALID_PREFIXES:
|
|
52
|
+
raise RuntimeError(f"{dst_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
|
|
51
53
|
return (num * bytes_in_unit(src_unit)) / bytes_in_unit(dst_unit)
|
|
52
54
|
|
|
53
55
|
|
|
@@ -60,7 +62,8 @@ def parse_memory_string(string: str) -> Tuple[float, str]:
|
|
|
60
62
|
# find the first character of the unit
|
|
61
63
|
if character not in '0123456789.-_ ':
|
|
62
64
|
units = string[i:].strip()
|
|
63
|
-
|
|
65
|
+
if not units.lower() in VALID_PREFIXES:
|
|
66
|
+
raise RuntimeError(f"{units} not a valid unit, valid units are {VALID_PREFIXES}.")
|
|
64
67
|
return float(string[:i]), units
|
|
65
68
|
return float(string), 'b'
|
|
66
69
|
|
|
@@ -71,6 +74,7 @@ def human2bytes(string: str) -> int:
|
|
|
71
74
|
integer number of bytes.
|
|
72
75
|
"""
|
|
73
76
|
value, unit = parse_memory_string(string)
|
|
77
|
+
|
|
74
78
|
return int(convert_units(value, src_unit=unit, dst_unit='b'))
|
|
75
79
|
|
|
76
80
|
|
|
@@ -124,3 +128,22 @@ def hms_duration_to_seconds(hms: str) -> float:
|
|
|
124
128
|
seconds += float(vals_to_convert[2])
|
|
125
129
|
|
|
126
130
|
return seconds
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def strtobool(val: str) -> bool:
|
|
134
|
+
"""
|
|
135
|
+
Make a human-readable string into a bool.
|
|
136
|
+
|
|
137
|
+
Convert a string along the lines of "y", "1", "ON", "TrUe", or
|
|
138
|
+
"Yes" to True, and the corresponding false-ish values to False.
|
|
139
|
+
"""
|
|
140
|
+
# We only track prefixes, so "y" covers "y", "yes",
|
|
141
|
+
# and "yeah no" and makes them all True.
|
|
142
|
+
TABLE = {True: ["1", "on", "y", "t"], False: ["0", "off", "n", "f"]}
|
|
143
|
+
lowered = val.lower()
|
|
144
|
+
for result, prefixes in TABLE.items():
|
|
145
|
+
for prefix in prefixes:
|
|
146
|
+
if lowered.startswith(prefix):
|
|
147
|
+
return result
|
|
148
|
+
raise ValueError(f"Cannot convert \"{val}\" to a bool")
|
|
149
|
+
|
toil/lib/docker.py
CHANGED
|
@@ -17,7 +17,7 @@ import os
|
|
|
17
17
|
import re
|
|
18
18
|
import struct
|
|
19
19
|
from shlex import quote
|
|
20
|
-
from typing import
|
|
20
|
+
from typing import List, Optional
|
|
21
21
|
|
|
22
22
|
import requests
|
|
23
23
|
|
|
@@ -27,7 +27,6 @@ from docker.errors import (ContainerError,
|
|
|
27
27
|
NotFound,
|
|
28
28
|
create_api_error_from_http_exception)
|
|
29
29
|
from docker.utils.socket import consume_socket_output, demux_adaptor
|
|
30
|
-
|
|
31
30
|
from toil.lib.accelerators import get_host_accelerator_numbers
|
|
32
31
|
|
|
33
32
|
logger = logging.getLogger(__name__)
|
|
@@ -84,16 +83,17 @@ def apiDockerCall(job,
|
|
|
84
83
|
jobs, with the intention that failed/orphaned docker jobs be handled
|
|
85
84
|
appropriately.
|
|
86
85
|
|
|
87
|
-
Example of using dockerCall in toil to index a FASTA file with SAMtools
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
86
|
+
Example of using dockerCall in toil to index a FASTA file with SAMtools::
|
|
87
|
+
|
|
88
|
+
def toil_job(job):
|
|
89
|
+
working_dir = job.fileStore.getLocalTempDir()
|
|
90
|
+
path = job.fileStore.readGlobalFile(ref_id,
|
|
91
|
+
os.path.join(working_dir, 'ref.fasta')
|
|
92
|
+
parameters = ['faidx', path]
|
|
93
|
+
apiDockerCall(job,
|
|
94
|
+
image='quay.io/ucgc_cgl/samtools:latest',
|
|
95
|
+
working_dir=working_dir,
|
|
96
|
+
parameters=parameters)
|
|
97
97
|
|
|
98
98
|
Note that when run with detach=False, or with detach=True and stdout=True
|
|
99
99
|
or stderr=True, this is a blocking call. When run with detach=True and
|
|
@@ -103,13 +103,13 @@ def apiDockerCall(job,
|
|
|
103
103
|
:param toil.Job.job job: The Job instance for the calling function.
|
|
104
104
|
:param str image: Name of the Docker image to be used.
|
|
105
105
|
(e.g. 'quay.io/ucsc_cgl/samtools:latest')
|
|
106
|
-
:param list[str] parameters: A list of string elements.
|
|
106
|
+
:param list[str] parameters: A list of string elements. If there are
|
|
107
107
|
multiple elements, these will be joined with
|
|
108
|
-
spaces.
|
|
108
|
+
spaces. This handling of multiple elements
|
|
109
109
|
provides backwards compatibility with previous
|
|
110
110
|
versions which called docker using
|
|
111
111
|
subprocess.check_call().
|
|
112
|
-
|
|
112
|
+
If list of lists: list[list[str]], then treat
|
|
113
113
|
as successive commands chained with pipe.
|
|
114
114
|
:param str working_dir: The working directory.
|
|
115
115
|
:param int deferParam: Action to take on the container upon job completion.
|
|
@@ -225,8 +225,8 @@ def apiDockerCall(job,
|
|
|
225
225
|
working_dir = os.path.abspath(working_dir)
|
|
226
226
|
|
|
227
227
|
# Ensure the user has passed a valid value for deferParam
|
|
228
|
-
|
|
229
|
-
'Please provide a valid value for deferParam.'
|
|
228
|
+
if deferParam not in (None, FORGO, STOP, RM):
|
|
229
|
+
raise RuntimeError('Please provide a valid value for deferParam.')
|
|
230
230
|
|
|
231
231
|
client = docker.from_env(version='auto', timeout=timeout)
|
|
232
232
|
|
|
@@ -413,12 +413,11 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
|
|
|
413
413
|
|
|
414
414
|
:param container_name: Name of the container being checked.
|
|
415
415
|
:param int timeout: Use the given timeout in seconds for interactions with
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
essentially indefinitely).
|
|
416
|
+
the Docker daemon. Note that the underlying docker module is not always
|
|
417
|
+
able to abort ongoing reads and writes in order to respect the timeout.
|
|
418
|
+
Defaults to 1 year (i.e. wait essentially indefinitely).
|
|
420
419
|
:returns: True if status is 'running', False if status is anything else,
|
|
421
|
-
|
|
420
|
+
and None if the container does not exist.
|
|
422
421
|
"""
|
|
423
422
|
client = docker.from_env(version='auto', timeout=timeout)
|
|
424
423
|
try:
|
|
@@ -439,7 +438,7 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
|
|
|
439
438
|
def getContainerName(job):
|
|
440
439
|
"""
|
|
441
440
|
Create a random string including the job name, and return it. Name will
|
|
442
|
-
match [a-zA-Z0-9][a-zA-Z0-9_.-]
|
|
441
|
+
match ``[a-zA-Z0-9][a-zA-Z0-9_.-]``.
|
|
443
442
|
"""
|
|
444
443
|
parts = ['toil', str(job.description), base64.b64encode(os.urandom(9), b'-_').decode('utf-8')]
|
|
445
444
|
name = re.sub('[^a-zA-Z0-9_.-]', '', '--'.join(parts))
|
toil/lib/ec2.py
CHANGED
|
@@ -103,11 +103,13 @@ def wait_instances_running(ec2, instances: Iterable[Boto2Instance]) -> Iterable[
|
|
|
103
103
|
if i.state == 'pending':
|
|
104
104
|
pending_ids.add(i.id)
|
|
105
105
|
elif i.state == 'running':
|
|
106
|
-
|
|
106
|
+
if i.id in running_ids:
|
|
107
|
+
raise RuntimeError("An instance was already added to the list of running instance IDs. Maybe there is a duplicate.")
|
|
107
108
|
running_ids.add(i.id)
|
|
108
109
|
yield i
|
|
109
110
|
else:
|
|
110
|
-
|
|
111
|
+
if i.id in other_ids:
|
|
112
|
+
raise RuntimeError("An instance was already added to the list of other instances. Maybe there is a duplicate.")
|
|
111
113
|
other_ids.add(i.id)
|
|
112
114
|
yield i
|
|
113
115
|
logger.info('%i instance(s) pending, %i running, %i other.',
|
|
@@ -130,10 +132,10 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
130
132
|
:param requests: The requests to wait on.
|
|
131
133
|
|
|
132
134
|
:param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
|
|
133
|
-
|
|
135
|
+
timeout occurs, the remaining open requests will be cancelled.
|
|
134
136
|
|
|
135
137
|
:param tentative: if True, give up on a spot request at the earliest indication of it
|
|
136
|
-
|
|
138
|
+
not being fulfilled immediately
|
|
137
139
|
|
|
138
140
|
"""
|
|
139
141
|
|
|
@@ -166,11 +168,13 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
|
|
|
166
168
|
'Request %s entered status %s indicating that it will not be '
|
|
167
169
|
'fulfilled anytime soon.', r.id, r.status.code)
|
|
168
170
|
elif r.state == 'active':
|
|
169
|
-
|
|
171
|
+
if r.id in active_ids:
|
|
172
|
+
raise RuntimeError("A request was already added to the list of active requests. Maybe there are duplicate requests.")
|
|
170
173
|
active_ids.add(r.id)
|
|
171
174
|
batch.append(r)
|
|
172
175
|
else:
|
|
173
|
-
|
|
176
|
+
if r.id in other_ids:
|
|
177
|
+
raise RuntimeError("A request was already added to the list of other IDs. Maybe there are duplicate requests.")
|
|
174
178
|
other_ids.add(r.id)
|
|
175
179
|
batch.append(r)
|
|
176
180
|
if batch:
|
toil/lib/ec2nodes.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2015-
|
|
1
|
+
# Copyright (C) 2015-2024 Regents of the University of California
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -17,12 +17,17 @@ import logging
|
|
|
17
17
|
import os
|
|
18
18
|
import re
|
|
19
19
|
import textwrap
|
|
20
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
21
|
-
|
|
22
20
|
import requests
|
|
21
|
+
import shutil
|
|
22
|
+
import enlighten # type: ignore
|
|
23
|
+
|
|
24
|
+
from typing import Dict, List, Tuple, Union, Any
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
logger = logging.getLogger(__name__)
|
|
28
|
+
manager = enlighten.get_manager()
|
|
25
29
|
dirname = os.path.dirname(__file__)
|
|
30
|
+
region_json_dirname = os.path.join(dirname, 'region_jsons')
|
|
26
31
|
|
|
27
32
|
|
|
28
33
|
EC2Regions = {'us-west-1': 'US West (N. California)',
|
|
@@ -83,7 +88,7 @@ class InstanceType:
|
|
|
83
88
|
return False
|
|
84
89
|
|
|
85
90
|
|
|
86
|
-
def
|
|
91
|
+
def is_number(s: str) -> bool:
|
|
87
92
|
"""
|
|
88
93
|
Determines if a unicode string (that may include commas) is a number.
|
|
89
94
|
|
|
@@ -105,7 +110,7 @@ def isNumber(s: str) -> bool:
|
|
|
105
110
|
return False
|
|
106
111
|
|
|
107
112
|
|
|
108
|
-
def
|
|
113
|
+
def parse_storage(storage_info: str) -> Union[List[int], Tuple[Union[int, float], float]]:
|
|
109
114
|
"""
|
|
110
115
|
Parses EC2 JSON storage param string into a number.
|
|
111
116
|
|
|
@@ -117,22 +122,22 @@ def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float],
|
|
|
117
122
|
"8 x 1.9 NVMe SSD"
|
|
118
123
|
"900 GB NVMe SSD"
|
|
119
124
|
|
|
120
|
-
:param str
|
|
125
|
+
:param str storage_info: EC2 JSON storage param string.
|
|
121
126
|
:return: Two floats representing: (# of disks), and (disk_capacity in GiB of each disk).
|
|
122
127
|
"""
|
|
123
|
-
if
|
|
128
|
+
if storage_info == "EBS only":
|
|
124
129
|
return [0, 0]
|
|
125
130
|
else:
|
|
126
|
-
specs =
|
|
127
|
-
if
|
|
131
|
+
specs = storage_info.strip().split()
|
|
132
|
+
if is_number(specs[0]) and specs[1] == 'x' and is_number(specs[2]):
|
|
128
133
|
return float(specs[0].replace(',', '')), float(specs[2].replace(',', ''))
|
|
129
|
-
elif
|
|
134
|
+
elif is_number(specs[0]) and specs[1] == 'GB' and specs[2] == 'NVMe' and specs[3] == 'SSD':
|
|
130
135
|
return 1, float(specs[0].replace(',', ''))
|
|
131
136
|
else:
|
|
132
137
|
raise RuntimeError('EC2 JSON format has likely changed. Error parsing disk specs.')
|
|
133
138
|
|
|
134
139
|
|
|
135
|
-
def
|
|
140
|
+
def parse_memory(mem_info: str) -> float:
|
|
136
141
|
"""
|
|
137
142
|
Returns EC2 'memory' string as a float.
|
|
138
143
|
|
|
@@ -140,18 +145,19 @@ def parseMemory(memAttribute: str) -> float:
|
|
|
140
145
|
Amazon loves to put commas in their numbers, so we have to accommodate that.
|
|
141
146
|
If the syntax ever changes, this will raise.
|
|
142
147
|
|
|
143
|
-
:param
|
|
148
|
+
:param mem_info: EC2 JSON memory param string.
|
|
144
149
|
:return: A float representing memory in GiB.
|
|
145
150
|
"""
|
|
146
|
-
mem =
|
|
151
|
+
mem = mem_info.replace(',', '').split()
|
|
147
152
|
if mem[1] == 'GiB':
|
|
148
153
|
return float(mem[0])
|
|
149
154
|
else:
|
|
150
155
|
raise RuntimeError('EC2 JSON format has likely changed. Error parsing memory.')
|
|
151
156
|
|
|
152
157
|
|
|
153
|
-
def
|
|
154
|
-
"""
|
|
158
|
+
def download_region_json(filename: str, region: str = 'us-east-1') -> None:
|
|
159
|
+
"""
|
|
160
|
+
Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.
|
|
155
161
|
|
|
156
162
|
See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/
|
|
157
163
|
|
|
@@ -159,61 +165,45 @@ def fetchEC2Index(filename: str) -> None:
|
|
|
159
165
|
aws instance name (example: 't2.micro'), and the value is an
|
|
160
166
|
InstanceType object representing that aws instance name.
|
|
161
167
|
"""
|
|
162
|
-
|
|
168
|
+
response = requests.get(f'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/{region}/index.json', stream=True)
|
|
169
|
+
file_size = int(response.headers.get("content-length", 0))
|
|
170
|
+
print(f'Downloading ~{file_size / 1000000000}Gb {region} AWS billing file to: {filename}')
|
|
163
171
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
else:
|
|
170
|
-
raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))
|
|
172
|
+
with manager.counter(total=file_size, desc=os.path.basename(filename), unit='bytes', leave=False) as progress_bar:
|
|
173
|
+
with open(filename, "wb") as file:
|
|
174
|
+
for data in response.iter_content(1048576):
|
|
175
|
+
progress_bar.update(len(data))
|
|
176
|
+
file.write(data)
|
|
171
177
|
|
|
172
178
|
|
|
173
|
-
def
|
|
179
|
+
def reduce_region_json_size(filename:str) -> List[Dict[str, Any]]:
|
|
174
180
|
"""
|
|
175
|
-
|
|
181
|
+
Deletes information in the json file that we don't need, and rewrites it. This makes the file smaller.
|
|
176
182
|
|
|
177
|
-
:
|
|
178
|
-
|
|
183
|
+
The reason being: we used to download the unified AWS Bulk API JSON, which eventually crept up to 5.6Gb,
|
|
184
|
+
the loading of which could not be done on a 32Gb RAM machine. Now we download each region JSON individually
|
|
185
|
+
(with AWS's new Query API), but even those may eventually one day grow ridiculously large, so we do what we can to
|
|
186
|
+
keep the file sizes down (and thus also the amount loaded into memory) to keep this script working for longer.
|
|
179
187
|
"""
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
disks, disk_capacity = parseStorage(v["attributes"]["storage"])
|
|
200
|
-
|
|
201
|
-
# Determines whether the instance type is from an ARM or AMD family
|
|
202
|
-
# ARM instance names include a digit followed by a 'g' before the instance size
|
|
203
|
-
architecture = 'arm64' if re.search(r".*\dg.*\..*", i["instanceType"]) else 'amd64'
|
|
204
|
-
|
|
205
|
-
instance = InstanceType(name=i["instanceType"],
|
|
206
|
-
cores=i["vcpu"],
|
|
207
|
-
memory=parseMemory(i["memory"]),
|
|
208
|
-
disks=disks,
|
|
209
|
-
disk_capacity=disk_capacity,
|
|
210
|
-
architecture=architecture)
|
|
211
|
-
if instance in ec2InstanceList:
|
|
212
|
-
raise RuntimeError('EC2 JSON format has likely changed. '
|
|
213
|
-
'Duplicate instance {} found.'.format(instance))
|
|
214
|
-
ec2InstanceList.append(instance)
|
|
215
|
-
print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.')
|
|
216
|
-
return {_.name: _ for _ in ec2InstanceList}
|
|
188
|
+
with open(filename, 'r') as f:
|
|
189
|
+
aws_products = json.loads(f.read())['products']
|
|
190
|
+
aws_product_list = list()
|
|
191
|
+
for k in list(aws_products.keys()):
|
|
192
|
+
ec2_attributes = aws_products[k]['attributes']
|
|
193
|
+
if (ec2_attributes.get('tenancy') == 'Shared' and
|
|
194
|
+
ec2_attributes.get('operatingSystem') == 'Linux' and
|
|
195
|
+
ec2_attributes.get('operation') == 'RunInstances' and
|
|
196
|
+
ec2_attributes.get('usagetype').endswith('BoxUsage:' + ec2_attributes['instanceType'])):
|
|
197
|
+
aws_product_list.append(dict(disk=ec2_attributes["storage"],
|
|
198
|
+
loc=ec2_attributes["location"],
|
|
199
|
+
name=ec2_attributes["instanceType"],
|
|
200
|
+
mem=ec2_attributes["memory"],
|
|
201
|
+
cpu=ec2_attributes["vcpu"]))
|
|
202
|
+
del aws_products[k]
|
|
203
|
+
del aws_products
|
|
204
|
+
with open(filename, 'w') as f:
|
|
205
|
+
f.write(json.dumps(dict(aws=aws_product_list), indent=2))
|
|
206
|
+
return aws_product_list
|
|
217
207
|
|
|
218
208
|
|
|
219
209
|
def updateStaticEC2Instances() -> None:
|
|
@@ -225,39 +215,58 @@ def updateStaticEC2Instances() -> None:
|
|
|
225
215
|
:return: Nothing. Writes a new 'generatedEC2Lists.py' file.
|
|
226
216
|
"""
|
|
227
217
|
print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
|
|
228
|
-
"This may take a while, depending on your internet connection
|
|
218
|
+
"This may take a while, depending on your internet connection.\n")
|
|
229
219
|
|
|
230
|
-
|
|
231
|
-
|
|
220
|
+
original_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists.py') # original
|
|
221
|
+
if not os.path.exists(original_aws_instance_list):
|
|
222
|
+
raise RuntimeError(f"Path {original_aws_instance_list} does not exist.")
|
|
232
223
|
# use a temporary file until all info is fetched
|
|
233
|
-
|
|
234
|
-
if os.path.exists(
|
|
235
|
-
os.remove(
|
|
224
|
+
updated_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists_tmp.py') # temp
|
|
225
|
+
if os.path.exists(updated_aws_instance_list):
|
|
226
|
+
os.remove(updated_aws_instance_list)
|
|
236
227
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
# and debugging is faster with the file stored locally
|
|
240
|
-
awsJsonIndex = os.path.join(dirname, 'index.json')
|
|
241
|
-
|
|
242
|
-
if not os.path.exists(awsJsonIndex):
|
|
243
|
-
fetchEC2Index(filename=awsJsonIndex)
|
|
244
|
-
else:
|
|
245
|
-
print('Reusing previously downloaded json @: ' + awsJsonIndex)
|
|
246
|
-
|
|
247
|
-
with open(awsJsonIndex) as f:
|
|
248
|
-
awsProductDict = json.loads(f.read())
|
|
228
|
+
if not os.path.exists(region_json_dirname):
|
|
229
|
+
os.mkdir(region_json_dirname)
|
|
249
230
|
|
|
250
231
|
currentEC2List = []
|
|
251
232
|
instancesByRegion: Dict[str, List[str]] = {}
|
|
252
|
-
for
|
|
253
|
-
|
|
233
|
+
for region in EC2Regions.keys():
|
|
234
|
+
region_json = os.path.join(region_json_dirname, f'{region}.json')
|
|
235
|
+
|
|
236
|
+
if os.path.exists(region_json):
|
|
237
|
+
try:
|
|
238
|
+
with open(region_json, 'r') as f:
|
|
239
|
+
aws_products = json.loads(f.read())['aws']
|
|
240
|
+
print(f'Reusing previously downloaded json @: {region_json}')
|
|
241
|
+
except:
|
|
242
|
+
os.remove(region_json)
|
|
243
|
+
download_region_json(filename=region_json, region=region)
|
|
244
|
+
aws_products = reduce_region_json_size(filename=region_json)
|
|
245
|
+
else:
|
|
246
|
+
download_region_json(filename=region_json, region=region)
|
|
247
|
+
aws_products = reduce_region_json_size(filename=region_json)
|
|
248
|
+
|
|
249
|
+
ec2InstanceList = []
|
|
250
|
+
for i in aws_products:
|
|
251
|
+
disks, disk_capacity = parse_storage(i["disk"])
|
|
252
|
+
# Determines whether the instance type is from an ARM or AMD family
|
|
253
|
+
# ARM instance names include a digit followed by a 'g' before the instance size
|
|
254
|
+
architecture = 'arm64' if re.search(r".*\dg.*\..*", i["name"]) else 'amd64'
|
|
255
|
+
ec2InstanceList.append(InstanceType(name=i["name"],
|
|
256
|
+
cores=i["cpu"],
|
|
257
|
+
memory=parse_memory(i["mem"]),
|
|
258
|
+
disks=disks,
|
|
259
|
+
disk_capacity=disk_capacity,
|
|
260
|
+
architecture=architecture))
|
|
261
|
+
print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.\n')
|
|
262
|
+
currentEC2Dict = {_.name: _ for _ in ec2InstanceList}
|
|
254
263
|
for instanceName, instanceTypeObj in currentEC2Dict.items():
|
|
255
264
|
if instanceTypeObj not in currentEC2List:
|
|
256
265
|
currentEC2List.append(instanceTypeObj)
|
|
257
|
-
instancesByRegion.setdefault(
|
|
266
|
+
instancesByRegion.setdefault(region, []).append(instanceName)
|
|
258
267
|
|
|
259
268
|
# write provenance note, copyright and imports
|
|
260
|
-
with open(
|
|
269
|
+
with open(updated_aws_instance_list, 'w') as f:
|
|
261
270
|
f.write(textwrap.dedent('''
|
|
262
271
|
# !!! AUTOGENERATED FILE !!!
|
|
263
272
|
# Update with: src/toil/utils/toilUpdateEC2Instances.py
|
|
@@ -278,16 +287,13 @@ def updateStaticEC2Instances() -> None:
|
|
|
278
287
|
from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=datetime.date.today().strftime("%Y"))[1:])
|
|
279
288
|
|
|
280
289
|
# write header of total EC2 instance type list
|
|
281
|
-
genString =
|
|
282
|
-
num=str(len(currentEC2List)), date=str(datetime.datetime.now()))
|
|
290
|
+
genString = f'# {len(currentEC2List)} Instance Types. Generated {datetime.datetime.now()}.\n'
|
|
283
291
|
genString = genString + "E2Instances = {\n"
|
|
284
292
|
sortedCurrentEC2List = sorted(currentEC2List, key=lambda x: x.name)
|
|
285
293
|
|
|
286
294
|
# write the list of all instances types
|
|
287
295
|
for i in sortedCurrentEC2List:
|
|
288
|
-
|
|
289
|
-
"\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity, architecture=i.architecture)
|
|
290
|
-
genString = genString + z
|
|
296
|
+
genString = genString + f" '{i.name}': InstanceType(name='{i.name}', cores={i.cores}, memory={i.memory}, disks={i.disks}, disk_capacity={i.disk_capacity}, architecture='{i.architecture}'),\n"
|
|
291
297
|
genString = genString + '}\n\n'
|
|
292
298
|
|
|
293
299
|
genString = genString + 'regionDict = {\n'
|
|
@@ -301,19 +307,19 @@ def updateStaticEC2Instances() -> None:
|
|
|
301
307
|
if genString.endswith(',\n'):
|
|
302
308
|
genString = genString[:-len(',\n')]
|
|
303
309
|
genString = genString + '}\n'
|
|
304
|
-
with open(
|
|
310
|
+
with open(updated_aws_instance_list, 'a+') as f:
|
|
305
311
|
f.write(genString)
|
|
306
312
|
|
|
307
313
|
# append key for fetching at the end
|
|
308
314
|
regionKey = '\nec2InstancesByRegion = {region: [E2Instances[i] for i in instances] for region, instances in regionDict.items()}\n'
|
|
309
315
|
|
|
310
|
-
with open(
|
|
316
|
+
with open(updated_aws_instance_list, 'a+') as f:
|
|
311
317
|
f.write(regionKey)
|
|
312
|
-
|
|
313
|
-
if os.path.exists(origFile):
|
|
314
|
-
os.remove(origFile)
|
|
318
|
+
|
|
315
319
|
# replace the instance list with a current list
|
|
316
|
-
os.rename(
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
+
os.rename(updated_aws_instance_list, original_aws_instance_list)
|
|
321
|
+
|
|
322
|
+
# delete the aws region json file directory
|
|
323
|
+
if os.path.exists(region_json_dirname):
|
|
324
|
+
print(f'Update Successful! Removing AWS Region JSON Files @: {region_json_dirname}')
|
|
325
|
+
shutil.rmtree(region_json_dirname)
|
toil/lib/encryption/_nacl.py
CHANGED
|
@@ -53,7 +53,8 @@ def encrypt(message: bytes, keyPath: str) -> bytes:
|
|
|
53
53
|
# of a collision is astronomically low. (This approach is
|
|
54
54
|
# recommended in the libsodium documentation.)
|
|
55
55
|
nonce = nacl.utils.random(SecretBox.NONCE_SIZE)
|
|
56
|
-
|
|
56
|
+
if len(nonce) != SecretBox.NONCE_SIZE:
|
|
57
|
+
raise RuntimeError("Generated nonce is the wrong size.")
|
|
57
58
|
return bytes(sb.encrypt(message, nonce))
|
|
58
59
|
|
|
59
60
|
|