toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/lib/conversions.py CHANGED
@@ -46,8 +46,10 @@ def convert_units(num: float,
46
46
  src_unit: str,
47
47
  dst_unit: str = 'B') -> float:
48
48
  """Returns a float representing the converted input in dst_units."""
49
- assert src_unit.lower() in VALID_PREFIXES, f"{src_unit} not a valid unit, valid units are {VALID_PREFIXES}."
50
- assert dst_unit.lower() in VALID_PREFIXES, f"{dst_unit} not a valid unit, valid units are {VALID_PREFIXES}."
49
+ if not src_unit.lower() in VALID_PREFIXES:
50
+ raise RuntimeError(f"{src_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
51
+ if not dst_unit.lower() in VALID_PREFIXES:
52
+ raise RuntimeError(f"{dst_unit} not a valid unit, valid units are {VALID_PREFIXES}.")
51
53
  return (num * bytes_in_unit(src_unit)) / bytes_in_unit(dst_unit)
52
54
 
53
55
 
@@ -60,7 +62,8 @@ def parse_memory_string(string: str) -> Tuple[float, str]:
60
62
  # find the first character of the unit
61
63
  if character not in '0123456789.-_ ':
62
64
  units = string[i:].strip()
63
- assert units.lower() in VALID_PREFIXES, f"{units} not a valid unit, valid units are {VALID_PREFIXES}."
65
+ if not units.lower() in VALID_PREFIXES:
66
+ raise RuntimeError(f"{units} not a valid unit, valid units are {VALID_PREFIXES}.")
64
67
  return float(string[:i]), units
65
68
  return float(string), 'b'
66
69
 
@@ -71,6 +74,7 @@ def human2bytes(string: str) -> int:
71
74
  integer number of bytes.
72
75
  """
73
76
  value, unit = parse_memory_string(string)
77
+
74
78
  return int(convert_units(value, src_unit=unit, dst_unit='b'))
75
79
 
76
80
 
@@ -124,3 +128,22 @@ def hms_duration_to_seconds(hms: str) -> float:
124
128
  seconds += float(vals_to_convert[2])
125
129
 
126
130
  return seconds
131
+
132
+
133
+ def strtobool(val: str) -> bool:
134
+ """
135
+ Make a human-readable string into a bool.
136
+
137
+ Convert a string along the lines of "y", "1", "ON", "TrUe", or
138
+ "Yes" to True, and the corresponding false-ish values to False.
139
+ """
140
+ # We only track prefixes, so "y" covers "y", "yes",
141
+ # and "yeah no" and makes them all True.
142
+ TABLE = {True: ["1", "on", "y", "t"], False: ["0", "off", "n", "f"]}
143
+ lowered = val.lower()
144
+ for result, prefixes in TABLE.items():
145
+ for prefix in prefixes:
146
+ if lowered.startswith(prefix):
147
+ return result
148
+ raise ValueError(f"Cannot convert \"{val}\" to a bool")
149
+
toil/lib/docker.py CHANGED
@@ -17,7 +17,7 @@ import os
17
17
  import re
18
18
  import struct
19
19
  from shlex import quote
20
- from typing import Optional, List
20
+ from typing import List, Optional
21
21
 
22
22
  import requests
23
23
 
@@ -27,7 +27,6 @@ from docker.errors import (ContainerError,
27
27
  NotFound,
28
28
  create_api_error_from_http_exception)
29
29
  from docker.utils.socket import consume_socket_output, demux_adaptor
30
-
31
30
  from toil.lib.accelerators import get_host_accelerator_numbers
32
31
 
33
32
  logger = logging.getLogger(__name__)
@@ -84,16 +83,17 @@ def apiDockerCall(job,
84
83
  jobs, with the intention that failed/orphaned docker jobs be handled
85
84
  appropriately.
86
85
 
87
- Example of using dockerCall in toil to index a FASTA file with SAMtools:
88
- def toil_job(job):
89
- working_dir = job.fileStore.getLocalTempDir()
90
- path = job.fileStore.readGlobalFile(ref_id,
91
- os.path.join(working_dir, 'ref.fasta')
92
- parameters = ['faidx', path]
93
- apiDockerCall(job,
94
- image='quay.io/ucgc_cgl/samtools:latest',
95
- working_dir=working_dir,
96
- parameters=parameters)
86
+ Example of using dockerCall in toil to index a FASTA file with SAMtools::
87
+
88
+ def toil_job(job):
89
+ working_dir = job.fileStore.getLocalTempDir()
90
+ path = job.fileStore.readGlobalFile(ref_id,
91
+ os.path.join(working_dir, 'ref.fasta')
92
+ parameters = ['faidx', path]
93
+ apiDockerCall(job,
94
+ image='quay.io/ucgc_cgl/samtools:latest',
95
+ working_dir=working_dir,
96
+ parameters=parameters)
97
97
 
98
98
  Note that when run with detach=False, or with detach=True and stdout=True
99
99
  or stderr=True, this is a blocking call. When run with detach=True and
@@ -103,13 +103,13 @@ def apiDockerCall(job,
103
103
  :param toil.Job.job job: The Job instance for the calling function.
104
104
  :param str image: Name of the Docker image to be used.
105
105
  (e.g. 'quay.io/ucsc_cgl/samtools:latest')
106
- :param list[str] parameters: A list of string elements. If there are
106
+ :param list[str] parameters: A list of string elements. If there are
107
107
  multiple elements, these will be joined with
108
- spaces. This handling of multiple elements
108
+ spaces. This handling of multiple elements
109
109
  provides backwards compatibility with previous
110
110
  versions which called docker using
111
111
  subprocess.check_call().
112
- **If list of lists: list[list[str]], then treat
112
+ If list of lists: list[list[str]], then treat
113
113
  as successive commands chained with pipe.
114
114
  :param str working_dir: The working directory.
115
115
  :param int deferParam: Action to take on the container upon job completion.
@@ -225,8 +225,8 @@ def apiDockerCall(job,
225
225
  working_dir = os.path.abspath(working_dir)
226
226
 
227
227
  # Ensure the user has passed a valid value for deferParam
228
- assert deferParam in (None, FORGO, STOP, RM), \
229
- 'Please provide a valid value for deferParam.'
228
+ if deferParam not in (None, FORGO, STOP, RM):
229
+ raise RuntimeError('Please provide a valid value for deferParam.')
230
230
 
231
231
  client = docker.from_env(version='auto', timeout=timeout)
232
232
 
@@ -413,12 +413,11 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
413
413
 
414
414
  :param container_name: Name of the container being checked.
415
415
  :param int timeout: Use the given timeout in seconds for interactions with
416
- the Docker daemon. Note that the underlying docker module is
417
- not always able to abort ongoing reads and writes in order
418
- to respect the timeout. Defaults to 1 year (i.e. wait
419
- essentially indefinitely).
416
+ the Docker daemon. Note that the underlying docker module is not always
417
+ able to abort ongoing reads and writes in order to respect the timeout.
418
+ Defaults to 1 year (i.e. wait essentially indefinitely).
420
419
  :returns: True if status is 'running', False if status is anything else,
421
- and None if the container does not exist.
420
+ and None if the container does not exist.
422
421
  """
423
422
  client = docker.from_env(version='auto', timeout=timeout)
424
423
  try:
@@ -439,7 +438,7 @@ def containerIsRunning(container_name: str, timeout: int = 365 * 24 * 60 * 60):
439
438
  def getContainerName(job):
440
439
  """
441
440
  Create a random string including the job name, and return it. Name will
442
- match [a-zA-Z0-9][a-zA-Z0-9_.-]
441
+ match ``[a-zA-Z0-9][a-zA-Z0-9_.-]``.
443
442
  """
444
443
  parts = ['toil', str(job.description), base64.b64encode(os.urandom(9), b'-_').decode('utf-8')]
445
444
  name = re.sub('[^a-zA-Z0-9_.-]', '', '--'.join(parts))
toil/lib/ec2.py CHANGED
@@ -103,11 +103,13 @@ def wait_instances_running(ec2, instances: Iterable[Boto2Instance]) -> Iterable[
103
103
  if i.state == 'pending':
104
104
  pending_ids.add(i.id)
105
105
  elif i.state == 'running':
106
- assert i.id not in running_ids
106
+ if i.id in running_ids:
107
+ raise RuntimeError("An instance was already added to the list of running instance IDs. Maybe there is a duplicate.")
107
108
  running_ids.add(i.id)
108
109
  yield i
109
110
  else:
110
- assert i.id not in other_ids
111
+ if i.id in other_ids:
112
+ raise RuntimeError("An instance was already added to the list of other instances. Maybe there is a duplicate.")
111
113
  other_ids.add(i.id)
112
114
  yield i
113
115
  logger.info('%i instance(s) pending, %i running, %i other.',
@@ -130,10 +132,10 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
130
132
  :param requests: The requests to wait on.
131
133
 
132
134
  :param timeout: Maximum time in seconds to spend waiting or None to wait forever. If a
133
- timeout occurs, the remaining open requests will be cancelled.
135
+ timeout occurs, the remaining open requests will be cancelled.
134
136
 
135
137
  :param tentative: if True, give up on a spot request at the earliest indication of it
136
- not being fulfilled immediately
138
+ not being fulfilled immediately
137
139
 
138
140
  """
139
141
 
@@ -166,11 +168,13 @@ def wait_spot_requests_active(ec2, requests: Iterable[SpotInstanceRequest], time
166
168
  'Request %s entered status %s indicating that it will not be '
167
169
  'fulfilled anytime soon.', r.id, r.status.code)
168
170
  elif r.state == 'active':
169
- assert r.id not in active_ids
171
+ if r.id in active_ids:
172
+ raise RuntimeError("A request was already added to the list of active requests. Maybe there are duplicate requests.")
170
173
  active_ids.add(r.id)
171
174
  batch.append(r)
172
175
  else:
173
- assert r.id not in other_ids
176
+ if r.id in other_ids:
177
+ raise RuntimeError("A request was already added to the list of other IDs. Maybe there are duplicate requests.")
174
178
  other_ids.add(r.id)
175
179
  batch.append(r)
176
180
  if batch:
toil/lib/ec2nodes.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2015-2021 Regents of the University of California
1
+ # Copyright (C) 2015-2024 Regents of the University of California
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -17,12 +17,17 @@ import logging
17
17
  import os
18
18
  import re
19
19
  import textwrap
20
- from typing import Any, Dict, List, Tuple, Union
21
-
22
20
  import requests
21
+ import shutil
22
+ import enlighten # type: ignore
23
+
24
+ from typing import Dict, List, Tuple, Union, Any
25
+
23
26
 
24
27
  logger = logging.getLogger(__name__)
28
+ manager = enlighten.get_manager()
25
29
  dirname = os.path.dirname(__file__)
30
+ region_json_dirname = os.path.join(dirname, 'region_jsons')
26
31
 
27
32
 
28
33
  EC2Regions = {'us-west-1': 'US West (N. California)',
@@ -83,7 +88,7 @@ class InstanceType:
83
88
  return False
84
89
 
85
90
 
86
- def isNumber(s: str) -> bool:
91
+ def is_number(s: str) -> bool:
87
92
  """
88
93
  Determines if a unicode string (that may include commas) is a number.
89
94
 
@@ -105,7 +110,7 @@ def isNumber(s: str) -> bool:
105
110
  return False
106
111
 
107
112
 
108
- def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float], float]]:
113
+ def parse_storage(storage_info: str) -> Union[List[int], Tuple[Union[int, float], float]]:
109
114
  """
110
115
  Parses EC2 JSON storage param string into a number.
111
116
 
@@ -117,22 +122,22 @@ def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float],
117
122
  "8 x 1.9 NVMe SSD"
118
123
  "900 GB NVMe SSD"
119
124
 
120
- :param str storageData: EC2 JSON storage param string.
125
+ :param str storage_info: EC2 JSON storage param string.
121
126
  :return: Two floats representing: (# of disks), and (disk_capacity in GiB of each disk).
122
127
  """
123
- if storageData == "EBS only":
128
+ if storage_info == "EBS only":
124
129
  return [0, 0]
125
130
  else:
126
- specs = storageData.strip().split()
127
- if isNumber(specs[0]) and specs[1] == 'x' and isNumber(specs[2]):
131
+ specs = storage_info.strip().split()
132
+ if is_number(specs[0]) and specs[1] == 'x' and is_number(specs[2]):
128
133
  return float(specs[0].replace(',', '')), float(specs[2].replace(',', ''))
129
- elif isNumber(specs[0]) and specs[1] == 'GB' and specs[2] == 'NVMe' and specs[3] == 'SSD':
134
+ elif is_number(specs[0]) and specs[1] == 'GB' and specs[2] == 'NVMe' and specs[3] == 'SSD':
130
135
  return 1, float(specs[0].replace(',', ''))
131
136
  else:
132
137
  raise RuntimeError('EC2 JSON format has likely changed. Error parsing disk specs.')
133
138
 
134
139
 
135
- def parseMemory(memAttribute: str) -> float:
140
+ def parse_memory(mem_info: str) -> float:
136
141
  """
137
142
  Returns EC2 'memory' string as a float.
138
143
 
@@ -140,18 +145,19 @@ def parseMemory(memAttribute: str) -> float:
140
145
  Amazon loves to put commas in their numbers, so we have to accommodate that.
141
146
  If the syntax ever changes, this will raise.
142
147
 
143
- :param memAttribute: EC2 JSON memory param string.
148
+ :param mem_info: EC2 JSON memory param string.
144
149
  :return: A float representing memory in GiB.
145
150
  """
146
- mem = memAttribute.replace(',', '').split()
151
+ mem = mem_info.replace(',', '').split()
147
152
  if mem[1] == 'GiB':
148
153
  return float(mem[0])
149
154
  else:
150
155
  raise RuntimeError('EC2 JSON format has likely changed. Error parsing memory.')
151
156
 
152
157
 
153
- def fetchEC2Index(filename: str) -> None:
154
- """Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.
158
+ def download_region_json(filename: str, region: str = 'us-east-1') -> None:
159
+ """
160
+ Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.
155
161
 
156
162
  See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/
157
163
 
@@ -159,61 +165,45 @@ def fetchEC2Index(filename: str) -> None:
159
165
  aws instance name (example: 't2.micro'), and the value is an
160
166
  InstanceType object representing that aws instance name.
161
167
  """
162
- print('Downloading ~1Gb AWS billing file to parse for information.\n')
168
+ response = requests.get(f'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/{region}/index.json', stream=True)
169
+ file_size = int(response.headers.get("content-length", 0))
170
+ print(f'Downloading ~{file_size / 1000000000}Gb {region} AWS billing file to: {filename}')
163
171
 
164
- response = requests.get('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json')
165
- if response.ok:
166
- with open(filename, 'w') as f:
167
- f.write(str(json.dumps(json.loads(response.text), indent=4)))
168
- print('Download completed successfully!\n')
169
- else:
170
- raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))
172
+ with manager.counter(total=file_size, desc=os.path.basename(filename), unit='bytes', leave=False) as progress_bar:
173
+ with open(filename, "wb") as file:
174
+ for data in response.iter_content(1048576):
175
+ progress_bar.update(len(data))
176
+ file.write(data)
171
177
 
172
178
 
173
- def fetchEC2InstanceDict(awsBillingJson: Dict[str, Any], region: str) -> Dict[str, InstanceType]:
179
+ def reduce_region_json_size(filename:str) -> List[Dict[str, Any]]:
174
180
  """
175
- Takes a JSON and returns a list of InstanceType objects representing EC2 instance params.
181
+ Deletes information in the json file that we don't need, and rewrites it. This makes the file smaller.
176
182
 
177
- :param region:
178
- :return:
183
+ The reason being: we used to download the unified AWS Bulk API JSON, which eventually crept up to 5.6Gb,
184
+ the loading of which could not be done on a 32Gb RAM machine. Now we download each region JSON individually
185
+ (with AWS's new Query API), but even those may eventually one day grow ridiculously large, so we do what we can to
186
+ keep the file sizes down (and thus also the amount loaded into memory) to keep this script working for longer.
179
187
  """
180
- ec2InstanceList = []
181
- for k, v in awsBillingJson['products'].items():
182
- i = v['attributes']
183
- # NOTES:
184
- #
185
- # 3 tenant types: 'Host' (always $0.00; just a template?)
186
- # 'Dedicated' (toil does not support; these are pricier)
187
- # 'Shared' (AWS default and what toil uses)
188
- #
189
- # The same instance can appear with multiple "operation" values;
190
- # "RunInstances" is normal
191
- # "RunInstances:<code>" is e.g. Linux with MS SQL Server installed.
192
- if (i.get('location') == region and
193
- i.get('tenancy') == 'Shared' and
194
- i.get('operatingSystem') == 'Linux' and
195
- i.get('operation') == 'RunInstances'):
196
-
197
- normal_use = i.get('usagetype').endswith('BoxUsage:' + i['instanceType']) # not reserved or unused
198
- if normal_use:
199
- disks, disk_capacity = parseStorage(v["attributes"]["storage"])
200
-
201
- # Determines whether the instance type is from an ARM or AMD family
202
- # ARM instance names include a digit followed by a 'g' before the instance size
203
- architecture = 'arm64' if re.search(r".*\dg.*\..*", i["instanceType"]) else 'amd64'
204
-
205
- instance = InstanceType(name=i["instanceType"],
206
- cores=i["vcpu"],
207
- memory=parseMemory(i["memory"]),
208
- disks=disks,
209
- disk_capacity=disk_capacity,
210
- architecture=architecture)
211
- if instance in ec2InstanceList:
212
- raise RuntimeError('EC2 JSON format has likely changed. '
213
- 'Duplicate instance {} found.'.format(instance))
214
- ec2InstanceList.append(instance)
215
- print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.')
216
- return {_.name: _ for _ in ec2InstanceList}
188
+ with open(filename, 'r') as f:
189
+ aws_products = json.loads(f.read())['products']
190
+ aws_product_list = list()
191
+ for k in list(aws_products.keys()):
192
+ ec2_attributes = aws_products[k]['attributes']
193
+ if (ec2_attributes.get('tenancy') == 'Shared' and
194
+ ec2_attributes.get('operatingSystem') == 'Linux' and
195
+ ec2_attributes.get('operation') == 'RunInstances' and
196
+ ec2_attributes.get('usagetype').endswith('BoxUsage:' + ec2_attributes['instanceType'])):
197
+ aws_product_list.append(dict(disk=ec2_attributes["storage"],
198
+ loc=ec2_attributes["location"],
199
+ name=ec2_attributes["instanceType"],
200
+ mem=ec2_attributes["memory"],
201
+ cpu=ec2_attributes["vcpu"]))
202
+ del aws_products[k]
203
+ del aws_products
204
+ with open(filename, 'w') as f:
205
+ f.write(json.dumps(dict(aws=aws_product_list), indent=2))
206
+ return aws_product_list
217
207
 
218
208
 
219
209
  def updateStaticEC2Instances() -> None:
@@ -225,39 +215,58 @@ def updateStaticEC2Instances() -> None:
225
215
  :return: Nothing. Writes a new 'generatedEC2Lists.py' file.
226
216
  """
227
217
  print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
228
- "This may take a while, depending on your internet connection (~1Gb file).\n")
218
+ "This may take a while, depending on your internet connection.\n")
229
219
 
230
- origFile = os.path.join(dirname, 'generatedEC2Lists.py') # original
231
- assert os.path.exists(origFile)
220
+ original_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists.py') # original
221
+ if not os.path.exists(original_aws_instance_list):
222
+ raise RuntimeError(f"Path {original_aws_instance_list} does not exist.")
232
223
  # use a temporary file until all info is fetched
233
- genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py') # temp
234
- if os.path.exists(genFile):
235
- os.remove(genFile)
224
+ updated_aws_instance_list = os.path.join(dirname, 'generatedEC2Lists_tmp.py') # temp
225
+ if os.path.exists(updated_aws_instance_list):
226
+ os.remove(updated_aws_instance_list)
236
227
 
237
- # filepath to store the aws json request (will be cleaned up)
238
- # this is done because AWS changes their json format from time to time
239
- # and debugging is faster with the file stored locally
240
- awsJsonIndex = os.path.join(dirname, 'index.json')
241
-
242
- if not os.path.exists(awsJsonIndex):
243
- fetchEC2Index(filename=awsJsonIndex)
244
- else:
245
- print('Reusing previously downloaded json @: ' + awsJsonIndex)
246
-
247
- with open(awsJsonIndex) as f:
248
- awsProductDict = json.loads(f.read())
228
+ if not os.path.exists(region_json_dirname):
229
+ os.mkdir(region_json_dirname)
249
230
 
250
231
  currentEC2List = []
251
232
  instancesByRegion: Dict[str, List[str]] = {}
252
- for regionNickname in EC2Regions:
253
- currentEC2Dict = fetchEC2InstanceDict(awsProductDict, region=EC2Regions[regionNickname])
233
+ for region in EC2Regions.keys():
234
+ region_json = os.path.join(region_json_dirname, f'{region}.json')
235
+
236
+ if os.path.exists(region_json):
237
+ try:
238
+ with open(region_json, 'r') as f:
239
+ aws_products = json.loads(f.read())['aws']
240
+ print(f'Reusing previously downloaded json @: {region_json}')
241
+ except:
242
+ os.remove(region_json)
243
+ download_region_json(filename=region_json, region=region)
244
+ aws_products = reduce_region_json_size(filename=region_json)
245
+ else:
246
+ download_region_json(filename=region_json, region=region)
247
+ aws_products = reduce_region_json_size(filename=region_json)
248
+
249
+ ec2InstanceList = []
250
+ for i in aws_products:
251
+ disks, disk_capacity = parse_storage(i["disk"])
252
+ # Determines whether the instance type is from an ARM or AMD family
253
+ # ARM instance names include a digit followed by a 'g' before the instance size
254
+ architecture = 'arm64' if re.search(r".*\dg.*\..*", i["name"]) else 'amd64'
255
+ ec2InstanceList.append(InstanceType(name=i["name"],
256
+ cores=i["cpu"],
257
+ memory=parse_memory(i["mem"]),
258
+ disks=disks,
259
+ disk_capacity=disk_capacity,
260
+ architecture=architecture))
261
+ print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.\n')
262
+ currentEC2Dict = {_.name: _ for _ in ec2InstanceList}
254
263
  for instanceName, instanceTypeObj in currentEC2Dict.items():
255
264
  if instanceTypeObj not in currentEC2List:
256
265
  currentEC2List.append(instanceTypeObj)
257
- instancesByRegion.setdefault(regionNickname, []).append(instanceName)
266
+ instancesByRegion.setdefault(region, []).append(instanceName)
258
267
 
259
268
  # write provenance note, copyright and imports
260
- with open(genFile, 'w') as f:
269
+ with open(updated_aws_instance_list, 'w') as f:
261
270
  f.write(textwrap.dedent('''
262
271
  # !!! AUTOGENERATED FILE !!!
263
272
  # Update with: src/toil/utils/toilUpdateEC2Instances.py
@@ -278,16 +287,13 @@ def updateStaticEC2Instances() -> None:
278
287
  from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=datetime.date.today().strftime("%Y"))[1:])
279
288
 
280
289
  # write header of total EC2 instance type list
281
- genString = "# {num} Instance Types. Generated {date}.\n".format(
282
- num=str(len(currentEC2List)), date=str(datetime.datetime.now()))
290
+ genString = f'# {len(currentEC2List)} Instance Types. Generated {datetime.datetime.now()}.\n'
283
291
  genString = genString + "E2Instances = {\n"
284
292
  sortedCurrentEC2List = sorted(currentEC2List, key=lambda x: x.name)
285
293
 
286
294
  # write the list of all instances types
287
295
  for i in sortedCurrentEC2List:
288
- z = " '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity}, architecture='{architecture}')," \
289
- "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity, architecture=i.architecture)
290
- genString = genString + z
296
+ genString = genString + f" '{i.name}': InstanceType(name='{i.name}', cores={i.cores}, memory={i.memory}, disks={i.disks}, disk_capacity={i.disk_capacity}, architecture='{i.architecture}'),\n"
291
297
  genString = genString + '}\n\n'
292
298
 
293
299
  genString = genString + 'regionDict = {\n'
@@ -301,19 +307,19 @@ def updateStaticEC2Instances() -> None:
301
307
  if genString.endswith(',\n'):
302
308
  genString = genString[:-len(',\n')]
303
309
  genString = genString + '}\n'
304
- with open(genFile, 'a+') as f:
310
+ with open(updated_aws_instance_list, 'a+') as f:
305
311
  f.write(genString)
306
312
 
307
313
  # append key for fetching at the end
308
314
  regionKey = '\nec2InstancesByRegion = {region: [E2Instances[i] for i in instances] for region, instances in regionDict.items()}\n'
309
315
 
310
- with open(genFile, 'a+') as f:
316
+ with open(updated_aws_instance_list, 'a+') as f:
311
317
  f.write(regionKey)
312
- # delete the original file
313
- if os.path.exists(origFile):
314
- os.remove(origFile)
318
+
315
319
  # replace the instance list with a current list
316
- os.rename(genFile, origFile)
317
- # delete the aws billing json file
318
- if os.path.exists(awsJsonIndex):
319
- os.remove(awsJsonIndex)
320
+ os.rename(updated_aws_instance_list, original_aws_instance_list)
321
+
322
+ # delete the aws region json file directory
323
+ if os.path.exists(region_json_dirname):
324
+ print(f'Update Successful! Removing AWS Region JSON Files @: {region_json_dirname}')
325
+ shutil.rmtree(region_json_dirname)
toil/lib/encryption/_nacl.py CHANGED
@@ -53,7 +53,8 @@ def encrypt(message: bytes, keyPath: str) -> bytes:
53
53
  # of a collision is astronomically low. (This approach is
54
54
  # recommended in the libsodium documentation.)
55
55
  nonce = nacl.utils.random(SecretBox.NONCE_SIZE)
56
- assert len(nonce) == SecretBox.NONCE_SIZE
56
+ if len(nonce) != SecretBox.NONCE_SIZE:
57
+ raise RuntimeError("Generated nonce is the wrong size.")
57
58
  return bytes(sb.encrypt(message, nonce))
58
59
 
59
60