toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. toil/batchSystems/abstractBatchSystem.py +13 -5
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
  3. toil/batchSystems/kubernetes.py +13 -2
  4. toil/batchSystems/mesos/batchSystem.py +33 -2
  5. toil/batchSystems/registry.py +15 -118
  6. toil/batchSystems/slurm.py +191 -16
  7. toil/common.py +20 -1
  8. toil/cwl/cwltoil.py +97 -119
  9. toil/cwl/utils.py +103 -3
  10. toil/fileStores/__init__.py +1 -1
  11. toil/fileStores/abstractFileStore.py +5 -2
  12. toil/fileStores/cachingFileStore.py +1 -1
  13. toil/job.py +30 -14
  14. toil/jobStores/abstractJobStore.py +35 -255
  15. toil/jobStores/aws/jobStore.py +864 -1964
  16. toil/jobStores/aws/utils.py +24 -270
  17. toil/jobStores/fileJobStore.py +2 -1
  18. toil/jobStores/googleJobStore.py +32 -13
  19. toil/jobStores/utils.py +0 -327
  20. toil/leader.py +27 -22
  21. toil/lib/accelerators.py +1 -1
  22. toil/lib/aws/config.py +22 -0
  23. toil/lib/aws/s3.py +477 -9
  24. toil/lib/aws/utils.py +22 -33
  25. toil/lib/checksum.py +88 -0
  26. toil/lib/conversions.py +33 -31
  27. toil/lib/directory.py +217 -0
  28. toil/lib/ec2.py +97 -29
  29. toil/lib/exceptions.py +2 -1
  30. toil/lib/expando.py +2 -2
  31. toil/lib/generatedEC2Lists.py +138 -19
  32. toil/lib/io.py +33 -2
  33. toil/lib/memoize.py +21 -7
  34. toil/lib/misc.py +1 -1
  35. toil/lib/pipes.py +385 -0
  36. toil/lib/plugins.py +106 -0
  37. toil/lib/retry.py +1 -1
  38. toil/lib/threading.py +1 -1
  39. toil/lib/url.py +320 -0
  40. toil/lib/web.py +4 -5
  41. toil/options/cwl.py +13 -1
  42. toil/options/runner.py +17 -10
  43. toil/options/wdl.py +12 -1
  44. toil/provisioners/__init__.py +5 -2
  45. toil/provisioners/aws/__init__.py +43 -36
  46. toil/provisioners/aws/awsProvisioner.py +47 -15
  47. toil/provisioners/node.py +60 -12
  48. toil/resource.py +3 -13
  49. toil/server/app.py +12 -6
  50. toil/server/cli/wes_cwl_runner.py +2 -2
  51. toil/server/wes/abstract_backend.py +21 -43
  52. toil/server/wes/toil_backend.py +2 -2
  53. toil/test/__init__.py +16 -18
  54. toil/test/batchSystems/batchSystemTest.py +2 -9
  55. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  56. toil/test/batchSystems/test_slurm.py +103 -14
  57. toil/test/cwl/cwlTest.py +181 -8
  58. toil/test/cwl/staging_cat.cwl +27 -0
  59. toil/test/cwl/staging_make_file.cwl +25 -0
  60. toil/test/cwl/staging_workflow.cwl +43 -0
  61. toil/test/cwl/zero_default.cwl +61 -0
  62. toil/test/docs/scripts/tutorial_staging.py +17 -8
  63. toil/test/docs/scriptsTest.py +2 -1
  64. toil/test/jobStores/jobStoreTest.py +23 -133
  65. toil/test/lib/aws/test_iam.py +7 -7
  66. toil/test/lib/aws/test_s3.py +30 -33
  67. toil/test/lib/aws/test_utils.py +9 -9
  68. toil/test/lib/test_url.py +69 -0
  69. toil/test/lib/url_plugin_test.py +105 -0
  70. toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
  71. toil/test/provisioners/clusterTest.py +15 -2
  72. toil/test/provisioners/gceProvisionerTest.py +1 -1
  73. toil/test/server/serverTest.py +78 -36
  74. toil/test/src/autoDeploymentTest.py +2 -3
  75. toil/test/src/fileStoreTest.py +89 -87
  76. toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
  77. toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
  78. toil/test/utils/toilKillTest.py +35 -28
  79. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  80. toil/test/wdl/md5sum/md5sum.json +1 -1
  81. toil/test/wdl/testfiles/read_file.wdl +18 -0
  82. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  83. toil/test/wdl/wdltoil_test.py +171 -162
  84. toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
  85. toil/utils/toilDebugFile.py +6 -3
  86. toil/utils/toilSshCluster.py +23 -0
  87. toil/utils/toilStats.py +17 -2
  88. toil/utils/toilUpdateEC2Instances.py +1 -0
  89. toil/version.py +10 -10
  90. toil/wdl/wdltoil.py +1179 -825
  91. toil/worker.py +16 -8
  92. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
  93. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
  94. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
  95. toil/lib/iterables.py +0 -112
  96. toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
  97. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
  98. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
  99. {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/lib/url.py ADDED
@@ -0,0 +1,320 @@
1
+ # Copyright (C) 2015-2025 Regents of the University of California
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import os
16
+ from abc import ABC, ABCMeta, abstractmethod
17
+ from typing import (
18
+ IO,
19
+ TYPE_CHECKING,
20
+ Any,
21
+ Callable,
22
+ ContextManager,
23
+ Literal,
24
+ Optional,
25
+ Union,
26
+ cast,
27
+ overload,
28
+ Type,
29
+ )
30
+ from urllib.parse import ParseResult, urlparse
31
+
32
+ from toil.lib.exceptions import UnimplementedURLException
33
+ from toil.lib.memoize import memoize
34
+ from toil.lib.plugins import register_plugin, get_plugin
35
+
36
+ try:
37
+ from botocore.exceptions import ProxyConnectionError
38
+ except ImportError:
39
+
40
+ class ProxyConnectionError(BaseException): # type: ignore
41
+ """Dummy class."""
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+ class URLAccess:
46
+ """
47
+ Widget for accessing external storage (URLs).
48
+ """
49
+
50
+ @classmethod
51
+ def url_exists(cls, src_uri: str) -> bool:
52
+ """
53
+ Return True if the file at the given URI exists, and False otherwise.
54
+
55
+ May raise an error if file existence cannot be determined.
56
+
57
+ :param src_uri: URL that points to a file or object in the storage
58
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
59
+ """
60
+ parseResult = urlparse(src_uri)
61
+ otherCls = cls._find_url_implementation(parseResult)
62
+ return otherCls._url_exists(parseResult)
63
+
64
+ @classmethod
65
+ def get_size(cls, src_uri: str) -> Optional[int]:
66
+ """
67
+ Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
68
+
69
+ :param src_uri: URL that points to a file or object in the storage
70
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
71
+ """
72
+ parseResult = urlparse(src_uri)
73
+ otherCls = cls._find_url_implementation(parseResult)
74
+ return otherCls._get_size(parseResult)
75
+
76
+ @classmethod
77
+ def get_is_directory(cls, src_uri: str) -> bool:
78
+ """
79
+ Return True if the thing at the given URL is a directory, and False if
80
+ it is a file. The URL may or may not end in '/'.
81
+ """
82
+ parseResult = urlparse(src_uri)
83
+ otherCls = cls._find_url_implementation(parseResult)
84
+ return otherCls._get_is_directory(parseResult)
85
+
86
+ @classmethod
87
+ def list_url(cls, src_uri: str) -> list[str]:
88
+ """
89
+ List the directory at the given URL. Returned path components can be
90
+ joined with '/' onto the passed URL to form new URLs. Those that end in
91
+ '/' correspond to directories. The provided URL may or may not end with
92
+ '/'.
93
+
94
+ Currently supported schemes are:
95
+
96
+ - 's3' for objects in Amazon S3
97
+ e.g. s3://bucket/prefix/
98
+
99
+ - 'file' for local files
100
+ e.g. file:///local/dir/path/
101
+
102
+ :param str src_uri: URL that points to a directory or prefix in the storage mechanism of a
103
+ supported URL scheme e.g. a prefix in an AWS s3 bucket.
104
+
105
+ :return: A list of URL components in the given directory, already URL-encoded.
106
+ """
107
+ parseResult = urlparse(src_uri)
108
+ otherCls = cls._find_url_implementation(parseResult)
109
+ return otherCls._list_url(parseResult)
110
+
111
+ @classmethod
112
+ def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
113
+ """
114
+ Read the given URL and write its content into the given writable stream.
115
+
116
+ Raises FileNotFoundError if the URL doesn't exist.
117
+
118
+ :return: The size of the file in bytes and whether the executable permission bit is set
119
+ """
120
+ parseResult = urlparse(src_uri)
121
+ otherCls = cls._find_url_implementation(parseResult)
122
+ return otherCls._read_from_url(parseResult, writable)
123
+
124
+ @classmethod
125
+ def open_url(cls, src_uri: str) -> IO[bytes]:
126
+ """
127
+ Read from the given URI.
128
+
129
+ Raises FileNotFoundError if the URL doesn't exist.
130
+
131
+ Has a readable stream interface, unlike :meth:`read_from_url` which
132
+ takes a writable stream.
133
+ """
134
+ parseResult = urlparse(src_uri)
135
+ otherCls = cls._find_url_implementation(parseResult)
136
+ return otherCls._open_url(parseResult)
137
+
138
+ @classmethod
139
+ @abstractmethod
140
+ def _url_exists(cls, url: ParseResult) -> bool:
141
+ """
142
+ Return True if the item at the given URL exists, and False otherwise.
143
+
144
+ May raise an error if file existence cannot be determined.
145
+ """
146
+ raise NotImplementedError(f"No implementation for {url}")
147
+
148
+ @classmethod
149
+ @abstractmethod
150
+ def _get_size(cls, url: ParseResult) -> Optional[int]:
151
+ """
152
+ Get the size of the object at the given URL, or None if it cannot be obtained.
153
+ """
154
+ raise NotImplementedError(f"No implementation for {url}")
155
+
156
+ @classmethod
157
+ @abstractmethod
158
+ def _get_is_directory(cls, url: ParseResult) -> bool:
159
+ """
160
+ Return True if the thing at the given URL is a directory, and False if
161
+ it is a file or it is known not to exist. The URL may or may not end in
162
+ '/'.
163
+
164
+ :param url: URL that points to a file or object, or directory or prefix,
165
+ in the storage mechanism of a supported URL scheme e.g. a blob
166
+ in an AWS s3 bucket.
167
+ """
168
+ raise NotImplementedError(f"No implementation for {url}")
169
+
170
+ @classmethod
171
+ @abstractmethod
172
+ def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
173
+ """
174
+ Reads the contents of the object at the specified location and writes it to the given
175
+ writable stream.
176
+
177
+ Raises FileNotFoundError if the thing at the URL is not found.
178
+
179
+ :param ParseResult url: URL that points to a file or object in the storage
180
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
181
+
182
+ :param IO[bytes] writable: a writable stream
183
+
184
+ :return: The size of the file in bytes and whether the executable permission bit is set
185
+ """
186
+ raise NotImplementedError(f"No implementation for {url}")
187
+
188
+ @classmethod
189
+ @abstractmethod
190
+ def _list_url(cls, url: ParseResult) -> list[str]:
191
+ """
192
+ List the contents of the given URL, which may or may not end in '/'
193
+
194
+ Returns a list of URL components. Those that end in '/' are meant to be
195
+ directories, while those that do not are meant to be files.
196
+
197
+ :param ParseResult url: URL that points to a directory or prefix in the
198
+ storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
199
+ bucket.
200
+
201
+ :return: The children of the given URL, already URL-encoded if
202
+ appropriate. (If the URL is a bare path, no encoding is done.)
203
+ """
204
+ raise NotImplementedError(f"No implementation for {url}")
205
+
206
+ @classmethod
207
+ @abstractmethod
208
+ def _open_url(cls, url: ParseResult) -> IO[bytes]:
209
+ """
210
+ Get a stream of the object at the specified location.
211
+
212
+ Raises FileNotFoundError if the thing at the URL is not found.
213
+ """
214
+ raise NotImplementedError(f"No implementation for {url}")
215
+
216
+ @classmethod
217
+ @abstractmethod
218
+ def _write_to_url(
219
+ cls,
220
+ readable: Union[IO[bytes], IO[str]],
221
+ url: ParseResult,
222
+ executable: bool = False,
223
+ ) -> None:
224
+ """
225
+ Reads the contents of the given readable stream and writes it to the object at the
226
+ specified location. Raises FileNotFoundError if the URL doesn't exist.
227
+
228
+ :param Union[IO[bytes], IO[str]] readable: a readable stream
229
+
230
+ :param ParseResult url: URL that points to a file or object in the storage
231
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
232
+
233
+ :param bool executable: determines if the file has executable permissions
234
+ """
235
+ raise NotImplementedError(f"No implementation for {url}")
236
+
237
+ @classmethod
238
+ @abstractmethod
239
+ def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
240
+ """
241
+ Returns True if the url access implementation supports the URL's scheme.
242
+
243
+ :param ParseResult url: a parsed URL that may be supported
244
+
245
+ :param bool export: Determines if the url is supported for export
246
+
247
+ :return bool: returns true if the cls supports the URL
248
+ """
249
+ raise NotImplementedError(f"No implementation for {url}")
250
+
251
+ @classmethod
252
+ def _find_url_implementation(
253
+ cls, url: ParseResult, export: bool = False
254
+ ) -> type["URLAccess"]:
255
+ """
256
+ Returns the URLAccess subclass that supports the given URL.
257
+
258
+ :param ParseResult url: The given URL
259
+
260
+ :param bool export: Determines if the url is supported for exporting
261
+
262
+ """
263
+ try:
264
+ implementation_factory = get_plugin("url_access", url.scheme.lower())
265
+ except KeyError:
266
+ raise UnimplementedURLException(url, "export" if export else "import")
267
+
268
+ try:
269
+ implementation = cast(Type[URLAccess], implementation_factory())
270
+ except (ImportError, ProxyConnectionError):
271
+ logger.debug(
272
+ "Unable to import implementation for scheme '%s', as is expected if the corresponding extra was "
273
+ "omitted at installation time.",
274
+ url.scheme.lower(),
275
+ )
276
+ raise UnimplementedURLException(url, "export" if export else "import")
277
+
278
+ if implementation._supports_url(url, export):
279
+ return implementation
280
+ raise UnimplementedURLException(url, "export" if export else "import")
281
+
282
+ #####
283
+ # Built-in url access
284
+ #####
285
+
286
+ def file_job_store_factory() -> type[URLAccess]:
287
+ from toil.jobStores.fileJobStore import FileJobStore
288
+
289
+ return FileJobStore
290
+
291
+
292
+ def google_job_store_factory() -> type[URLAccess]:
293
+ from toil.jobStores.googleJobStore import GoogleJobStore
294
+
295
+ return GoogleJobStore
296
+
297
+
298
+ def aws_job_store_factory() -> type[URLAccess]:
299
+ from toil.jobStores.aws.jobStore import AWSJobStore
300
+
301
+ return AWSJobStore
302
+
303
+
304
+ def job_store_support_factory() -> type[URLAccess]:
305
+ from toil.jobStores.abstractJobStore import JobStoreSupport
306
+
307
+ return JobStoreSupport
308
+
309
+ # TODO: make sure mypy still works and the tests work
310
+ # TODO: can then get rid of the _url_access_classes method
311
+
312
+ #####
313
+ # Registers all built-in urls
314
+ #####
315
+ register_plugin("url_access", "file", file_job_store_factory)
316
+ register_plugin("url_access", "gs", google_job_store_factory)
317
+ register_plugin("url_access", "s3", aws_job_store_factory)
318
+ register_plugin("url_access", "http", job_store_support_factory)
319
+ register_plugin("url_access", "https", job_store_support_factory)
320
+ register_plugin("url_access", "ftp", job_store_support_factory)
toil/lib/web.py CHANGED
@@ -17,13 +17,12 @@ Contains functions for making web requests with Toil.
17
17
 
18
18
  All web requests should go through this module, to make sure they use the right
19
19
  user agent.
20
-
20
+ >>> httpserver = getfixture("httpserver")
21
+ >>> handler = httpserver.expect_request("/path").respond_with_json({})
21
22
  >>> from toil.lib.web import web_session
22
- >>> web_session.get("https://example.com")
23
-
23
+ >>> web_session.get(httpserver.url_for("/path"))
24
+ <Response [200]>
24
25
  """
25
-
26
- import logging
27
26
  import requests
28
27
  import sys
29
28
 
toil/options/cwl.py CHANGED
@@ -3,6 +3,8 @@ from argparse import ArgumentParser
3
3
 
4
4
  from configargparse import SUPPRESS
5
5
 
6
+ from toil.lib.conversions import human2bytes
7
+ from toil.options.common import make_open_interval_action
6
8
  from toil.version import baseVersion
7
9
 
8
10
 
@@ -411,9 +413,19 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
411
413
  "--no-cwl-default-ram",
412
414
  action="store_false",
413
415
  help=suppress_help
414
- or "Do not apply CWL specification default ramMin, so that Toil --defaultMemory applies.",
416
+ or "Do not apply CWL specification default ramMin, so that Toil --defaultMemory applies. This can help jobs get to Slurm with no memory limit assigned.",
415
417
  dest="cwl_default_ram",
416
418
  )
419
+ parser.add_argument(
420
+ "--cwl-min-ram",
421
+ type=human2bytes,
422
+ action=make_open_interval_action(1),
423
+ help=suppress_help
424
+ or "Specify a minimum memory allocation for all tasks. "
425
+ "If --no-cwl-default-ram is passed, this does not apply to tools that do not "
426
+ "specify a memory requirement; --defaultMemory is used for those tools "
427
+ "in that case."
428
+ )
417
429
  parser.add_argument(
418
430
  "--destBucket",
419
431
  type=str,
toil/options/runner.py CHANGED
@@ -1,4 +1,4 @@
1
- from argparse import ArgumentParser
1
+ from argparse import ArgumentParser, SUPPRESS
2
2
 
3
3
  from toil.lib.conversions import human2bytes
4
4
 
@@ -25,17 +25,23 @@ def add_runner_options(
25
25
  help="Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance. "
26
26
  "If set to true, the argument --importWorkersDisk must also be set."
27
27
  )
28
- import_workers_threshold_argument = ["--importWorkersThreshold"]
28
+ import_workers_batchsize_argument = ["--importWorkersBatchSize"]
29
29
  if cwl:
30
- import_workers_threshold_argument.append("--import-workers-threshold")
30
+ import_workers_batchsize_argument.append("--import-workers-batch-size")
31
31
  parser.add_argument(
32
- *import_workers_threshold_argument,
33
- dest="import_workers_threshold",
32
+ *import_workers_batchsize_argument,
33
+ dest="import_workers_batchsize",
34
34
  type=lambda x: human2bytes(str(x)),
35
35
  default="1 GiB",
36
- help="Specify the file size threshold that determines how many files go into a batched import. As many files will go into a batch import job until this threshold "
37
- "is reached. This should be set in conjunction with the argument --runImportsOnWorkers."
36
+ help="Specify the target total file size for file import batches. "
37
+ "As many files as can fit will go into each batch import job. This should be set in conjunction with the argument --runImportsOnWorkers."
38
38
  )
39
+
40
+ # Deprecated
41
+ parser.add_argument(
42
+ "--importWorkersThreshold", "--import-workers-threshold", dest="import_workers_batchsize",type=lambda x: human2bytes(str(x)), help=SUPPRESS
43
+ )
44
+
39
45
  import_workers_disk_argument = ["--importWorkersDisk"]
40
46
  if cwl:
41
47
  import_workers_disk_argument.append("--import-workers-disk")
@@ -44,7 +50,8 @@ def add_runner_options(
44
50
  dest="import_workers_disk",
45
51
  type=lambda x: human2bytes(str(x)),
46
52
  default="1 MiB",
47
- help="Specify the disk size each import worker will get. This may be necessary when file streaming is not possible. For example, downloading from AWS to a GCE "
48
- "job store. If specified, this should be set to the largest file size of all files to import. This should be set in conjunction with the arguments "
49
- "--runImportsOnWorkers and --importWorkersThreshold."
53
+ help="Specify the disk size each import worker will get. This usually will not need to be set as Toil will attempt to use file streaming when downloading files. "
54
+ "If not possible, for example, when downloading from AWS to a GCE job store, "
55
+ "this should be set to the largest file size of all files to import. This should be set in conjunction with the arguments "
56
+ "--runImportsOnWorkers and --importWorkersBatchSize."
50
57
  )
toil/options/wdl.py CHANGED
@@ -96,4 +96,15 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None:
96
96
  type=strtobool,
97
97
  default=False,
98
98
  help=suppress_help or "Exit runner if workflow has any lint warnings"
99
- )
99
+ )
100
+
101
+ quant_check_arguments = ["--wdlQuantCheck"] + (
102
+ ["--quantCheck"] if not suppress else []
103
+ )
104
+ parser.add_argument(
105
+ *quant_check_arguments,
106
+ dest="quant_check",
107
+ type=strtobool,
108
+ default=True,
109
+ help=suppress_help or "Whether to relax quantifier validation rules"
110
+ )
@@ -127,8 +127,11 @@ def parse_node_types(
127
127
 
128
128
  Inputs should look something like this:
129
129
 
130
- >>> parse_node_types('c5.4xlarge/c5a.4xlarge:0.42,t2.large')
131
- [({'c5.4xlarge', 'c5a.4xlarge'}, 0.42), ({'t2.large'}, None)]
130
+ >>> types = parse_node_types('c5.4xlarge/c5a.4xlarge:0.42,t2.large')
131
+ >>> sorted(types[0][0]), types[0][1]
132
+ (['c5.4xlarge', 'c5a.4xlarge'], 0.42)
133
+ >>> sorted(types[1][0]), types[1][1]
134
+ (['t2.large'], None)
132
135
 
133
136
  :param node_type_specs: A string defining node types
134
137
 
@@ -16,7 +16,7 @@ import logging
16
16
  from collections import namedtuple
17
17
  from operator import attrgetter
18
18
  from statistics import mean, stdev
19
- from typing import Optional
19
+ from typing import Optional, TYPE_CHECKING
20
20
 
21
21
  from botocore.client import BaseClient
22
22
 
@@ -26,11 +26,15 @@ from toil.lib.aws import (
26
26
  get_aws_zone_from_environment_region,
27
27
  get_aws_zone_from_metadata,
28
28
  )
29
+ from toil.lib.aws.utils import boto3_pager
29
30
 
30
31
  logger = logging.getLogger(__name__)
31
32
 
32
33
  ZoneTuple = namedtuple("ZoneTuple", ["name", "price_deviation"])
33
34
 
35
+ if TYPE_CHECKING:
36
+ from mypy_boto3_ec2.type_defs import SpotPriceTypeDef
37
+
34
38
 
35
39
  def get_aws_zone_from_spot_market(
36
40
  spotBid: Optional[float],
@@ -109,7 +113,7 @@ def get_best_aws_zone(
109
113
  def choose_spot_zone(
110
114
  zones: list[str],
111
115
  bid: float,
112
- spot_history: list["boto.ec2.spotpricehistory.SpotPriceHistory"],
116
+ spot_history: list["SpotPriceTypeDef"],
113
117
  ) -> str:
114
118
  """
115
119
  Returns the zone to put the spot request based on, in order of priority:
@@ -120,27 +124,26 @@ def choose_spot_zone(
120
124
 
121
125
  :return: the name of the selected zone
122
126
 
123
- >>> from collections import namedtuple
124
- >>> FauxHistory = namedtuple('FauxHistory', ['price', 'availability_zone'])
125
127
  >>> zones = ['us-west-2a', 'us-west-2b']
126
- >>> spot_history = [FauxHistory(0.1, 'us-west-2a'), \
127
- FauxHistory(0.2, 'us-west-2a'), \
128
- FauxHistory(0.3, 'us-west-2b'), \
129
- FauxHistory(0.6, 'us-west-2b')]
128
+ >>> FauxHistory = lambda p, z: {"SpotPrice": p, "AvailabilityZone": z}
129
+ >>> spot_history = [FauxHistory("0.1", 'us-west-2a'), \
130
+ FauxHistory("0.2", 'us-west-2a'), \
131
+ FauxHistory("0.3", 'us-west-2b'), \
132
+ FauxHistory("0.6", 'us-west-2b')]
130
133
  >>> choose_spot_zone(zones, 0.15, spot_history)
131
134
  'us-west-2a'
132
135
 
133
- >>> spot_history=[FauxHistory(0.3, 'us-west-2a'), \
134
- FauxHistory(0.2, 'us-west-2a'), \
135
- FauxHistory(0.1, 'us-west-2b'), \
136
- FauxHistory(0.6, 'us-west-2b')]
136
+ >>> spot_history=[FauxHistory("0.3", 'us-west-2a'), \
137
+ FauxHistory("0.2", 'us-west-2a'), \
138
+ FauxHistory("0.1", 'us-west-2b'), \
139
+ FauxHistory("0.6", 'us-west-2b')]
137
140
  >>> choose_spot_zone(zones, 0.15, spot_history)
138
141
  'us-west-2b'
139
142
 
140
- >>> spot_history=[FauxHistory(0.1, 'us-west-2a'), \
141
- FauxHistory(0.7, 'us-west-2a'), \
142
- FauxHistory(0.1, 'us-west-2b'), \
143
- FauxHistory(0.6, 'us-west-2b')]
143
+ >>> spot_history=[FauxHistory("0.1", 'us-west-2a'), \
144
+ FauxHistory("0.7", 'us-west-2a'), \
145
+ FauxHistory("0.1", 'us-west-2b'), \
146
+ FauxHistory("0.6", 'us-west-2b')]
144
147
  >>> choose_spot_zone(zones, 0.15, spot_history)
145
148
  'us-west-2b'
146
149
  """
@@ -152,11 +155,11 @@ def choose_spot_zone(
152
155
  zone_histories = [
153
156
  zone_history
154
157
  for zone_history in spot_history
155
- if zone_history.availability_zone == zone
158
+ if zone_history["AvailabilityZone"] == zone
156
159
  ]
157
160
  if zone_histories:
158
- price_deviation = stdev([history.price for history in zone_histories])
159
- recent_price = zone_histories[0].price
161
+ price_deviation = stdev([float(history["SpotPrice"]) for history in zone_histories])
162
+ recent_price = float(zone_histories[0]["SpotPrice"])
160
163
  else:
161
164
  price_deviation, recent_price = 0.0, bid
162
165
  zone_tuple = ZoneTuple(name=zone, price_deviation=price_deviation)
@@ -169,7 +172,7 @@ def choose_spot_zone(
169
172
 
170
173
  def optimize_spot_bid(
171
174
  boto3_ec2: BaseClient, instance_type: str, spot_bid: float, zone_options: list[str]
172
- ):
175
+ ) -> str:
173
176
  """
174
177
  Check whether the bid is in line with history and makes an effort to place
175
178
  the instance in a sensible zone.
@@ -179,30 +182,29 @@ def optimize_spot_bid(
179
182
  """
180
183
  spot_history = _get_spot_history(boto3_ec2, instance_type)
181
184
  if spot_history:
182
- _check_spot_bid(spot_bid, spot_history)
185
+ _check_spot_bid(spot_bid, spot_history, name=instance_type)
183
186
  most_stable_zone = choose_spot_zone(zone_options, spot_bid, spot_history)
184
187
  logger.debug("Placing spot instances in zone %s.", most_stable_zone)
185
188
  return most_stable_zone
186
189
 
187
190
 
188
- def _check_spot_bid(spot_bid, spot_history):
191
+ def _check_spot_bid(spot_bid: float, spot_history: list["SpotPriceTypeDef"], name: Optional[str] = None) -> None:
189
192
  """
190
193
  Prevents users from potentially over-paying for instances
191
194
 
192
195
  Note: this checks over the whole region, not a particular zone
193
196
 
194
- :param spot_bid: float
197
+ :param spot_bid: The proposed bid in dollars per hour.
195
198
 
196
- :type spot_history: list[SpotPriceHistory]
199
+ :type spot_history: The recent history of the spot price
197
200
 
198
201
  :raises UserError: if bid is > 2X the spot price's average
199
202
 
200
- >>> from collections import namedtuple
201
- >>> FauxHistory = namedtuple( "FauxHistory", [ "price", "availability_zone" ] )
202
- >>> spot_data = [ FauxHistory( 0.1, "us-west-2a" ), \
203
- FauxHistory( 0.2, "us-west-2a" ), \
204
- FauxHistory( 0.3, "us-west-2b" ), \
205
- FauxHistory( 0.6, "us-west-2b" ) ]
203
+ >>> FauxHistory = lambda p, z: {"SpotPrice": p, "AvailabilityZone": z}
204
+ >>> spot_data = [ FauxHistory( "0.1", "us-west-2a" ), \
205
+ FauxHistory( "0.2", "us-west-2a" ), \
206
+ FauxHistory( "0.3", "us-west-2b" ), \
207
+ FauxHistory( "0.6", "us-west-2b" ) ]
206
208
  >>> # noinspection PyProtectedMember
207
209
  >>> _check_spot_bid( 0.1, spot_data )
208
210
  >>> # noinspection PyProtectedMember
@@ -212,17 +214,21 @@ def _check_spot_bid(spot_bid, spot_history):
212
214
  ...
213
215
  UserError: Your bid $ 2.000000 is more than double this instance type's average spot price ($ 0.300000) over the last week
214
216
  """
215
- average = mean([datum.price for datum in spot_history])
217
+ if name is None:
218
+ # Describe the instance as something
219
+ name = "this instance type"
220
+ average = mean([float(datum["SpotPrice"]) for datum in spot_history])
216
221
  if spot_bid > average * 2:
217
222
  logger.warning(
218
- "Your bid $ %f is more than double this instance type's average "
223
+ "Your bid $ %f is more than double %s's average "
219
224
  "spot price ($ %f) over the last week",
220
225
  spot_bid,
226
+ name,
221
227
  average,
222
228
  )
223
229
 
224
230
 
225
- def _get_spot_history(boto3_ec2: BaseClient, instance_type: str):
231
+ def _get_spot_history(boto3_ec2: BaseClient, instance_type: str) -> list["SpotPriceTypeDef"]:
226
232
  """
227
233
  Returns list of 1,000 most recent spot market data points represented as SpotPriceHistory
228
234
  objects. Note: The most recent object/data point will be first in the list.
@@ -230,10 +236,11 @@ def _get_spot_history(boto3_ec2: BaseClient, instance_type: str):
230
236
  :rtype: list[SpotPriceHistory]
231
237
  """
232
238
  one_week_ago = datetime.datetime.now() - datetime.timedelta(days=7)
233
- spot_data = boto3_ec2.describe_spot_price_history(
239
+ spot_data = boto3_pager(
240
+ boto3_ec2.describe_spot_price_history,
241
+ "SpotPriceHistory",
234
242
  StartTime=one_week_ago.isoformat(),
235
243
  InstanceTypes=[instance_type],
236
244
  ProductDescriptions=["Linux/UNIX"],
237
245
  )
238
- spot_data.sort(key=attrgetter("timestamp"), reverse=True)
239
- return spot_data
246
+ return sorted(spot_data, key=lambda d: d["Timestamp"], reverse=True)