toil 9.1.2__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +14 -14
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/options/common.py
CHANGED
@@ -1,7 +1,8 @@
 import logging
 import os
 from argparse import Action, ArgumentParser, _AppendAction
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any
 
 from configargparse import SUPPRESS
 from ruamel.yaml import YAML
@@ -23,7 +24,7 @@ SYS_MAX_SIZE = 9223372036854775807
 # use the same number
 
 
-def parse_set_env(l: list[str]) -> dict[str, Optional[str]]:
+def parse_set_env(l: list[str]) -> dict[str, str | None]:
     """
     Parse a list of strings of the form "NAME=VALUE" or just "NAME" into a dictionary.
 
@@ -53,7 +54,7 @@ def parse_set_env(l: list[str]) -> dict[str, Optional[str]]:
     ValueError: Empty name
     """
     d = {}
-    v: Optional[str] = None
+    v: str | None = None
     for i in l:
         try:
             k, v = i.split("=", 1)
@@ -73,7 +74,7 @@ def parse_int_list(s: str) -> list[int]:
     return [int(x) for x in s.split(",")]
 
 
-def iC(min_value: int, max_value: Optional[int] = None) -> Callable[[int], bool]:
+def iC(min_value: int, max_value: int | None = None) -> Callable[[int], bool]:
     """Returns a function that checks if a given int is in the given half-open interval."""
     assert isinstance(min_value, int)
     if max_value is None:
@@ -82,7 +83,7 @@ def iC(min_value: int, max_value: Optional[int] = None) -> Callable[[int], bool]
     return lambda x: min_value <= x < max_value
 
 
-def fC(minValue: float, maxValue: Optional[float] = None) -> Callable[[float], bool]:
+def fC(minValue: float, maxValue: float | None = None) -> Callable[[float], bool]:
     """Returns a function that checks if a given float is in the given half-open interval."""
     assert isinstance(minValue, float)
     if maxValue is None:
@@ -91,7 +92,7 @@ def fC(minValue: float, maxValue: Optional[float] = None) -> Callable[[float], b
     return lambda x: minValue <= x < maxValue
 
 
-def parse_accelerator_list(specs: Optional[str]) -> list["AcceleratorRequirement"]:
+def parse_accelerator_list(specs: str | None) -> list["AcceleratorRequirement"]:
     """
     Parse a string description of one or more accelerator requirements.
     """
@@ -116,7 +117,7 @@ def parseBool(val: str) -> bool:
 
 # This is kept in the outer scope as multiple batchsystem files use this
 def make_open_interval_action(
-    min: Union[int, float], max: Optional[Union[int, float]] = None
+    min: int | float, max: int | float | None = None
 ) -> type[Action]:
     """
     Returns an argparse action class to check if the input is within the given half-open interval.
@@ -286,7 +287,7 @@ def add_base_toil_options(
             setattr(namespace, self.dest, coordination_dir)
 
     def make_closed_interval_action(
-        min: Union[int, float], max: Optional[Union[int, float]] = None
+        min: int | float, max: int | float | None = None
     ) -> type[Action]:
         """
         Returns an argparse action class to check if the input is within the given half-open interval.
@@ -306,7 +307,7 @@ def add_base_toil_options(
             values: Any,
             option_string: Any = None,
         ) -> None:
-            def is_within(x: Union[int, float]) -> bool:
+            def is_within(x: int | float) -> bool:
                 if max is None:
                     return min <= x
                 else:
@@ -467,8 +468,10 @@ def add_base_toil_options(
     )
 
     caching = file_store_options.add_mutually_exclusive_group()
-    caching_help = (
-        …
+    caching_help = (
+        "Enable or disable worker level file caching for your workflow, specifying this overrides default from batch system. "
+        "Does not affect CWL or WDL task caching."
+    )
     caching.add_argument(
         "--caching",
        dest="caching",
@@ -1119,11 +1122,11 @@ def add_base_toil_options(
        default=None,
        help="Whether to publish workflow metrics reports (including unique workflow "
        "and task run IDs, job names, and version and Toil feature use information) to "
-        …
-        …
-        …
+        'Dockstore when a workflow completes. Selecting "current" will publish metrics '
+        'for the current workflow. Selecting "all" will also publish prior workflow '
+        'runs from the Toil history database, even if they themselves were run with "no". '
        "Note that once published, workflow metrics CANNOT be deleted or un-published; they "
-        "will stay published forever!"
+        "will stay published forever!",
    )
 
    # Debug options
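
The dominant pattern in the hunks above, and across this whole release, is annotation modernization: typing.Optional and typing.Union become PEP 604 unions (str | None, int | float), and Callable moves from typing to collections.abc. A runnable before/after sketch, with the interval-checker body simplified relative to Toil's actual iC:

    # Before (9.1.2 style):
    #   from typing import Callable, Optional
    #   def iC(min_value: int, max_value: Optional[int] = None) -> Callable[[int], bool]: ...

    # After (9.2.0 style):
    from collections.abc import Callable

    def iC(min_value: int, max_value: int | None = None) -> Callable[[int], bool]:
        """Return a checker for the half-open interval [min_value, max_value)."""
        if max_value is None:
            return lambda x: min_value <= x
        return lambda x: min_value <= x < max_value

    assert iC(1, 10)(5) and not iC(1, 10)(10)

Note that PEP 604 unions evaluated at runtime require Python 3.10, or "from __future__ import annotations" on older interpreters.
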
toil/options/cwl.py
CHANGED
@@ -424,7 +424,7 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
         or "Specify a minimum memory allocation for all tasks ."
         "If --no-cwl-default-ram is passed, this does not apply to tools that do not "
         "specify a memory requirement; --defaultMemory is used for those tools"
-        "in that case."
+        "in that case.",
     )
     parser.add_argument(
         "--destBucket",
@@ -439,5 +439,5 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
         "recomputing steps. Can be very helpful in the development and "
         "troubleshooting of CWL documents. This automatically bypasses the file store."
         " Not to be confused with --caching.",
-        dest="cachedir"
+        dest="cachedir",
    )
toil/options/runner.py
CHANGED
@@ -1,4 +1,4 @@
-from argparse import ArgumentParser
+from argparse import SUPPRESS, ArgumentParser
 
 from toil.lib.conversions import human2bytes
 
@@ -39,7 +39,11 @@ def add_runner_options(
 
     # Deprecated
     parser.add_argument(
-        "--importWorkersThreshold",
+        "--importWorkersThreshold",
+        "--import-workers-threshold",
+        dest="import_workers_batchsize",
+        type=lambda x: human2bytes(str(x)),
+        help=SUPPRESS,
     )
 
     import_workers_disk_argument = ["--importWorkersDisk"]
@@ -51,7 +55,7 @@ def add_runner_options(
         type=lambda x: human2bytes(str(x)),
         default="1 MiB",
         help="Specify the disk size each import worker will get. This usually will not need to be set as Toil will attempt to use file streaming when downloading files. "
-        …
-        …
-        …
+        "If not possible, for example, when downloading from AWS to a GCE job store, "
+        "this should be set to the largest file size of all files to import. This should be set in conjunction with the arguments "
+        "--runImportsOnWorkers and --importWorkersBatchSize."
    )
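
The change above folds the deprecated --importWorkersThreshold spelling into the newer batch-size option: both flags write to the same dest, and the old one is hidden from --help with SUPPRESS. A sketch of that argparse pattern (option definitions abbreviated, not Toil's exact ones):

    from argparse import SUPPRESS, ArgumentParser

    parser = ArgumentParser()
    # Current, documented option.
    parser.add_argument("--importWorkersBatchSize", dest="import_workers_batchsize",
                        help="Target batch size for import workers.")
    # Deprecated alias: same destination, hidden from help output.
    parser.add_argument("--importWorkersThreshold", dest="import_workers_batchsize",
                        help=SUPPRESS)

    args = parser.parse_args(["--importWorkersThreshold", "100MiB"])
    assert args.import_workers_batchsize == "100MiB"
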
toil/options/wdl.py
CHANGED
@@ -87,9 +87,7 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None:
         help=suppress_help or "Keep and return all call outputs as workflow outputs"
     )
 
-    strict_arguments = ["--wdlStrict"] + (
-        ["--strict"] if not suppress else []
-    )
+    strict_arguments = ["--wdlStrict"] + (["--strict"] if not suppress else [])
     parser.add_argument(
         *strict_arguments,
         dest="strict",
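
The wdl.py change is a pure one-line reflow, but the expression it touches is a small pattern worth seeing in isolation: the short --strict alias is only registered when WDL options are not being suppressed, by building the flag list conditionally and unpacking it into add_argument:

    from argparse import ArgumentParser

    suppress = False
    strict_arguments = ["--wdlStrict"] + (["--strict"] if not suppress else [])
    parser = ArgumentParser()
    parser.add_argument(*strict_arguments, dest="strict", action="store_true")
    assert parser.parse_args(["--strict"]).strict is True
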
toil/provisioners/__init__.py
CHANGED
@@ -14,7 +14,7 @@
 import argparse
 import logging
 from difflib import get_close_matches
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Union
 
 if TYPE_CHECKING:
     from toil.provisioners.aws.awsProvisioner import AWSProvisioner
@@ -26,12 +26,12 @@ logger = logging.getLogger(__name__)
 
 def cluster_factory(
     provisioner: str,
-    clusterName: Optional[str] = None,
+    clusterName: str | None = None,
     clusterType: str = "mesos",
-    zone: Optional[str] = None,
+    zone: str | None = None,
     nodeStorage: int = 50,
-    nodeStorageOverrides: Optional[list[str]] = None,
-    sseKey: Optional[str] = None,
+    nodeStorageOverrides: list[str] | None = None,
+    sseKey: str | None = None,
     enable_fuse: bool = False,
 ) -> Union["AWSProvisioner", "GCEProvisioner"]:
     """
@@ -114,8 +114,8 @@ def add_provisioner_options(parser: argparse.ArgumentParser) -> None:
 
 
 def parse_node_types(
-    node_type_specs: Optional[str],
-) -> list[tuple[set[str], Optional[float]]]:
+    node_type_specs: str | None,
+) -> list[tuple[set[str], float | None]]:
     """
     Parse a specification for zero or more node types.
 
@@ -180,7 +180,7 @@ def parse_node_types(
 
 
 def check_valid_node_types(
-    provisioner, node_types: list[tuple[set[str], Optional[float]]]
+    provisioner, node_types: list[tuple[set[str], float | None]]
 ):
     """
     Raises if an invalid nodeType is specified for aws or gce.
@@ -256,7 +256,7 @@ class ClusterCombinationNotSupportedException(Exception):
         provisioner_class: type,
         cluster_type: str,
         architecture: str,
-        reason: Optional[str] = None,
+        reason: str | None = None,
     ):
         message = (
             f"The {provisioner_class} provisioner does not support making {cluster_type} clusters "
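
For reference, parse_node_types turns a node-type specification into the list[tuple[set[str], float | None]] shape seen in the new annotations: each entry is a set of equivalent instance types plus an optional spot bid. A simplified sketch, assuming the documented "a/b:bid,c" spec format rather than reproducing Toil's full implementation (which also validates input and reports errors):

    def parse_node_types_sketch(spec: str | None) -> list[tuple[set[str], float | None]]:
        result: list[tuple[set[str], float | None]] = []
        for part in (spec or "").split(","):
            if not part:
                continue
            if ":" in part:
                # Slash-separated equivalent types, with a spot bid after the colon.
                types, bid = part.rsplit(":", 1)
                result.append((set(types.split("/")), float(bid)))
            else:
                result.append((set(part.split("/")), None))
        return result

    # [({'c5.4xlarge', 'c5a.4xlarge'}, 0.42), ({'t2.large'}, None)], set order may vary
    print(parse_node_types_sketch("c5.4xlarge/c5a.4xlarge:0.42,t2.large"))
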
toil/provisioners/abstractProvisioner.py
CHANGED
@@ -20,7 +20,7 @@ import tempfile
 import textwrap
 from abc import ABC, abstractmethod
 from functools import total_ordering
-from typing import Any, Optional, Union
+from typing import Any
 from urllib.parse import quote
 from uuid import uuid4
 
@@ -58,9 +58,9 @@ class Shape:
 
     def __init__(
         self,
-        wallTime: Union[int, float],
+        wallTime: int | float,
         memory: int,
-        cores: Union[int, float],
+        cores: int | float,
         disk: int,
         preemptible: bool,
     ) -> None:
@@ -107,12 +107,14 @@ class Shape:
         return self.greater_than(other)
 
     def __repr__(self) -> str:
-        return …
-        …
-        …
-        …
-        …
-        …
+        return (
+            "Shape(wallTime={}, memory={}, cores={}, disk={}, preemptible={})".format(
+                self.wallTime,
+                self.memory,
+                self.cores,
+                self.disk,
+                self.preemptible,
+            )
         )
 
     def __str__(self) -> str:
@@ -133,11 +135,11 @@ class AbstractProvisioner(ABC):
 
     def __init__(
         self,
-        clusterName: Optional[str] = None,
-        clusterType: Optional[str] = "mesos",
-        zone: Optional[str] = None,
+        clusterName: str | None = None,
+        clusterType: str | None = "mesos",
+        zone: str | None = None,
         nodeStorage: int = 50,
-        nodeStorageOverrides: Optional[list[str]] = None,
+        nodeStorageOverrides: list[str] | None = None,
         enable_fuse: bool = False,
     ) -> None:
         """
@@ -164,7 +166,7 @@ class AbstractProvisioner(ABC):
         for override in nodeStorageOverrides or []:
             nodeShape, storageOverride = override.split(":")
             self._nodeStorageOverrides[nodeShape] = int(storageOverride)
-        self._leaderPrivateIP: Optional[str] = None
+        self._leaderPrivateIP: str | None = None
         # This will hold an SSH public key for Mesos clusters, or the
         # Kubernetes joining information as a dict for Kubernetes clusters.
         self._leaderWorkerAuthentication = None
@@ -353,7 +355,7 @@ class AbstractProvisioner(ABC):
         # it.
         return dict(config["DEFAULT"])
 
-    def setAutoscaledNodeTypes(self, nodeTypes: list[tuple[set[str], Optional[float]]]):
+    def setAutoscaledNodeTypes(self, nodeTypes: list[tuple[set[str], float | None]]):
         """
         Set node types, shapes and spot bids for Toil-managed autoscaling.
         :param nodeTypes: A list of node types, as parsed with parse_node_types.
@@ -432,7 +434,7 @@ class AbstractProvisioner(ABC):
         nodeTypes: set[str],
         numNodes: int,
         preemptible: bool,
-        spotBid: Optional[float] = None,
+        spotBid: float | None = None,
     ) -> int:
         """
         Used to add worker nodes to the cluster
@@ -483,7 +485,7 @@ class AbstractProvisioner(ABC):
 
     @abstractmethod
     def getProvisionedWorkers(
-        self, instance_type: Optional[str] = None, preemptible: Optional[bool] = None
+        self, instance_type: str | None = None, preemptible: bool | None = None
     ) -> list[Node]:
         """
         Gets all nodes, optionally of the given instance type or
@@ -535,7 +537,7 @@ class AbstractProvisioner(ABC):
         self,
         path: str,
         filesystem: str = "root",
-        mode: Union[str, int] = "0755",
+        mode: str | int = "0755",
         contents: str = "",
         append: bool = False,
     ):
@@ -1060,7 +1062,7 @@ class AbstractProvisioner(ABC):
         """
         raise NotImplementedError()
 
-    def getKubernetesCloudProvider(self) -> Optional[str]:
+    def getKubernetesCloudProvider(self) -> str | None:
         """
         Return the Kubernetes cloud provider (for example, 'aws'), to pass to
         the kubelets in a Kubernetes cluster provisioned using this provisioner.
@@ -1390,7 +1392,7 @@ class AbstractProvisioner(ABC):
     def _getIgnitionUserData(
         self,
         role: str,
-        keyPath: Optional[str] = None,
+        keyPath: str | None = None,
         preemptible: bool = False,
         architecture: str = "amd64",
     ) -> str:
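
Shape is a total_ordering value class, so only __eq__ and one comparison method need defining; functools derives the rest. A compact sketch of that pattern using Shape's fields (the tuple comparison key here is illustrative, not Toil's actual greater_than logic):

    from functools import total_ordering

    @total_ordering
    class MiniShape:
        def __init__(self, wallTime: int | float, memory: int,
                     cores: int | float, disk: int, preemptible: bool) -> None:
            self.wallTime, self.memory, self.cores = wallTime, memory, cores
            self.disk, self.preemptible = disk, preemptible

        def _key(self) -> tuple:
            return (self.wallTime, self.memory, self.cores, self.disk, self.preemptible)

        def __eq__(self, other: object) -> bool:
            return isinstance(other, MiniShape) and self._key() == other._key()

        def __gt__(self, other: "MiniShape") -> bool:
            return self._key() > other._key()

    small = MiniShape(3600, 2 * 2**30, 2, 8 * 2**30, True)
    big = MiniShape(3600, 4 * 2**30, 4, 8 * 2**30, True)
    assert big > small and small <= big  # <=, <, >= derived by total_ordering
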
toil/provisioners/aws/__init__.py
CHANGED
@@ -16,7 +16,7 @@ import logging
 from collections import namedtuple
 from operator import attrgetter
 from statistics import mean, stdev
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from botocore.client import BaseClient
 
@@ -37,11 +37,11 @@ if TYPE_CHECKING:
 
 
 def get_aws_zone_from_spot_market(
-    spotBid: Optional[float],
-    nodeType: Optional[str],
-    boto3_ec2: Optional[BaseClient],
-    zone_options: Optional[list[str]],
-) -> Optional[str]:
+    spotBid: float | None,
+    nodeType: str | None,
+    boto3_ec2: BaseClient | None,
+    zone_options: list[str] | None,
+) -> str | None:
     """
     If a spot bid, node type, and Boto2 EC2 connection are specified, picks a
     zone where instances are easy to buy from the zones in the region of the
@@ -72,11 +72,11 @@ def get_aws_zone_from_spot_market(
 
 
 def get_best_aws_zone(
-    spotBid: Optional[float] = None,
-    nodeType: Optional[str] = None,
-    boto3_ec2: Optional[BaseClient] = None,
-    zone_options: Optional[list[str]] = None,
-) -> Optional[str]:
+    spotBid: float | None = None,
+    nodeType: str | None = None,
+    boto3_ec2: BaseClient | None = None,
+    zone_options: list[str] | None = None,
+) -> str | None:
     """
     Get the right AWS zone to use.
 
@@ -158,7 +158,9 @@ def choose_spot_zone(
         if zone_history["AvailabilityZone"] == zone
     ]
     if zone_histories:
-        price_deviation = stdev([float(history["SpotPrice"]) for history in zone_histories])
+        price_deviation = stdev(
+            [float(history["SpotPrice"]) for history in zone_histories]
+        )
         recent_price = float(zone_histories[0]["SpotPrice"])
     else:
         price_deviation, recent_price = 0.0, bid
@@ -188,7 +190,9 @@ def optimize_spot_bid(
     return most_stable_zone
 
 
-def _check_spot_bid(spot_bid: float, spot_history: list["SpotPriceTypeDef"], name: Optional[str] = None) -> None:
+def _check_spot_bid(
+    spot_bid: float, spot_history: list["SpotPriceTypeDef"], name: str | None = None
+) -> None:
     """
     Prevents users from potentially over-paying for instances
 
@@ -228,7 +232,9 @@ def _check_spot_bid(spot_bid: float, spot_history: list["SpotPriceTypeDef"], nam
     )
 
 
-def _get_spot_history(boto3_ec2: BaseClient, instance_type: str) -> list["SpotPriceTypeDef"]:
+def _get_spot_history(
+    boto3_ec2: BaseClient, instance_type: str
+) -> list["SpotPriceTypeDef"]:
     """
     Returns list of 1,000 most recent spot market data points represented as SpotPriceHistory
     objects. Note: The most recent object/data point will be first in the list.
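
As the choose_spot_zone hunk shows, the zone-selection heuristic scores each zone by the standard deviation of its recent spot prices and prefers the calmest market. A self-contained sketch of that idea, using plain dicts in place of boto3's SpotPriceTypeDef records:

    from statistics import stdev

    def most_stable_zone(history: list[dict[str, str]]) -> str:
        zones = {h["AvailabilityZone"] for h in history}

        def deviation(zone: str) -> float:
            prices = [float(h["SpotPrice"]) for h in history
                      if h["AvailabilityZone"] == zone]
            return stdev(prices) if len(prices) > 1 else 0.0

        return min(zones, key=deviation)

    history = [
        {"AvailabilityZone": "us-west-2a", "SpotPrice": "0.10"},
        {"AvailabilityZone": "us-west-2a", "SpotPrice": "0.40"},
        {"AvailabilityZone": "us-west-2b", "SpotPrice": "0.12"},
        {"AvailabilityZone": "us-west-2b", "SpotPrice": "0.13"},
    ]
    print(most_stable_zone(history))  # us-west-2b
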
toil/provisioners/aws/awsProvisioner.py
CHANGED
@@ -23,10 +23,10 @@ import string
 import textwrap
 import time
 import uuid
-from collections.abc import Collection, Iterable
+from collections.abc import Callable, Collection, Iterable
 from functools import wraps
 from shlex import quote
-from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast
 
 # We need these to exist as attributes we can get off of the boto object
 from botocore.exceptions import ClientError
@@ -325,7 +325,9 @@ class AWSProvisioner(AbstractProvisioner):
         self._worker_subnets_by_zone = self._get_good_subnets_like(self._leader_subnet)
 
         # Find the SSH key name to use to start instances
-        if hasattr(ec2_metadata, "public_keys") and isinstance(ec2_metadata.public_keys, dict):
+        if hasattr(ec2_metadata, "public_keys") and isinstance(
+            ec2_metadata.public_keys, dict
+        ):
             key_names = list(ec2_metadata.public_keys.keys())
             if len(key_names) > 0 and isinstance(key_names[0], str):
                 # We have a key name from the EC2 metadata. This should always
@@ -334,8 +336,10 @@ class AWSProvisioner(AbstractProvisioner):
                 # merged. Override anything from the tags.
                 self._keyName = key_names[0]
 
-        if not hasattr(self, "_keyName"):
-            raise RuntimeError("Unable to determine the SSH key name the cluster is using")
+        if not hasattr(self, "_keyName"):
+            raise RuntimeError(
+                "Unable to determine the SSH key name the cluster is using"
+            )
 
         self._leaderPrivateIP = ec2_metadata.private_ipv4  # this is PRIVATE IP
         self._tags = {
@@ -1021,9 +1025,7 @@ class AWSProvisioner(AbstractProvisioner):
         )
         # Boto 3 demands we base64 the user data ourselves *only* for spot
         # instances, and still wants a str.
-        spot_user_data = base64.b64encode(
-            userData.encode("utf-8")
-        ).decode("utf-8")
+        spot_user_data = base64.b64encode(userData.encode("utf-8")).decode("utf-8")
         spot_kwargs: dict[Literal["LaunchSpecification"], dict[str, Any]] = {
             "LaunchSpecification": {
                 "KeyName": self._keyName,
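
The one behavioral constant in this file, per the source comment above, is that boto3 requires the caller to base64-encode user data for spot requests (but not on-demand ones) and still wants a str, not bytes. The reflowed line round-trips like this:

    import base64

    user_data = "#!/bin/bash\necho hello"
    # Encode bytes to base64, then decode the base64 bytes back to a str for boto3.
    encoded = base64.b64encode(user_data.encode("utf-8")).decode("utf-8")
    assert base64.b64decode(encoded).decode("utf-8") == user_data
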
toil/provisioners/clusterScaler.py
CHANGED
@@ -18,7 +18,8 @@ import math
 import os
 import time
 from collections import defaultdict
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, Union
 
 from toil.batchSystems.abstractBatchSystem import (
     AbstractBatchSystem,
@@ -45,7 +46,7 @@ logger = logging.getLogger(__name__)
 EVICTION_THRESHOLD = human2bytes("100MiB")
 RESERVE_SMALL_LIMIT = human2bytes("1GiB")
 RESERVE_SMALL_AMOUNT = human2bytes("255MiB")
-RESERVE_BREAKPOINTS: list[Union[int, float]] = [
+RESERVE_BREAKPOINTS: list[int | float] = [
     human2bytes("4GiB"),
     human2bytes("8GiB"),
     human2bytes("16GiB"),
@@ -120,7 +121,7 @@ class BinPackedFit:
 
     def addJobShape(
         self, jobShape: Shape
-    ) -> Optional[tuple[Shape, list[FailedConstraint]]]:
+    ) -> tuple[Shape, list[FailedConstraint]] | None:
         """
         Add the job to the first node reservation in which it will fit. (This
         is the bin-packing aspect).
@@ -141,7 +142,7 @@ class BinPackedFit:
             jobShape,
         )
         # Go back and debug why this happened.
-        fewest_constraints: Optional[list[FailedConstraint]] = None
+        fewest_constraints: list[FailedConstraint] | None = None
         for shape in self.nodeShapes:
             failures = NodeReservation(nodeShape).get_failed_constraints(jobShape)
             if fewest_constraints is None or len(failures) < len(
@@ -199,7 +200,7 @@ class NodeReservation:
         # The wall-time of this slice and resources available in this timeslice
         self.shape = shape
         # The next portion of the reservation (None if this is the end)
-        self.nReservation: Optional[NodeReservation] = None
+        self.nReservation: NodeReservation | None = None
 
     def __str__(self) -> str:
         return (
@@ -290,7 +291,7 @@ class NodeReservation:
     def shapes(self) -> list[Shape]:
         """Get all time-slice shapes, in order, from this reservation on."""
         shapes = []
-        curRes: Optional[NodeReservation] = self
+        curRes: NodeReservation | None = self
         while curRes is not None:
             shapes.append(curRes.shape)
             curRes = curRes.nReservation
@@ -316,7 +317,7 @@ class NodeReservation:
         is a reservation for, and targetTime is the maximum time to wait before starting this job.
         """
         # starting slice of time that we can fit in so far
-        startingReservation: Optional[NodeReservation] = self
+        startingReservation: NodeReservation | None = self
         # current end of the slices we can fit in so far
         endingReservation = self
         # the amount of runtime of the job currently covered by slices
@@ -644,7 +645,7 @@ class ClusterScaler:
         # How many bytes are reserved so far?
         reserved = 0.0
         # How many bytes of memory have we accounted for so far?
-        accounted: Union[float, int] = 0
+        accounted: float | int = 0
         for breakpoint, fraction in zip(RESERVE_BREAKPOINTS, RESERVE_FRACTIONS):
             # Below each breakpoint, reserve the matching portion of the memory
             # since the previous breakpoint, like a progressive income tax.
@@ -1157,7 +1158,7 @@ class ClusterScaler:
         )
         return filtered_nodes
 
-    def getNodes(self, preemptible: Optional[bool] = None) -> dict["Node", NodeInfo]:
+    def getNodes(self, preemptible: bool | None = None) -> dict["Node", NodeInfo]:
         """
         Returns a dictionary mapping node identifiers of preemptible or non-preemptible nodes to
         NodeInfo objects, one for each node.
@@ -1253,9 +1254,9 @@ class JobTooBigError(Exception):
 
     def __init__(
         self,
-        job: Optional[JobDescription] = None,
-        shape: Optional[Shape] = None,
-        constraints: Optional[list[FailedConstraint]] = None,
+        job: JobDescription | None = None,
+        shape: Shape | None = None,
+        constraints: list[FailedConstraint] | None = None,
     ):
         """
         Make a JobTooBigError.
@@ -1398,8 +1399,8 @@ class ScalerThread(ExceptionalThread):
 
         if len(could_not_fit) != 0:
             # If we have any jobs left over that we couldn't fit, complain.
-            bad_job: Optional[JobDescription] = None
-            bad_shape: Optional[Shape] = None
+            bad_job: JobDescription | None = None
+            bad_shape: Shape | None = None
             for job, shape in zip(queuedJobs, queuedJobShapes):
                 # Try and find an example job with an offending shape
                 if shape in could_not_fit:
@@ -1430,7 +1431,7 @@ class ScalerThread(ExceptionalThread):
 
 class ClusterStats:
     def __init__(
-        self, path: str, batchSystem: AbstractBatchSystem, clusterName: Optional[str]
+        self, path: str, batchSystem: AbstractBatchSystem, clusterName: str | None
     ) -> None:
         logger.debug("Initializing cluster statistics")
         self.stats: dict[str, dict[str, list[dict[str, Any]]]] = {}
@@ -1439,7 +1440,7 @@ class ClusterStats:
         self.stop = False
         self.clusterName = clusterName
         self.batchSystem = batchSystem
-        self.scaleable = (
+        self.scalable = (
             isinstance(self.batchSystem, AbstractScalableBatchSystem)
             if batchSystem
             else False
@@ -1460,7 +1461,7 @@ class ClusterStats:
                 return fullName
             counter += 1
 
-        if self.statsPath and self.scaleable:
+        if self.statsPath and self.scalable:
            self.stop = True
            for thread in self.statsThreads:
                thread.join()
@@ -1492,7 +1493,7 @@ class ClusterStats:
            time=time.time(),  # add time stamp
        )
 
-        if self.scaleable:
+        if self.scalable:
            logger.debug("Starting to gather statistics")
            stats: dict[str, list[dict[str, Any]]] = {}
            if not isinstance(self.batchSystem, AbstractScalableBatchSystem):
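
The accounted/RESERVE_BREAKPOINTS loop above estimates how much of a node's memory is reserved away from jobs: each bracket of memory up to the next breakpoint contributes its own fraction, "like a progressive income tax" as the source comment puts it. A runnable sketch with illustrative breakpoint and fraction values (not Toil's actual RESERVE_FRACTIONS):

    BREAKPOINTS: list[int | float] = [4 * 2**30, 8 * 2**30, 16 * 2**30]  # 4, 8, 16 GiB
    FRACTIONS = [0.25, 0.20, 0.10]  # reserve rate within each bracket

    def reserved_memory(total: int) -> float:
        reserved = 0.0
        accounted: float | int = 0
        for breakpoint, fraction in zip(BREAKPOINTS, FRACTIONS):
            # Memory falling inside this bracket, i.e. between the previous
            # breakpoint (accounted) and this one, capped at the node total.
            in_bracket = max(0, min(total, breakpoint) - accounted)
            reserved += fraction * in_bracket
            accounted += in_bracket
        return reserved

    # 4 GiB * 0.25 + 4 GiB * 0.20 + 4 GiB * 0.10 ≈ 2.2 GiB reserved of 12 GiB
    print(reserved_memory(12 * 2**30) / 2**30)
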
toil/provisioners/gceProvisioner.py
CHANGED
@@ -17,7 +17,6 @@ import os
 import threading
 import time
 import uuid
-from typing import Optional
 
 from libcloud.compute.drivers.gce import GCEFailedNode
 from libcloud.compute.providers import get_driver
@@ -393,7 +392,7 @@ class GCEProvisioner(AbstractProvisioner):
         return workersCreated
 
     def getProvisionedWorkers(
-        self, instance_type: Optional[str] = None, preemptible: Optional[bool] = None
+        self, instance_type: str | None = None, preemptible: bool | None = None
     ):
         assert self._leaderPrivateIP
         entireCluster = self._getNodesInCluster(instance_type=instance_type)
@@ -460,7 +459,7 @@ class GCEProvisioner(AbstractProvisioner):
         if botoExists:
             node.injectFile(self._botoPath, self.NODE_BOTO_PATH, "toil_worker")
 
-    def _getNodesInCluster(self, instance_type: Optional[str] = None):
+    def _getNodesInCluster(self, instance_type: str | None = None):
         instanceGroup = self._gceDriver.ex_get_instancegroup(
             self.clusterName, zone=self._zone
         )