toil 9.1.2__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +14 -14
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
|
@@ -11,33 +11,22 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
import base64
|
|
15
|
-
import bz2
|
|
16
14
|
import logging
|
|
17
15
|
import os
|
|
18
|
-
import types
|
|
19
16
|
from ssl import SSLError
|
|
20
|
-
from typing import
|
|
17
|
+
from typing import IO, TYPE_CHECKING, Any
|
|
21
18
|
|
|
22
19
|
from boto3.s3.transfer import TransferConfig
|
|
23
20
|
from botocore.client import Config
|
|
24
21
|
from botocore.exceptions import ClientError
|
|
25
22
|
|
|
26
23
|
from toil.lib.aws import AWSServerErrors, session
|
|
27
|
-
from toil.lib.aws.utils import
|
|
24
|
+
from toil.lib.aws.utils import get_bucket_region
|
|
28
25
|
from toil.lib.compatibility import compat_bytes
|
|
29
|
-
from toil.lib.retry import
|
|
30
|
-
DEFAULT_DELAYS,
|
|
31
|
-
DEFAULT_TIMEOUT,
|
|
32
|
-
get_error_code,
|
|
33
|
-
get_error_message,
|
|
34
|
-
get_error_status,
|
|
35
|
-
old_retry,
|
|
36
|
-
retry,
|
|
37
|
-
)
|
|
26
|
+
from toil.lib.retry import get_error_code, get_error_message, retry
|
|
38
27
|
|
|
39
28
|
if TYPE_CHECKING:
|
|
40
|
-
from mypy_boto3_s3 import
|
|
29
|
+
from mypy_boto3_s3 import S3ServiceResource
|
|
41
30
|
from mypy_boto3_s3.type_defs import CopySourceTypeDef
|
|
42
31
|
|
|
43
32
|
logger = logging.getLogger(__name__)
|
|
@@ -66,9 +55,9 @@ def uploadFromPath(
|
|
|
66
55
|
resource: "S3ServiceResource",
|
|
67
56
|
bucketName: str,
|
|
68
57
|
fileID: str,
|
|
69
|
-
headerArgs:
|
|
58
|
+
headerArgs: dict[str, Any] | None = None,
|
|
70
59
|
partSize: int = 50 << 20,
|
|
71
|
-
) ->
|
|
60
|
+
) -> str | None:
|
|
72
61
|
"""
|
|
73
62
|
Uploads a file to s3, using multipart uploading if applicable
|
|
74
63
|
|
|
@@ -112,9 +101,9 @@ def uploadFile(
|
|
|
112
101
|
resource: "S3ServiceResource",
|
|
113
102
|
bucketName: str,
|
|
114
103
|
fileID: str,
|
|
115
|
-
headerArgs:
|
|
104
|
+
headerArgs: dict[str, Any] | None = None,
|
|
116
105
|
partSize: int = 50 << 20,
|
|
117
|
-
) ->
|
|
106
|
+
) -> str | None:
|
|
118
107
|
"""
|
|
119
108
|
Upload a readable object to s3, using multipart uploading if applicable.
|
|
120
109
|
:param readable: a readable stream or a file path to upload to s3
|
|
@@ -172,11 +161,11 @@ def copyKeyMultipart(
|
|
|
172
161
|
srcKeyVersion: str,
|
|
173
162
|
dstBucketName: str,
|
|
174
163
|
dstKeyName: str,
|
|
175
|
-
sseAlgorithm:
|
|
176
|
-
sseKey:
|
|
177
|
-
copySourceSseAlgorithm:
|
|
178
|
-
copySourceSseKey:
|
|
179
|
-
) ->
|
|
164
|
+
sseAlgorithm: str | None = None,
|
|
165
|
+
sseKey: str | None = None,
|
|
166
|
+
copySourceSseAlgorithm: str | None = None,
|
|
167
|
+
copySourceSseKey: str | None = None,
|
|
168
|
+
) -> str | None:
|
|
180
169
|
"""
|
|
181
170
|
Copies a key from a source key to a destination key in multiple parts. Note that if the
|
|
182
171
|
destination key exists it will be overwritten implicitly, and if it does not exist a new
|
toil/jobStores/fileJobStore.py
CHANGED
|
@@ -23,7 +23,7 @@ import time
|
|
|
23
23
|
import uuid
|
|
24
24
|
from collections.abc import Iterable, Iterator
|
|
25
25
|
from contextlib import contextmanager
|
|
26
|
-
from typing import IO, Literal,
|
|
26
|
+
from typing import IO, Literal, overload
|
|
27
27
|
from urllib.parse import ParseResult, quote, unquote
|
|
28
28
|
|
|
29
29
|
from toil.fileStores import FileID
|
|
@@ -327,7 +327,7 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
327
327
|
# linking is not done be default because of issue #1755
|
|
328
328
|
# TODO: is hardlinking ever actually done?
|
|
329
329
|
src_path = self._extract_path_from_url(src_path)
|
|
330
|
-
if self.linkImports and not hardlink and symlink:
|
|
330
|
+
if self.linkImports and not hardlink and symlink and src_path != "/dev/null":
|
|
331
331
|
os.symlink(os.path.realpath(src_path), dst_path)
|
|
332
332
|
else:
|
|
333
333
|
atomic_copy(src_path, dst_path)
|
|
@@ -703,33 +703,33 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
703
703
|
@overload
|
|
704
704
|
def read_file_stream(
|
|
705
705
|
self,
|
|
706
|
-
file_id:
|
|
706
|
+
file_id: str | FileID,
|
|
707
707
|
encoding: Literal[None] = None,
|
|
708
|
-
errors:
|
|
708
|
+
errors: str | None = None,
|
|
709
709
|
) -> Iterator[IO[bytes]]: ...
|
|
710
710
|
|
|
711
711
|
@contextmanager
|
|
712
712
|
@overload
|
|
713
713
|
def read_file_stream(
|
|
714
|
-
self, file_id:
|
|
714
|
+
self, file_id: str | FileID, encoding: str, errors: str | None = None
|
|
715
715
|
) -> Iterator[IO[str]]: ...
|
|
716
716
|
|
|
717
717
|
@contextmanager
|
|
718
718
|
@overload
|
|
719
719
|
def read_file_stream(
|
|
720
720
|
self,
|
|
721
|
-
file_id:
|
|
722
|
-
encoding:
|
|
723
|
-
errors:
|
|
724
|
-
) ->
|
|
721
|
+
file_id: str | FileID,
|
|
722
|
+
encoding: str | None = None,
|
|
723
|
+
errors: str | None = None,
|
|
724
|
+
) -> Iterator[IO[bytes]] | Iterator[IO[str]]: ...
|
|
725
725
|
|
|
726
726
|
@contextmanager
|
|
727
727
|
def read_file_stream(
|
|
728
728
|
self,
|
|
729
|
-
file_id:
|
|
730
|
-
encoding:
|
|
731
|
-
errors:
|
|
732
|
-
) ->
|
|
729
|
+
file_id: str | FileID,
|
|
730
|
+
encoding: str | None = None,
|
|
731
|
+
errors: str | None = None,
|
|
732
|
+
) -> Iterator[IO[bytes]] | Iterator[IO[str]]:
|
|
733
733
|
self._check_job_store_file_id(file_id)
|
|
734
734
|
if encoding is None:
|
|
735
735
|
with open(
|
|
@@ -779,7 +779,7 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
779
779
|
self,
|
|
780
780
|
shared_file_name: str,
|
|
781
781
|
encoding: str,
|
|
782
|
-
errors:
|
|
782
|
+
errors: str | None = None,
|
|
783
783
|
) -> Iterator[IO[str]]: ...
|
|
784
784
|
|
|
785
785
|
@overload
|
|
@@ -788,16 +788,16 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
788
788
|
self,
|
|
789
789
|
shared_file_name: str,
|
|
790
790
|
encoding: Literal[None] = None,
|
|
791
|
-
errors:
|
|
791
|
+
errors: str | None = None,
|
|
792
792
|
) -> Iterator[IO[bytes]]: ...
|
|
793
793
|
|
|
794
794
|
@contextmanager
|
|
795
795
|
def read_shared_file_stream(
|
|
796
796
|
self,
|
|
797
797
|
shared_file_name: str,
|
|
798
|
-
encoding:
|
|
799
|
-
errors:
|
|
800
|
-
) ->
|
|
798
|
+
encoding: str | None = None,
|
|
799
|
+
errors: str | None = None,
|
|
800
|
+
) -> Iterator[IO[bytes]] | Iterator[IO[str]]:
|
|
801
801
|
self._requireValidSharedFileName(shared_file_name)
|
|
802
802
|
try:
|
|
803
803
|
with open(
|
|
@@ -814,7 +814,7 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
814
814
|
else:
|
|
815
815
|
raise
|
|
816
816
|
|
|
817
|
-
def list_all_file_names(self, for_job:
|
|
817
|
+
def list_all_file_names(self, for_job: str | None = None) -> Iterable[str]:
|
|
818
818
|
"""
|
|
819
819
|
Get all the file names (not file IDs) of files stored in the job store.
|
|
820
820
|
|
|
@@ -871,12 +871,18 @@ class FileJobStore(AbstractJobStore, URLAccess):
|
|
|
871
871
|
|
|
872
872
|
def write_logs(self, msg):
|
|
873
873
|
# Temporary files are placed in the stats directory tree
|
|
874
|
-
tempStatsFileName =
|
|
875
|
-
|
|
874
|
+
tempStatsFileName = (
|
|
875
|
+
self.LOG_PREFIX + str(uuid.uuid4().hex) + self.LOG_TEMP_SUFFIX
|
|
876
|
+
)
|
|
877
|
+
tempStatsFile = os.path.join(
|
|
878
|
+
self._get_arbitrary_stats_inbox_dir(), tempStatsFileName
|
|
879
|
+
)
|
|
876
880
|
writeFormat = "w" if isinstance(msg, str) else "wb"
|
|
877
881
|
with open(tempStatsFile, writeFormat) as f:
|
|
878
882
|
f.write(msg)
|
|
879
|
-
os.rename(
|
|
883
|
+
os.rename(
|
|
884
|
+
tempStatsFile, tempStatsFile[: -len(self.LOG_TEMP_SUFFIX)]
|
|
885
|
+
) # This operation is atomic
|
|
880
886
|
|
|
881
887
|
def read_logs(self, callback, read_all=False):
|
|
882
888
|
files_processed = 0
|
toil/jobStores/googleJobStore.py
CHANGED
|
@@ -17,10 +17,11 @@ import pickle
|
|
|
17
17
|
import stat
|
|
18
18
|
import time
|
|
19
19
|
import uuid
|
|
20
|
+
from collections.abc import Iterator
|
|
20
21
|
from contextlib import contextmanager
|
|
21
22
|
from functools import wraps
|
|
22
23
|
from io import BytesIO
|
|
23
|
-
from typing import
|
|
24
|
+
from typing import IO, Any
|
|
24
25
|
from urllib.parse import ParseResult, urlunparse
|
|
25
26
|
|
|
26
27
|
from google.api_core.exceptions import (
|
|
@@ -39,10 +40,10 @@ from toil.jobStores.abstractJobStore import (
|
|
|
39
40
|
NoSuchJobException,
|
|
40
41
|
NoSuchJobStoreException,
|
|
41
42
|
)
|
|
42
|
-
from toil.lib.pipes import ReadablePipe, WritablePipe
|
|
43
43
|
from toil.lib.compatibility import compat_bytes
|
|
44
44
|
from toil.lib.io import AtomicFileCreate
|
|
45
45
|
from toil.lib.misc import truncExpBackoff
|
|
46
|
+
from toil.lib.pipes import ReadablePipe, WritablePipe
|
|
46
47
|
from toil.lib.retry import old_retry
|
|
47
48
|
from toil.lib.url import URLAccess
|
|
48
49
|
|
|
@@ -92,6 +93,7 @@ def google_retry(f):
|
|
|
92
93
|
|
|
93
94
|
return wrapper
|
|
94
95
|
|
|
96
|
+
|
|
95
97
|
@contextmanager
|
|
96
98
|
def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
|
|
97
99
|
"""
|
|
@@ -102,7 +104,7 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
|
|
|
102
104
|
behind the scenes. Then it will complain::
|
|
103
105
|
|
|
104
106
|
<class 'google.auth.exceptions.InvalidOperation'>: Anonymous credentials cannot be refreshed.
|
|
105
|
-
|
|
107
|
+
|
|
106
108
|
We need to detect this and report that the real problem is that the user
|
|
107
109
|
has not set up any credentials. When you try to make the client
|
|
108
110
|
non-anonymously and don't have credentials set up, you get a nice error
|
|
@@ -138,7 +140,6 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
|
|
|
138
140
|
raise
|
|
139
141
|
|
|
140
142
|
|
|
141
|
-
|
|
142
143
|
class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
143
144
|
|
|
144
145
|
nodeServiceAccountJson = "/root/service_account.json"
|
|
@@ -182,6 +183,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
182
183
|
"""
|
|
183
184
|
|
|
184
185
|
notes: list[str] = []
|
|
186
|
+
|
|
185
187
|
def add_note(message: str, *args: Any, warn: bool = False) -> None:
|
|
186
188
|
"""
|
|
187
189
|
Add and possibly warn with a note about the client permissions.
|
|
@@ -190,6 +192,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
190
192
|
if warn:
|
|
191
193
|
log.warning(note)
|
|
192
194
|
notes.append(note)
|
|
195
|
+
|
|
193
196
|
def compile_notes() -> str:
|
|
194
197
|
"""
|
|
195
198
|
Make one string explainign why we might not have expected permissions.
|
|
@@ -202,9 +205,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
202
205
|
# Determine if we have an override environment variable for our credentials.
|
|
203
206
|
# We get the path to check existence, but Google Storage works out what
|
|
204
207
|
# to use later by looking at the environment again.
|
|
205
|
-
credentials_path:
|
|
206
|
-
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
207
|
-
)
|
|
208
|
+
credentials_path: str | None = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", None)
|
|
208
209
|
if credentials_path is not None and not os.path.exists(credentials_path):
|
|
209
210
|
# If the file is missing, complain.
|
|
210
211
|
# This variable holds a file name and not any sensitive data itself.
|
|
@@ -212,22 +213,25 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
212
213
|
"File '%s' from GOOGLE_APPLICATION_CREDENTIALS is unavailable! "
|
|
213
214
|
"We may not be able to authenticate!",
|
|
214
215
|
credentials_path,
|
|
215
|
-
warn=True
|
|
216
|
+
warn=True,
|
|
216
217
|
)
|
|
217
218
|
|
|
218
219
|
if credentials_path is None and os.path.exists(cls.nodeServiceAccountJson):
|
|
219
220
|
try:
|
|
220
221
|
# load credentials from a particular file on GCE nodes if an
|
|
221
222
|
# override path is not set
|
|
222
|
-
return
|
|
223
|
-
|
|
224
|
-
|
|
223
|
+
return (
|
|
224
|
+
storage.Client.from_service_account_json(
|
|
225
|
+
cls.nodeServiceAccountJson
|
|
226
|
+
),
|
|
227
|
+
compile_notes(),
|
|
228
|
+
)
|
|
225
229
|
except OSError:
|
|
226
230
|
# Probably we don't have permission to use the file.
|
|
227
231
|
add_note(
|
|
228
232
|
"File '%s' exists but didn't work to authenticate!",
|
|
229
233
|
cls.nodeServiceAccountJson,
|
|
230
|
-
warn=True
|
|
234
|
+
warn=True,
|
|
231
235
|
)
|
|
232
236
|
|
|
233
237
|
# Either a filename is specified, or our fallback file isn't there.
|
|
@@ -366,9 +370,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
366
370
|
|
|
367
371
|
env = {}
|
|
368
372
|
|
|
369
|
-
credentials_path:
|
|
370
|
-
"GOOGLE_APPLICATION_CREDENTIALS", None
|
|
371
|
-
)
|
|
373
|
+
credentials_path: str | None = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", None)
|
|
372
374
|
if credentials_path is not None:
|
|
373
375
|
# Send along the environment variable that points to the credentials file.
|
|
374
376
|
# It must be available in the same place on all nodes.
|
|
@@ -486,7 +488,9 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
486
488
|
|
|
487
489
|
@classmethod
|
|
488
490
|
@google_retry
|
|
489
|
-
def _get_blob_from_url(
|
|
491
|
+
def _get_blob_from_url(
|
|
492
|
+
cls, client: storage.Client, url: ParseResult, exists: bool = False
|
|
493
|
+
) -> storage.blob.Blob:
|
|
490
494
|
"""
|
|
491
495
|
Gets the blob specified by the url.
|
|
492
496
|
|
|
@@ -521,7 +525,7 @@ class GoogleJobStore(AbstractJobStore, URLAccess):
|
|
|
521
525
|
@classmethod
|
|
522
526
|
def _url_exists(cls, url: ParseResult) -> bool:
|
|
523
527
|
client, auth_notes = cls.create_client()
|
|
524
|
-
with permission_error_reporter(url, auth_notes):
|
|
528
|
+
with permission_error_reporter(url, auth_notes):
|
|
525
529
|
try:
|
|
526
530
|
cls._get_blob_from_url(client, url, exists=True)
|
|
527
531
|
return True
|
toil/jobStores/utils.py
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
1
|
-
import errno
|
|
2
1
|
import logging
|
|
3
2
|
import os
|
|
4
3
|
import tempfile
|
|
5
4
|
import uuid
|
|
6
|
-
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Optional
|
|
8
|
-
|
|
9
|
-
from toil.lib.threading import ExceptionalThread
|
|
10
5
|
|
|
11
6
|
log = logging.getLogger(__name__)
|
|
12
7
|
|
|
8
|
+
|
|
13
9
|
class JobStoreUnavailableException(RuntimeError):
|
|
14
10
|
"""
|
|
15
11
|
Raised when a particular type of job store is requested but can't be used.
|
|
@@ -18,8 +14,8 @@ class JobStoreUnavailableException(RuntimeError):
|
|
|
18
14
|
|
|
19
15
|
def generate_locator(
|
|
20
16
|
job_store_type: str,
|
|
21
|
-
local_suggestion:
|
|
22
|
-
decoration:
|
|
17
|
+
local_suggestion: str | None = None,
|
|
18
|
+
decoration: str | None = None,
|
|
23
19
|
) -> str:
|
|
24
20
|
"""
|
|
25
21
|
Generate a random locator for a job store of the given type. Raises an
|
toil/leader.py
CHANGED
|
@@ -21,7 +21,7 @@ import os
|
|
|
21
21
|
import pickle
|
|
22
22
|
import sys
|
|
23
23
|
import time
|
|
24
|
-
from typing import Any
|
|
24
|
+
from typing import Any
|
|
25
25
|
|
|
26
26
|
import enlighten
|
|
27
27
|
|
|
@@ -89,10 +89,10 @@ class Leader:
|
|
|
89
89
|
self,
|
|
90
90
|
config: Config,
|
|
91
91
|
batchSystem: AbstractBatchSystem,
|
|
92
|
-
provisioner:
|
|
92
|
+
provisioner: AbstractProvisioner | None,
|
|
93
93
|
jobStore: AbstractJobStore,
|
|
94
94
|
rootJob: JobDescription,
|
|
95
|
-
jobCache:
|
|
95
|
+
jobCache: dict[str | TemporaryID, JobDescription] | None = None,
|
|
96
96
|
) -> None:
|
|
97
97
|
"""
|
|
98
98
|
Create a Toil Leader object.
|
|
@@ -201,7 +201,7 @@ class Leader:
|
|
|
201
201
|
|
|
202
202
|
# A dashboard that runs on the leader node in AWS clusters to track the state
|
|
203
203
|
# of the cluster
|
|
204
|
-
self.toilMetrics:
|
|
204
|
+
self.toilMetrics: ToilMetrics | None = None
|
|
205
205
|
|
|
206
206
|
# internal jobs we should not expose at top level debugging
|
|
207
207
|
self.debugJobNames = (
|
|
@@ -847,15 +847,15 @@ class Leader:
|
|
|
847
847
|
)
|
|
848
848
|
message = [
|
|
849
849
|
f"Job failed with exit value {status_string}: {updatedJob}",
|
|
850
|
-
f"Exit reason: {BatchJobExitReason.to_string(update.exitReason)}"
|
|
850
|
+
f"Exit reason: {BatchJobExitReason.to_string(update.exitReason)}",
|
|
851
851
|
]
|
|
852
852
|
if update.backing_id is not None:
|
|
853
853
|
# Report the job in the backing scheduler in case the user
|
|
854
854
|
# needs to follow it down a level.
|
|
855
|
-
message.append(
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
)
|
|
855
|
+
message.append(
|
|
856
|
+
f"Failed job in backing scheduler: {update.backing_id}"
|
|
857
|
+
)
|
|
858
|
+
logger.warning("\n".join(message))
|
|
859
859
|
# This logic is undefined for which of the failing jobs will send its exit code
|
|
860
860
|
# when there are multiple failing jobs with different exit statuses
|
|
861
861
|
self.recommended_fail_exit_code = update.exitStatus
|
|
@@ -1183,7 +1183,7 @@ class Leader:
|
|
|
1183
1183
|
)
|
|
1184
1184
|
self.preemptibleServiceJobsIssued += 1
|
|
1185
1185
|
|
|
1186
|
-
def getNumberOfJobsIssued(self, preemptible:
|
|
1186
|
+
def getNumberOfJobsIssued(self, preemptible: bool | None = None) -> int:
|
|
1187
1187
|
"""
|
|
1188
1188
|
Get number of jobs that have been added by issueJob(s) and not removed by removeJob.
|
|
1189
1189
|
|
|
@@ -1262,7 +1262,7 @@ class Leader:
|
|
|
1262
1262
|
|
|
1263
1263
|
return issuedDesc
|
|
1264
1264
|
|
|
1265
|
-
def getJobs(self, preemptible:
|
|
1265
|
+
def getJobs(self, preemptible: bool | None = None) -> list[JobDescription]:
|
|
1266
1266
|
"""
|
|
1267
1267
|
Get all issued jobs.
|
|
1268
1268
|
|
|
@@ -1422,9 +1422,9 @@ class Leader:
|
|
|
1422
1422
|
self,
|
|
1423
1423
|
finished_job: JobDescription,
|
|
1424
1424
|
result_status: int,
|
|
1425
|
-
wall_time:
|
|
1426
|
-
exit_reason:
|
|
1427
|
-
batch_system_id:
|
|
1425
|
+
wall_time: float | None = None,
|
|
1426
|
+
exit_reason: BatchJobExitReason | None = None,
|
|
1427
|
+
batch_system_id: int | None = None,
|
|
1428
1428
|
) -> bool:
|
|
1429
1429
|
"""
|
|
1430
1430
|
Process a finished JobDescription based upon its success or failure.
|
toil/lib/accelerators.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
import os
|
|
18
18
|
import string
|
|
19
19
|
import subprocess
|
|
20
|
-
from typing import
|
|
20
|
+
from typing import cast
|
|
21
21
|
from xml.dom import minidom
|
|
22
22
|
|
|
23
23
|
from toil.job import AcceleratorRequirement
|
|
@@ -34,7 +34,9 @@ def have_working_nvidia_smi() -> bool:
|
|
|
34
34
|
it can fulfill a CUDARequirement.
|
|
35
35
|
"""
|
|
36
36
|
try:
|
|
37
|
-
subprocess.check_call(
|
|
37
|
+
subprocess.check_call(
|
|
38
|
+
["nvidia-smi"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
|
|
39
|
+
)
|
|
38
40
|
except (
|
|
39
41
|
FileNotFoundError,
|
|
40
42
|
PermissionError,
|
|
@@ -105,7 +107,7 @@ def have_working_nvidia_docker_runtime() -> bool:
|
|
|
105
107
|
"nvidia-smi",
|
|
106
108
|
],
|
|
107
109
|
stdout=subprocess.DEVNULL,
|
|
108
|
-
stderr=subprocess.DEVNULL
|
|
110
|
+
stderr=subprocess.DEVNULL,
|
|
109
111
|
)
|
|
110
112
|
except (
|
|
111
113
|
FileNotFoundError,
|
|
@@ -217,7 +219,7 @@ def get_individual_local_accelerators() -> list[AcceleratorRequirement]:
|
|
|
217
219
|
|
|
218
220
|
|
|
219
221
|
def get_restrictive_environment_for_local_accelerators(
|
|
220
|
-
accelerator_numbers:
|
|
222
|
+
accelerator_numbers: set[int] | list[int],
|
|
221
223
|
) -> dict[str, str]:
|
|
222
224
|
"""
|
|
223
225
|
Get environment variables which can be applied to a process to restrict it
|
toil/lib/aws/__init__.py
CHANGED
|
@@ -15,10 +15,9 @@ import json
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import re
|
|
18
|
-
import socket
|
|
19
18
|
from collections.abc import MutableMapping
|
|
20
19
|
from http.client import HTTPException
|
|
21
|
-
from typing import TYPE_CHECKING, Literal,
|
|
20
|
+
from typing import TYPE_CHECKING, Literal, Union
|
|
22
21
|
from urllib.error import URLError
|
|
23
22
|
from urllib.request import urlopen
|
|
24
23
|
|
|
@@ -43,7 +42,7 @@ logger = logging.getLogger(__name__)
|
|
|
43
42
|
# which may not be installed, because it has to be importable everywhere.
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def get_current_aws_region() ->
|
|
45
|
+
def get_current_aws_region() -> str | None:
|
|
47
46
|
"""
|
|
48
47
|
Return the AWS region that the currently configured AWS zone (see
|
|
49
48
|
get_current_aws_zone()) is in.
|
|
@@ -53,14 +52,14 @@ def get_current_aws_region() -> Optional[str]:
|
|
|
53
52
|
return zone_to_region(aws_zone) if aws_zone else None
|
|
54
53
|
|
|
55
54
|
|
|
56
|
-
def get_aws_zone_from_environment() ->
|
|
55
|
+
def get_aws_zone_from_environment() -> str | None:
|
|
57
56
|
"""
|
|
58
57
|
Get the AWS zone from TOIL_AWS_ZONE if set.
|
|
59
58
|
"""
|
|
60
59
|
return os.environ.get("TOIL_AWS_ZONE", None)
|
|
61
60
|
|
|
62
61
|
|
|
63
|
-
def get_aws_zone_from_metadata() ->
|
|
62
|
+
def get_aws_zone_from_metadata() -> str | None:
|
|
64
63
|
"""
|
|
65
64
|
Get the AWS zone from instance metadata, if on EC2 and the boto module is
|
|
66
65
|
available. Otherwise, gets the AWS zone from ECS task metadata, if on ECS.
|
|
@@ -103,7 +102,7 @@ def get_aws_zone_from_metadata() -> Optional[str]:
|
|
|
103
102
|
return None
|
|
104
103
|
|
|
105
104
|
|
|
106
|
-
def get_aws_zone_from_boto() ->
|
|
105
|
+
def get_aws_zone_from_boto() -> str | None:
|
|
107
106
|
"""
|
|
108
107
|
Get the AWS zone from the Boto3 config file or from AWS_DEFAULT_REGION, if it is configured and the
|
|
109
108
|
boto3 module is available.
|
|
@@ -122,7 +121,7 @@ def get_aws_zone_from_boto() -> Optional[str]:
|
|
|
122
121
|
return None
|
|
123
122
|
|
|
124
123
|
|
|
125
|
-
def get_aws_zone_from_environment_region() ->
|
|
124
|
+
def get_aws_zone_from_environment_region() -> str | None:
|
|
126
125
|
"""
|
|
127
126
|
Pick an AWS zone in the region defined by TOIL_AWS_REGION, if it is set.
|
|
128
127
|
"""
|
|
@@ -134,7 +133,7 @@ def get_aws_zone_from_environment_region() -> Optional[str]:
|
|
|
134
133
|
return None
|
|
135
134
|
|
|
136
135
|
|
|
137
|
-
def get_current_aws_zone() ->
|
|
136
|
+
def get_current_aws_zone() -> str | None:
|
|
138
137
|
"""
|
|
139
138
|
Get the currently configured or occupied AWS zone to use.
|
|
140
139
|
|
|
@@ -191,7 +190,7 @@ def running_on_ec2() -> bool:
|
|
|
191
190
|
timeout=1,
|
|
192
191
|
)
|
|
193
192
|
return True
|
|
194
|
-
except (URLError,
|
|
193
|
+
except (URLError, TimeoutError, HTTPException):
|
|
195
194
|
return False
|
|
196
195
|
|
|
197
196
|
|
|
@@ -204,7 +203,7 @@ def running_on_ecs() -> bool:
|
|
|
204
203
|
|
|
205
204
|
|
|
206
205
|
def build_tag_dict_from_env(
|
|
207
|
-
environment: MutableMapping[str, str] = os.environ
|
|
206
|
+
environment: MutableMapping[str, str] = os.environ,
|
|
208
207
|
) -> dict[str, str]:
|
|
209
208
|
tags = dict()
|
|
210
209
|
owner_tag = environment.get("TOIL_OWNER_TAG")
|
toil/lib/aws/ami.py
CHANGED
|
@@ -14,10 +14,13 @@ from toil.lib.retry import retry
|
|
|
14
14
|
|
|
15
15
|
logger = logging.getLogger(__name__)
|
|
16
16
|
|
|
17
|
+
|
|
17
18
|
class ReleaseFeedUnavailableError(RuntimeError):
|
|
18
19
|
"""Raised when a Flatcar releases can't be located."""
|
|
20
|
+
|
|
19
21
|
pass
|
|
20
22
|
|
|
23
|
+
|
|
21
24
|
@retry(errors=[ReleaseFeedUnavailableError])
|
|
22
25
|
def get_flatcar_ami(ec2_client: BaseClient, architecture: str = "amd64") -> str:
|
|
23
26
|
"""
|
|
@@ -92,14 +95,14 @@ def _fetch_flatcar_feed(architecture: str = "amd64", source: str = "stable") ->
|
|
|
92
95
|
"stable": f"https://stable.release.flatcar-linux.net/{architecture}-usr/current/flatcar_production_ami_all.json",
|
|
93
96
|
"beta": f"https://beta.release.flatcar-linux.net/{architecture}-usr/current/flatcar_production_ami_all.json",
|
|
94
97
|
# "alpha": f"https://alpha.release.flatcar-linux.net/{architecture}-usr/current/flatcar_production_ami_all.json",
|
|
95
|
-
"archive": f"https://web.archive.org/web/20220625112618if_/https://stable.release.flatcar-linux.net/{architecture}-usr/current/flatcar_production_ami_all.json"
|
|
98
|
+
"archive": f"https://web.archive.org/web/20220625112618if_/https://stable.release.flatcar-linux.net/{architecture}-usr/current/flatcar_production_ami_all.json",
|
|
96
99
|
}[source]
|
|
97
100
|
return cast(bytes, urllib.request.urlopen(JSON_FEED_URL).read())
|
|
98
101
|
|
|
99
102
|
|
|
100
103
|
def flatcar_release_feed_ami(
|
|
101
104
|
region: str, architecture: str = "amd64", source: str = "stable"
|
|
102
|
-
) ->
|
|
105
|
+
) -> str | None:
|
|
103
106
|
"""
|
|
104
107
|
Yield AMI IDs for the given architecture from the Flatcar release feed.
|
|
105
108
|
|
|
@@ -150,12 +153,14 @@ def flatcar_release_feed_ami(
|
|
|
150
153
|
if ami_record.get("name") == region:
|
|
151
154
|
return str(ami_record.get("hvm")) if ami_record.get("hvm") else None
|
|
152
155
|
# We didn't find our region
|
|
153
|
-
logger.warning(
|
|
156
|
+
logger.warning(
|
|
157
|
+
f"Flatcar {source} release feed does not have an image for region {region}"
|
|
158
|
+
)
|
|
154
159
|
|
|
155
160
|
|
|
156
161
|
def feed_flatcar_ami_release(
|
|
157
162
|
ec2_client: BaseClient, architecture: str = "amd64", source: str = "stable"
|
|
158
|
-
) ->
|
|
163
|
+
) -> str | None:
|
|
159
164
|
"""
|
|
160
165
|
Check a Flatcar release feed for the latest flatcar AMI.
|
|
161
166
|
|
|
@@ -175,27 +180,36 @@ def feed_flatcar_ami_release(
|
|
|
175
180
|
|
|
176
181
|
region = ec2_client._client_config.region_name # type: ignore
|
|
177
182
|
|
|
178
|
-
ami
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
+
if ami := flatcar_release_feed_ami(region, architecture, source):
|
|
184
|
+
# verify it exists on AWS
|
|
185
|
+
try:
|
|
186
|
+
response = ec2_client.describe_images(Filters=[{"Name": "image-id", "Values": [ami]}]) # type: ignore
|
|
187
|
+
if (
|
|
188
|
+
len(response["Images"]) == 1
|
|
189
|
+
and response["Images"][0]["State"] == "available"
|
|
190
|
+
):
|
|
191
|
+
return ami
|
|
192
|
+
else:
|
|
193
|
+
logger.warning(
|
|
194
|
+
f"Flatcar release feed suggests image {ami} which does not exist on AWS in {region}"
|
|
195
|
+
)
|
|
196
|
+
except (ClientError, EndpointConnectionError):
|
|
197
|
+
# Sometimes we get back nonsense like:
|
|
198
|
+
# botocore.exceptions.ClientError: An error occurred (AuthFailure) when calling the DescribeImages operation: AWS was not able to validate the provided access credentials
|
|
199
|
+
# Don't hold that against the AMI.
|
|
200
|
+
logger.exception(
|
|
201
|
+
f"Unable to check if AMI {ami} exists on AWS in {region}; assuming it does"
|
|
202
|
+
)
|
|
183
203
|
return ami
|
|
184
|
-
else:
|
|
185
|
-
logger.warning(f"Flatcar release feed suggests image {ami} which does not exist on AWS in {region}")
|
|
186
|
-
except (ClientError, EndpointConnectionError):
|
|
187
|
-
# Sometimes we get back nonsense like:
|
|
188
|
-
# botocore.exceptions.ClientError: An error occurred (AuthFailure) when calling the DescribeImages operation: AWS was not able to validate the provided access credentials
|
|
189
|
-
# Don't hold that against the AMI.
|
|
190
|
-
logger.exception(f"Unable to check if AMI {ami} exists on AWS in {region}; assuming it does")
|
|
191
|
-
return ami
|
|
192
204
|
# We didn't find it
|
|
193
|
-
logger.warning(
|
|
205
|
+
logger.warning(
|
|
206
|
+
f"Flatcar release feed does not have an image for region {region} that exists on AWS"
|
|
207
|
+
)
|
|
194
208
|
|
|
195
209
|
|
|
196
210
|
def aws_marketplace_flatcar_ami_search(
|
|
197
211
|
ec2_client: BaseClient, architecture: str = "amd64"
|
|
198
|
-
) ->
|
|
212
|
+
) -> str | None:
|
|
199
213
|
"""
|
|
200
214
|
Query AWS for all AMI names matching ``Flatcar-stable-*`` and return the most recent one.
|
|
201
215
|
|