toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/slurm.py +191 -16
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +97 -119
- toil/cwl/utils.py +103 -3
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +35 -255
- toil/jobStores/aws/jobStore.py +864 -1964
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +32 -13
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/accelerators.py +1 -1
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +138 -19
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/misc.py +1 -1
- toil/lib/pipes.py +385 -0
- toil/lib/plugins.py +106 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/url.py +320 -0
- toil/lib/web.py +4 -5
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +47 -15
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +16 -18
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +171 -162
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilStats.py +17 -2
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +1179 -825
- toil/worker.py +16 -8
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/lib/io.py
CHANGED
|
@@ -11,6 +11,8 @@ from contextlib import contextmanager
|
|
|
11
11
|
from io import BytesIO
|
|
12
12
|
from typing import IO, Any, Callable, Optional, Protocol, Union
|
|
13
13
|
|
|
14
|
+
from toil.lib.directory import get_directory_item, TOIL_DIR_URI_SCHEME
|
|
15
|
+
from toil.lib.url import URLAccess
|
|
14
16
|
from toil.lib.memoize import memoize
|
|
15
17
|
from toil.lib.misc import StrPath
|
|
16
18
|
|
|
@@ -39,9 +41,8 @@ def get_toil_home() -> str:
|
|
|
39
41
|
|
|
40
42
|
TOIL_URI_SCHEME = "toilfile:"
|
|
41
43
|
|
|
42
|
-
|
|
43
44
|
STANDARD_SCHEMES = ["http:", "https:", "s3:", "gs:", "ftp:"]
|
|
44
|
-
REMOTE_SCHEMES = STANDARD_SCHEMES + [TOIL_URI_SCHEME]
|
|
45
|
+
REMOTE_SCHEMES = STANDARD_SCHEMES + [TOIL_URI_SCHEME, TOIL_DIR_URI_SCHEME]
|
|
45
46
|
ALL_SCHEMES = REMOTE_SCHEMES + ["file:"]
|
|
46
47
|
|
|
47
48
|
def is_standard_url(filename: str) -> bool:
|
|
@@ -75,11 +76,25 @@ def is_url_with_scheme(filename: str, schemes: list[str]) -> bool:
|
|
|
75
76
|
return False
|
|
76
77
|
|
|
77
78
|
def is_toil_url(filename: str) -> bool:
|
|
79
|
+
"""
|
|
80
|
+
Return True if a URL is a toilfile: or toildir: URL.
|
|
81
|
+
"""
|
|
82
|
+
return is_url_with_scheme(filename, [TOIL_URI_SCHEME, TOIL_DIR_URI_SCHEME])
|
|
83
|
+
|
|
84
|
+
def is_toil_file_url(filename: str) -> bool:
|
|
78
85
|
"""
|
|
79
86
|
Return True if a URL is a toilfile: URL.
|
|
80
87
|
"""
|
|
81
88
|
return is_url_with_scheme(filename, [TOIL_URI_SCHEME])
|
|
82
89
|
|
|
90
|
+
def is_toil_dir_url(filename: str) -> bool:
|
|
91
|
+
"""
|
|
92
|
+
Return True if a URL is a toildir: URL.
|
|
93
|
+
|
|
94
|
+
Note that this may point to either a directory or a leaf file.
|
|
95
|
+
"""
|
|
96
|
+
return is_url_with_scheme(filename, [TOIL_DIR_URI_SCHEME])
|
|
97
|
+
|
|
83
98
|
def is_file_url(filename: str) -> bool:
|
|
84
99
|
"""
|
|
85
100
|
Return True if a URL is a file: URL.
|
|
@@ -88,6 +103,22 @@ def is_file_url(filename: str) -> bool:
|
|
|
88
103
|
"""
|
|
89
104
|
return is_url_with_scheme(filename, ["file:"])
|
|
90
105
|
|
|
106
|
+
def is_directory_url(filename: str) -> bool:
|
|
107
|
+
"""
|
|
108
|
+
Return True if a URL points to a directory.
|
|
109
|
+
|
|
110
|
+
Covers toildir: URLs and determines if they point to directories or leaf
|
|
111
|
+
files. Also covers other supported remote URL schemes.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
if is_toil_file_url(filename):
|
|
115
|
+
# Direct file URLs aren't directories.
|
|
116
|
+
return False
|
|
117
|
+
if is_toil_dir_url(filename):
|
|
118
|
+
# This is a toildir: URL but might be a file or a root or subdirectory.
|
|
119
|
+
return not isinstance(get_directory_item(filename), str)
|
|
120
|
+
return URLAccess.get_is_directory(filename)
|
|
121
|
+
|
|
91
122
|
def mkdtemp(
|
|
92
123
|
suffix: Optional[str] = None,
|
|
93
124
|
prefix: Optional[str] = None,
|
toil/lib/memoize.py
CHANGED
|
@@ -60,17 +60,22 @@ def sync_memoize(f: Callable[[MAT], MRT]) -> Callable[[MAT], MRT]:
|
|
|
60
60
|
|
|
61
61
|
def parse_iso_utc(s: str) -> datetime.datetime:
|
|
62
62
|
"""
|
|
63
|
-
Parses an
|
|
64
|
-
|
|
63
|
+
Parses an RFC 3339 ISO 8601 time in the UTC timezone.
|
|
64
|
+
|
|
65
|
+
Other timezones are not supported. Returns a timezone-aware UTC datetime
|
|
66
|
+
object.
|
|
65
67
|
|
|
66
68
|
:param s: The ISO-formatted time
|
|
67
69
|
|
|
68
|
-
:return: A timezone-
|
|
70
|
+
:return: A timezone-aware UTC datetime object
|
|
71
|
+
|
|
72
|
+
:raises ValueError: if the string is not in the correct format or is not in
|
|
73
|
+
the UTC timezone.
|
|
69
74
|
|
|
70
75
|
>>> parse_iso_utc('2016-04-27T00:28:04.000Z')
|
|
71
|
-
datetime.datetime(2016, 4, 27, 0, 28, 4)
|
|
76
|
+
datetime.datetime(2016, 4, 27, 0, 28, 4, tzinfo=datetime.timezone.utc)
|
|
72
77
|
>>> parse_iso_utc('2016-04-27T00:28:04Z')
|
|
73
|
-
datetime.datetime(2016, 4, 27, 0, 28, 4)
|
|
78
|
+
datetime.datetime(2016, 4, 27, 0, 28, 4, tzinfo=datetime.timezone.utc)
|
|
74
79
|
>>> parse_iso_utc('2016-04-27T00:28:04X')
|
|
75
80
|
Traceback (most recent call last):
|
|
76
81
|
...
|
|
@@ -83,8 +88,17 @@ def parse_iso_utc(s: str) -> datetime.datetime:
|
|
|
83
88
|
if not m:
|
|
84
89
|
raise ValueError(f"Not a valid ISO datetime in UTC: {s}")
|
|
85
90
|
else:
|
|
86
|
-
|
|
87
|
-
|
|
91
|
+
if m.group(8) != "Z" and not m.group(8).endswith("00:00"):
|
|
92
|
+
raise ValueError(f"Not in the UTC time zone: {s}")
|
|
93
|
+
if m.group(8) == "Z":
|
|
94
|
+
# Convert to an offset for parsing
|
|
95
|
+
s = s[:-1] + "-00:00"
|
|
96
|
+
|
|
97
|
+
fmt = "%Y-%m-%dT%H:%M:%S" + (".%f" if m.group(7) else "") + "%z"
|
|
98
|
+
parsed = datetime.datetime.strptime(s, fmt)
|
|
99
|
+
# We should have guaranteed that this is in UTC
|
|
100
|
+
assert parsed.tzinfo is not None
|
|
101
|
+
return parsed
|
|
88
102
|
|
|
89
103
|
|
|
90
104
|
def strict_bool(s: str) -> bool:
|
toil/lib/misc.py
CHANGED
|
@@ -27,7 +27,7 @@ def get_public_ip() -> str:
|
|
|
27
27
|
try:
|
|
28
28
|
# Try to get the internet-facing IP by attempting a connection
|
|
29
29
|
# to a non-existent server and reading what IP was used.
|
|
30
|
-
ip = "127.0.0.1"
|
|
30
|
+
ip: str = "127.0.0.1"
|
|
31
31
|
with closing(socket.socket(socket.AF_INET, socket.SOCK_DGRAM)) as sock:
|
|
32
32
|
# 203.0.113.0/24 is reserved as TEST-NET-3 by RFC 5737, so
|
|
33
33
|
# there is guaranteed to be no one listening on the other
|
toil/lib/pipes.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import errno
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import hashlib
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Optional, TextIO, BinaryIO, IO, Any
|
|
9
|
+
|
|
10
|
+
from toil.lib.checksum import ChecksumError
|
|
11
|
+
from toil.lib.threading import ExceptionalThread
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WritablePipe(ABC):
|
|
17
|
+
"""
|
|
18
|
+
An object-oriented wrapper for os.pipe. Clients should subclass it, implement
|
|
19
|
+
:meth:`.readFrom` to consume the readable end of the pipe, then instantiate the class as a
|
|
20
|
+
context manager to get the writable end. See the example below.
|
|
21
|
+
|
|
22
|
+
>>> import sys, shutil, codecs
|
|
23
|
+
>>> class MyPipe(WritablePipe):
|
|
24
|
+
... def readFrom(self, readable):
|
|
25
|
+
... shutil.copyfileobj(codecs.getreader('utf-8')(readable), sys.stdout)
|
|
26
|
+
>>> with MyPipe() as writable:
|
|
27
|
+
... _ = writable.write('Hello, world!\\n'.encode('utf-8'))
|
|
28
|
+
Hello, world!
|
|
29
|
+
|
|
30
|
+
Each instance of this class creates a thread and invokes the readFrom method in that thread.
|
|
31
|
+
The thread will be join()ed upon normal exit from the context manager, i.e. the body of the
|
|
32
|
+
`with` statement. If an exception occurs, the thread will not be joined but a well-behaved
|
|
33
|
+
:meth:`.readFrom` implementation will terminate shortly thereafter due to the pipe having
|
|
34
|
+
been closed.
|
|
35
|
+
|
|
36
|
+
Now, exceptions in the reader thread will be reraised in the main thread:
|
|
37
|
+
|
|
38
|
+
>>> class MyPipe(WritablePipe):
|
|
39
|
+
... def readFrom(self, readable):
|
|
40
|
+
... raise RuntimeError('Hello, world!')
|
|
41
|
+
>>> with MyPipe() as writable:
|
|
42
|
+
... pass
|
|
43
|
+
Traceback (most recent call last):
|
|
44
|
+
...
|
|
45
|
+
RuntimeError: Hello, world!
|
|
46
|
+
|
|
47
|
+
More complicated, less illustrative tests:
|
|
48
|
+
|
|
49
|
+
Same as above, but proving that handles are closed:
|
|
50
|
+
|
|
51
|
+
>>> x = os.dup(0); os.close(x)
|
|
52
|
+
>>> class MyPipe(WritablePipe):
|
|
53
|
+
... def readFrom(self, readable):
|
|
54
|
+
... raise RuntimeError('Hello, world!')
|
|
55
|
+
>>> with MyPipe() as writable:
|
|
56
|
+
... pass
|
|
57
|
+
Traceback (most recent call last):
|
|
58
|
+
...
|
|
59
|
+
RuntimeError: Hello, world!
|
|
60
|
+
>>> y = os.dup(0); os.close(y); x == y
|
|
61
|
+
True
|
|
62
|
+
|
|
63
|
+
Exceptions in the body of the with statement aren't masked, and handles are closed:
|
|
64
|
+
|
|
65
|
+
>>> x = os.dup(0); os.close(x)
|
|
66
|
+
>>> class MyPipe(WritablePipe):
|
|
67
|
+
... def readFrom(self, readable):
|
|
68
|
+
... pass
|
|
69
|
+
>>> with MyPipe() as writable:
|
|
70
|
+
... raise RuntimeError('Hello, world!')
|
|
71
|
+
Traceback (most recent call last):
|
|
72
|
+
...
|
|
73
|
+
RuntimeError: Hello, world!
|
|
74
|
+
>>> y = os.dup(0); os.close(y); x == y
|
|
75
|
+
True
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
def __init__(self, encoding: Optional[str] = None, errors: Optional[str] = None) -> None:
|
|
79
|
+
"""
|
|
80
|
+
The specified encoding and errors apply to the writable end of the pipe.
|
|
81
|
+
|
|
82
|
+
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
83
|
+
as for encode(). Defaults to None which represents binary mode.
|
|
84
|
+
|
|
85
|
+
:param str errors: an optional string that specifies how encoding errors are to be handled. Errors
|
|
86
|
+
are the same as for open(). Defaults to 'strict' when an encoding is specified.
|
|
87
|
+
"""
|
|
88
|
+
super().__init__()
|
|
89
|
+
self.encoding: Optional[str] = encoding
|
|
90
|
+
self.errors: Optional[str] = errors
|
|
91
|
+
self.readable_fh: Optional[int] = None
|
|
92
|
+
self.writable: Optional[IO[Any]] = None
|
|
93
|
+
self.thread: Optional[ExceptionalThread] = None
|
|
94
|
+
self.reader_done: bool = False
|
|
95
|
+
|
|
96
|
+
def __enter__(self) -> IO[Any]:
|
|
97
|
+
self.readable_fh, writable_fh = os.pipe()
|
|
98
|
+
self.writable = os.fdopen(
|
|
99
|
+
writable_fh,
|
|
100
|
+
"wb" if self.encoding == None else "wt",
|
|
101
|
+
encoding=self.encoding,
|
|
102
|
+
errors=self.errors,
|
|
103
|
+
)
|
|
104
|
+
self.thread = ExceptionalThread(target=self._reader)
|
|
105
|
+
self.thread.start()
|
|
106
|
+
return self.writable
|
|
107
|
+
|
|
108
|
+
def __exit__(self, exc_type: Optional[str], exc_val: Optional[str], exc_tb: Optional[str]) -> None:
|
|
109
|
+
# Closing the writable end will send EOF to the readable and cause the reader thread
|
|
110
|
+
# to finish.
|
|
111
|
+
# TODO: Can close() fail? If so, would we try and clean up after the reader?
|
|
112
|
+
assert self.writable is not None
|
|
113
|
+
self.writable.close()
|
|
114
|
+
try:
|
|
115
|
+
if self.thread is not None:
|
|
116
|
+
# reraises any exception that was raised in the thread
|
|
117
|
+
self.thread.join()
|
|
118
|
+
except Exception as e:
|
|
119
|
+
if exc_type is None:
|
|
120
|
+
# Only raise the child exception if there wasn't
|
|
121
|
+
# already an exception in the main thread
|
|
122
|
+
raise
|
|
123
|
+
else:
|
|
124
|
+
log.error(
|
|
125
|
+
"Swallowing additional exception in reader thread: %s", str(e)
|
|
126
|
+
)
|
|
127
|
+
finally:
|
|
128
|
+
# The responsibility for closing the readable end is generally that of the reader
|
|
129
|
+
# thread. To cover the small window before the reader takes over we also close it here.
|
|
130
|
+
# TODO: Does that make any sense?
|
|
131
|
+
if self.readable_fh is not None:
|
|
132
|
+
# Close the file handle. The reader thread must be dead now.
|
|
133
|
+
try:
|
|
134
|
+
os.close(self.readable_fh)
|
|
135
|
+
except OSError as e:
|
|
136
|
+
# OSError: [Errno 9] Bad file descriptor implies this file handle is already closed
|
|
137
|
+
if not e.errno == errno.EBADF:
|
|
138
|
+
raise e
|
|
139
|
+
|
|
140
|
+
@abstractmethod
|
|
141
|
+
def readFrom(self, readable: IO[Any]) -> None:
|
|
142
|
+
"""
|
|
143
|
+
Implement this method to read data from the pipe. This method should support both
|
|
144
|
+
binary and text mode output.
|
|
145
|
+
|
|
146
|
+
:param file readable: the file object representing the readable end of the pipe. Do not
|
|
147
|
+
explicitly invoke the close() method of the object; that will be done automatically.
|
|
148
|
+
"""
|
|
149
|
+
raise NotImplementedError()
|
|
150
|
+
|
|
151
|
+
def _reader(self) -> None:
|
|
152
|
+
assert self.readable_fh is not None
|
|
153
|
+
with os.fdopen(self.readable_fh, "rb") as readable:
|
|
154
|
+
# TODO: If the reader somehow crashes here, both threads might try
|
|
155
|
+
# to close readable_fh. Fortunately we don't do anything that
|
|
156
|
+
# should be able to fail here.
|
|
157
|
+
# TODO: Use a real mutex; this None-flagging logic doesn't seem race-free.
|
|
158
|
+
self.readable_fh = None # signal to parent thread that we've taken over
|
|
159
|
+
self.readFrom(readable)
|
|
160
|
+
self.reader_done = True
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ReadablePipe(ABC):
|
|
165
|
+
"""
|
|
166
|
+
An object-oriented wrapper for os.pipe. Clients should subclass it, implement
|
|
167
|
+
:meth:`.writeTo` to place data into the writable end of the pipe, then instantiate the class
|
|
168
|
+
as a context manager to get the readable end. See the example below.
|
|
169
|
+
|
|
170
|
+
>>> import sys, shutil, codecs
|
|
171
|
+
>>> class MyPipe(ReadablePipe):
|
|
172
|
+
... def writeTo(self, writable: IO[Any]) -> None:
|
|
173
|
+
... writable.write('Hello, world!\\n'.encode('utf-8'))
|
|
174
|
+
>>> with MyPipe() as readable:
|
|
175
|
+
... shutil.copyfileobj(codecs.getreader('utf-8')(readable), sys.stdout)
|
|
176
|
+
Hello, world!
|
|
177
|
+
|
|
178
|
+
Each instance of this class creates a thread and invokes the :meth:`.writeTo` method in that
|
|
179
|
+
thread. The thread will be join()ed upon normal exit from the context manager, i.e. the body
|
|
180
|
+
of the `with` statement. If an exception occurs, the thread will not be joined but a
|
|
181
|
+
well-behaved :meth:`.writeTo` implementation will terminate shortly thereafter due to the
|
|
182
|
+
pipe having been closed.
|
|
183
|
+
|
|
184
|
+
Now, exceptions in the writer thread will be reraised in the main thread:
|
|
185
|
+
|
|
186
|
+
>>> class MyPipe(ReadablePipe):
|
|
187
|
+
... def writeTo(self, writable):
|
|
188
|
+
... raise RuntimeError('Hello, world!')
|
|
189
|
+
>>> with MyPipe() as readable:
|
|
190
|
+
... pass
|
|
191
|
+
Traceback (most recent call last):
|
|
192
|
+
...
|
|
193
|
+
RuntimeError: Hello, world!
|
|
194
|
+
|
|
195
|
+
More complicated, less illustrative tests:
|
|
196
|
+
|
|
197
|
+
Same as above, but proving that handles are closed:
|
|
198
|
+
|
|
199
|
+
>>> x = os.dup(0); os.close(x)
|
|
200
|
+
>>> class MyPipe(ReadablePipe):
|
|
201
|
+
... def writeTo(self, writable: IO[Any]) -> None:
|
|
202
|
+
... raise RuntimeError('Hello, world!')
|
|
203
|
+
>>> with MyPipe() as readable:
|
|
204
|
+
... pass
|
|
205
|
+
Traceback (most recent call last):
|
|
206
|
+
...
|
|
207
|
+
RuntimeError: Hello, world!
|
|
208
|
+
>>> y = os.dup(0); os.close(y); x == y
|
|
209
|
+
True
|
|
210
|
+
|
|
211
|
+
Exceptions in the body of the with statement aren't masked, and handles are closed:
|
|
212
|
+
|
|
213
|
+
>>> x = os.dup(0); os.close(x)
|
|
214
|
+
>>> class MyPipe(ReadablePipe):
|
|
215
|
+
... def writeTo(self, writable):
|
|
216
|
+
... pass
|
|
217
|
+
>>> with MyPipe() as readable:
|
|
218
|
+
... raise RuntimeError('Hello, world!')
|
|
219
|
+
Traceback (most recent call last):
|
|
220
|
+
...
|
|
221
|
+
RuntimeError: Hello, world!
|
|
222
|
+
>>> y = os.dup(0); os.close(y); x == y
|
|
223
|
+
True
|
|
224
|
+
"""
|
|
225
|
+
|
|
226
|
+
@abstractmethod
|
|
227
|
+
def writeTo(self, writable: IO[Any]) -> None:
|
|
228
|
+
"""
|
|
229
|
+
Implement this method to write data to the pipe. This method should support both
|
|
230
|
+
binary and text mode input.
|
|
231
|
+
|
|
232
|
+
:param file writable: the file object representing the writable end of the pipe. Do not
|
|
233
|
+
explicitly invoke the close() method of the object; that will be done automatically.
|
|
234
|
+
"""
|
|
235
|
+
raise NotImplementedError()
|
|
236
|
+
|
|
237
|
+
def _writer(self) -> None:
|
|
238
|
+
assert self.writable_fh is not None
|
|
239
|
+
try:
|
|
240
|
+
with os.fdopen(self.writable_fh, "wb") as writable:
|
|
241
|
+
self.writeTo(writable)
|
|
242
|
+
except OSError as e:
|
|
243
|
+
# The other side of the pipe may have been closed by the
|
|
244
|
+
# reading thread, which is OK.
|
|
245
|
+
if e.errno != errno.EPIPE:
|
|
246
|
+
raise
|
|
247
|
+
|
|
248
|
+
def __init__(self, encoding: Optional[str] = None, errors: Optional[str] = None) -> None:
|
|
249
|
+
"""
|
|
250
|
+
The specified encoding and errors apply to the readable end of the pipe.
|
|
251
|
+
|
|
252
|
+
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
253
|
+
as for encode(). Defaults to None which represents binary mode.
|
|
254
|
+
|
|
255
|
+
:param str errors: an optional string that specifies how encoding errors are to be handled. Errors
|
|
256
|
+
are the same as for open(). Defaults to 'strict' when an encoding is specified.
|
|
257
|
+
"""
|
|
258
|
+
super().__init__()
|
|
259
|
+
self.encoding: Optional[str] = encoding
|
|
260
|
+
self.errors: Optional[str] = errors
|
|
261
|
+
self.writable_fh: Optional[int] = None
|
|
262
|
+
self.readable: Optional[IO[Any]] = None
|
|
263
|
+
self.thread: Optional[ExceptionalThread] = None
|
|
264
|
+
|
|
265
|
+
def __enter__(self) -> IO[Any]:
|
|
266
|
+
readable_fh, self.writable_fh = os.pipe()
|
|
267
|
+
self.readable = os.fdopen(
|
|
268
|
+
readable_fh,
|
|
269
|
+
"rb" if self.encoding == None else "rt",
|
|
270
|
+
encoding=self.encoding,
|
|
271
|
+
errors=self.errors,
|
|
272
|
+
)
|
|
273
|
+
self.thread = ExceptionalThread(target=self._writer)
|
|
274
|
+
self.thread.start()
|
|
275
|
+
return self.readable
|
|
276
|
+
|
|
277
|
+
def __exit__(self, exc_type: Optional[str], exc_val: Optional[str], exc_tb: Optional[str]) -> None:
|
|
278
|
+
# Close the read end of the pipe. The writing thread may
|
|
279
|
+
# still be writing to the other end, but this will wake it up
|
|
280
|
+
# if that's the case.
|
|
281
|
+
assert self.readable is not None
|
|
282
|
+
self.readable.close()
|
|
283
|
+
try:
|
|
284
|
+
if self.thread is not None:
|
|
285
|
+
# reraises any exception that was raised in the thread
|
|
286
|
+
self.thread.join()
|
|
287
|
+
except:
|
|
288
|
+
if exc_type is None:
|
|
289
|
+
# Only raise the child exception if there wasn't
|
|
290
|
+
# already an exception in the main thread
|
|
291
|
+
raise
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class ReadableTransformingPipe(ReadablePipe):
|
|
295
|
+
"""
|
|
296
|
+
A pipe which is constructed around a readable stream, and which provides a
|
|
297
|
+
context manager that gives a readable stream.
|
|
298
|
+
|
|
299
|
+
Useful as a base class for pipes which have to transform or otherwise visit
|
|
300
|
+
bytes that flow through them, instead of just consuming or producing data.
|
|
301
|
+
|
|
302
|
+
Clients should subclass it and implement :meth:`.transform`, like so:
|
|
303
|
+
|
|
304
|
+
>>> import sys, shutil, codecs
|
|
305
|
+
>>> class MyPipe(ReadableTransformingPipe):
|
|
306
|
+
... def transform(self, readable, writable):
|
|
307
|
+
... writable.write(readable.read().decode('utf-8').upper().encode('utf-8'))
|
|
308
|
+
>>> class SourcePipe(ReadablePipe):
|
|
309
|
+
... def writeTo(self, writable):
|
|
310
|
+
... writable.write('Hello, world!\\n'.encode('utf-8'))
|
|
311
|
+
>>> with SourcePipe() as source:
|
|
312
|
+
... with MyPipe(source) as transformed:
|
|
313
|
+
... shutil.copyfileobj(codecs.getreader('utf-8')(transformed), sys.stdout)
|
|
314
|
+
HELLO, WORLD!
|
|
315
|
+
|
|
316
|
+
The :meth:`.transform` method runs in its own thread, and should move data
|
|
317
|
+
chunk by chunk instead of all at once. It should finish normally if it
|
|
318
|
+
encounters either an EOF on the readable, or a :class:`BrokenPipeError` on
|
|
319
|
+
the writable. This means that it should make sure to actually catch a
|
|
320
|
+
:class:`BrokenPipeError` when writing.
|
|
321
|
+
|
|
322
|
+
See also: :class:`toil.lib.misc.WriteWatchingStream`.
|
|
323
|
+
|
|
324
|
+
"""
|
|
325
|
+
|
|
326
|
+
def __init__(self, source: IO[Any], encoding: Optional[str] = None, errors: Optional[str] = None) -> None:
|
|
327
|
+
"""
|
|
328
|
+
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
329
|
+
as for encode(). Defaults to None which represents binary mode.
|
|
330
|
+
|
|
331
|
+
:param str errors: an optional string that specifies how encoding errors are to be handled. Errors
|
|
332
|
+
are the same as for open(). Defaults to 'strict' when an encoding is specified.
|
|
333
|
+
"""
|
|
334
|
+
super().__init__(encoding=encoding, errors=errors)
|
|
335
|
+
self.source = source
|
|
336
|
+
|
|
337
|
+
@abstractmethod
|
|
338
|
+
def transform(self, readable: IO[Any], writable: IO[Any]) -> None:
|
|
339
|
+
"""
|
|
340
|
+
Implement this method to ship data through the pipe.
|
|
341
|
+
|
|
342
|
+
:param file readable: the input stream file object to transform.
|
|
343
|
+
|
|
344
|
+
:param file writable: the file object representing the writable end of the pipe. Do not
|
|
345
|
+
explicitly invoke the close() method of the object; that will be done automatically.
|
|
346
|
+
"""
|
|
347
|
+
raise NotImplementedError()
|
|
348
|
+
|
|
349
|
+
def writeTo(self, writable: IO[Any]) -> None:
|
|
350
|
+
self.transform(self.source, writable)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class HashingPipe(ReadableTransformingPipe):
|
|
354
|
+
"""
|
|
355
|
+
Class which checksums all the data read through it. If it
|
|
356
|
+
reaches EOF and the checksum isn't correct, raises ChecksumError.
|
|
357
|
+
|
|
358
|
+
Assumes a checksum to verify (checksum_to_verify) was actually provided.
|
|
359
|
+
"""
|
|
360
|
+
def __init__(self, source: IO[Any], encoding: Optional[str] = None, errors: Optional[str] = None, checksum_to_verify: Optional[str] = None) -> None:
|
|
361
|
+
"""
|
|
362
|
+
:param str encoding: the name of the encoding used to encode the file. Encodings are the same
|
|
363
|
+
as for encode(). Defaults to None which represents binary mode.
|
|
364
|
+
|
|
365
|
+
:param str errors: an optional string that specifies how encoding errors are to be handled. Errors
|
|
366
|
+
are the same as for open(). Defaults to 'strict' when an encoding is specified.
|
|
367
|
+
"""
|
|
368
|
+
super(HashingPipe, self).__init__(source=source, encoding=encoding, errors=errors)
|
|
369
|
+
self.checksum_to_verify = checksum_to_verify
|
|
370
|
+
|
|
371
|
+
def transform(self, readable: IO[Any], writable: IO[Any]) -> None:
|
|
372
|
+
hash_object = hashlib.sha1()
|
|
373
|
+
contents = readable.read(1024 * 1024)
|
|
374
|
+
while contents != b'':
|
|
375
|
+
hash_object.update(contents)
|
|
376
|
+
try:
|
|
377
|
+
writable.write(contents)
|
|
378
|
+
except BrokenPipeError:
|
|
379
|
+
# Read was stopped early by user code.
|
|
380
|
+
# Can't check the checksum.
|
|
381
|
+
return
|
|
382
|
+
contents = readable.read(1024 * 1024)
|
|
383
|
+
final_computed_checksum = f'sha1${hash_object.hexdigest()}'
|
|
384
|
+
if not self.checksum_to_verify == final_computed_checksum:
|
|
385
|
+
raise ChecksumError(f'Checksum mismatch. Expected: {self.checksum_to_verify} Actual: {final_computed_checksum}')
|
toil/lib/plugins.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Copyright (C) 2015-2025 Regents of the University of California
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
Generic plugin system for Toil plugins.
|
|
17
|
+
|
|
18
|
+
Plugins come in Python packages named::
|
|
19
|
+
|
|
20
|
+
toil_{PLUGIN_TYPE}_{WHATEVER}
|
|
21
|
+
|
|
22
|
+
When looking for plugins, Toil will list all the Python packages with the right
|
|
23
|
+
name prefix for the given type of plugin, and load them. The plugin modules
|
|
24
|
+
then have an opportunity to import :meth:`register_plugin` and register
|
|
25
|
+
themselves.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import importlib
|
|
29
|
+
from typing import Any, Literal, Union
|
|
30
|
+
import pkgutil
|
|
31
|
+
from toil.lib.memoize import memoize
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
PluginType = Union[Literal["batch_system"], Literal["url_access"]]
|
|
35
|
+
plugin_types: list[PluginType] = ["batch_system", "url_access"]
|
|
36
|
+
|
|
37
|
+
_registry: dict[str, dict[str, Any]] = {k: {} for k in plugin_types}
|
|
38
|
+
|
|
39
|
+
def register_plugin(
|
|
40
|
+
plugin_type: PluginType, plugin_name: str, plugin_being_registered: Any
|
|
41
|
+
) -> None:
|
|
42
|
+
"""
|
|
43
|
+
Adds a plugin to the registry for the given type of plugin.
|
|
44
|
+
|
|
45
|
+
:param plugin_name: For batch systems, this is the string the user will use
|
|
46
|
+
to select the batch system on the command line with ``--batchSystem``.
|
|
47
|
+
For URL access plugins, this is the URL scheme that the plugin
|
|
48
|
+
implements.
|
|
49
|
+
:param plugin_being_registered: This is a function that, when called,
|
|
50
|
+
imports and returns a plugin-provided class type. For batch systems,
|
|
51
|
+
the resulting type must extend
|
|
52
|
+
:class:`toil.batchSystems.abstractBatchSystem.AbstractBatchSystem`. For
|
|
53
|
+
URL access plugins, it must extend :class:`toil.lib.url.URLAccess`.
|
|
54
|
+
Note that the function used here should return the class itself; it
|
|
55
|
+
should not construct an instance of the class.
|
|
56
|
+
"""
|
|
57
|
+
_registry[plugin_type][plugin_name] = plugin_being_registered
|
|
58
|
+
|
|
59
|
+
def remove_plugin(
|
|
60
|
+
plugin_type: PluginType, plugin_name: str) -> None:
|
|
61
|
+
"""
|
|
62
|
+
Removes a plugin from the registry for the given type of plugin.
|
|
63
|
+
"""
|
|
64
|
+
try:
|
|
65
|
+
del _registry[plugin_type][plugin_name]
|
|
66
|
+
except KeyError:
|
|
67
|
+
# If the plugin does not exist, it can be ignored
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
def get_plugin_names(plugin_type:PluginType) -> list[str]:
|
|
71
|
+
"""
|
|
72
|
+
Get the names of all the available plugins of the given type.
|
|
73
|
+
"""
|
|
74
|
+
_load_all_plugins(plugin_type)
|
|
75
|
+
return list(_registry[plugin_type].keys())
|
|
76
|
+
|
|
77
|
+
def get_plugin(plugin_type: PluginType, plugin_name: str) -> Any:
|
|
78
|
+
"""
|
|
79
|
+
Get a plugin class factory function by name.
|
|
80
|
+
|
|
81
|
+
:raises: KeyError if plugin_name is not the name of a plugin of the given
|
|
82
|
+
type.
|
|
83
|
+
"""
|
|
84
|
+
_load_all_plugins(plugin_type)
|
|
85
|
+
return _registry[plugin_type][plugin_name]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _plugin_name_prefix(plugin_type: PluginType) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Get prefix for plugin type.
|
|
91
|
+
|
|
92
|
+
Any packages with prefix will count as toil plugins of that type.
|
|
93
|
+
"""
|
|
94
|
+
return f"toil_{plugin_type}_"
|
|
95
|
+
|
|
96
|
+
@memoize
|
|
97
|
+
def _load_all_plugins(plugin_type: PluginType) -> None:
|
|
98
|
+
"""
|
|
99
|
+
Load all the plugins of the given type that are installed.
|
|
100
|
+
"""
|
|
101
|
+
prefix = _plugin_name_prefix(plugin_type)
|
|
102
|
+
for finder, name, is_pkg in pkgutil.iter_modules():
|
|
103
|
+
# For all installed packages
|
|
104
|
+
if name.startswith(prefix):
|
|
105
|
+
# If it is a Toil plugin of the requested type, import it
|
|
106
|
+
importlib.import_module(name)
|
toil/lib/retry.py
CHANGED
|
@@ -172,7 +172,7 @@ class ErrorCondition:
|
|
|
172
172
|
|
|
173
173
|
def __init__(
|
|
174
174
|
self,
|
|
175
|
-
error: Optional[
|
|
175
|
+
error: Optional[type[BaseException]] = None,
|
|
176
176
|
error_codes: list[int] = None,
|
|
177
177
|
boto_error_codes: list[str] = None,
|
|
178
178
|
error_message_must_include: str = None,
|
toil/lib/threading.py
CHANGED
|
@@ -226,7 +226,7 @@ class ExceptionalThread(threading.Thread):
|
|
|
226
226
|
if not self.is_alive() and self.exc_info is not None:
|
|
227
227
|
exc_type, exc_value, traceback = self.exc_info
|
|
228
228
|
self.exc_info = None
|
|
229
|
-
raise_(exc_type, exc_value, traceback)
|
|
229
|
+
raise_(exc_type, exc_value, traceback)
|
|
230
230
|
|
|
231
231
|
|
|
232
232
|
def cpu_count() -> int:
|