toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/lib/humanize.py
CHANGED
|
@@ -25,7 +25,9 @@ def bytes2human(n: SupportsInt) -> str:
|
|
|
25
25
|
"""
|
|
26
26
|
Convert n bytes into a human readable string.
|
|
27
27
|
"""
|
|
28
|
-
logger.warning(
|
|
28
|
+
logger.warning(
|
|
29
|
+
'Deprecated toil method. Please use "toil.lib.conversions.bytes2human()" instead."'
|
|
30
|
+
)
|
|
29
31
|
return b2h(n)
|
|
30
32
|
|
|
31
33
|
|
|
@@ -36,5 +38,7 @@ def human2bytes(s: str) -> int:
|
|
|
36
38
|
|
|
37
39
|
When unable to recognize the format ValueError is raised.
|
|
38
40
|
"""
|
|
39
|
-
logger.warning(
|
|
41
|
+
logger.warning(
|
|
42
|
+
'Deprecated toil method. Please use "toil.lib.conversions.human2bytes()" instead."'
|
|
43
|
+
)
|
|
40
44
|
return h2b(s)
|
toil/lib/io.py
CHANGED
|
@@ -1,16 +1,86 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
import logging
|
|
2
3
|
import os
|
|
3
4
|
import shutil
|
|
4
5
|
import stat
|
|
6
|
+
import sys
|
|
5
7
|
import tempfile
|
|
6
8
|
import uuid
|
|
9
|
+
from collections.abc import Iterator
|
|
7
10
|
from contextlib import contextmanager
|
|
8
11
|
from io import BytesIO
|
|
9
|
-
from typing import IO, Any, Callable,
|
|
12
|
+
from typing import IO, Any, Callable, Optional, Protocol, Union
|
|
13
|
+
|
|
14
|
+
from toil.lib.memoize import memoize
|
|
10
15
|
|
|
11
16
|
logger = logging.getLogger(__name__)
|
|
12
17
|
|
|
13
|
-
|
|
18
|
+
@memoize
|
|
19
|
+
def get_toil_home() -> str:
|
|
20
|
+
"""
|
|
21
|
+
Get the Toil home directory for storing configuration and global state.
|
|
22
|
+
|
|
23
|
+
Raises an error if it does not exist and cannot be created. Safe to run
|
|
24
|
+
simultaneously in multiple processes.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# TODO: should this use an XDG config directory or ~/.config to not clutter the
|
|
28
|
+
# base home directory?
|
|
29
|
+
toil_home_dir = os.path.join(os.path.expanduser("~"), ".toil")
|
|
30
|
+
|
|
31
|
+
dir_path = try_path(toil_home_dir)
|
|
32
|
+
if dir_path is None:
|
|
33
|
+
raise RuntimeError(
|
|
34
|
+
f"Cannot create or access Toil configuration directory {toil_home_dir}"
|
|
35
|
+
)
|
|
36
|
+
return dir_path
|
|
37
|
+
|
|
38
|
+
TOIL_URI_SCHEME = "toilfile:"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
STANDARD_SCHEMES = ["http:", "https:", "s3:", "gs:", "ftp:"]
|
|
42
|
+
REMOTE_SCHEMES = STANDARD_SCHEMES + [TOIL_URI_SCHEME]
|
|
43
|
+
ALL_SCHEMES = REMOTE_SCHEMES + ["file:"]
|
|
44
|
+
|
|
45
|
+
def is_standard_url(filename: str) -> bool:
|
|
46
|
+
return is_url_with_scheme(filename, STANDARD_SCHEMES)
|
|
47
|
+
|
|
48
|
+
def is_remote_url(filename: str) -> bool:
|
|
49
|
+
"""
|
|
50
|
+
Decide if a filename is a known, non-file kind of URL
|
|
51
|
+
"""
|
|
52
|
+
return is_url_with_scheme(filename, REMOTE_SCHEMES)
|
|
53
|
+
|
|
54
|
+
def is_any_url(filename: str) -> bool:
|
|
55
|
+
"""
|
|
56
|
+
Decide if a string is a URI like http:// or file://.
|
|
57
|
+
|
|
58
|
+
Otherwise it might be a bare path.
|
|
59
|
+
"""
|
|
60
|
+
return is_url_with_scheme(filename, ALL_SCHEMES)
|
|
61
|
+
|
|
62
|
+
def is_url_with_scheme(filename: str, schemes: list[str]) -> bool:
|
|
63
|
+
"""
|
|
64
|
+
Return True if filename is a URL with any of the given schemes and False otherwise.
|
|
65
|
+
"""
|
|
66
|
+
# TODO: "http:myfile.dat" is a valid filename and *not* a valid URL
|
|
67
|
+
for scheme in schemes:
|
|
68
|
+
if filename.startswith(scheme):
|
|
69
|
+
return True
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
def is_toil_url(filename: str) -> bool:
|
|
73
|
+
return is_url_with_scheme(filename, [TOIL_URI_SCHEME])
|
|
74
|
+
|
|
75
|
+
def is_file_url(filename: str) -> bool:
|
|
76
|
+
return is_url_with_scheme(filename, ["file:"])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def mkdtemp(
|
|
80
|
+
suffix: Optional[str] = None,
|
|
81
|
+
prefix: Optional[str] = None,
|
|
82
|
+
dir: Optional[str] = None,
|
|
83
|
+
) -> str:
|
|
14
84
|
"""
|
|
15
85
|
Make a temporary directory like tempfile.mkdtemp, but with relaxed permissions.
|
|
16
86
|
|
|
@@ -27,10 +97,13 @@ def mkdtemp(suffix: Optional[str] = None, prefix: Optional[str] = None, dir: Opt
|
|
|
27
97
|
# Make the directory
|
|
28
98
|
result = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
|
|
29
99
|
# Grant all the permissions: full control for user, and execute for group and other
|
|
30
|
-
os.chmod(
|
|
100
|
+
os.chmod(
|
|
101
|
+
result, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
|
|
102
|
+
)
|
|
31
103
|
# Return the path created
|
|
32
104
|
return result
|
|
33
105
|
|
|
106
|
+
|
|
34
107
|
def robust_rmtree(path: Union[str, bytes]) -> None:
|
|
35
108
|
"""
|
|
36
109
|
Robustly tries to delete paths.
|
|
@@ -45,7 +118,7 @@ def robust_rmtree(path: Union[str, bytes]) -> None:
|
|
|
45
118
|
if not isinstance(path, bytes):
|
|
46
119
|
# Internally we must work in bytes, in case we find an undecodeable
|
|
47
120
|
# filename.
|
|
48
|
-
path = path.encode(
|
|
121
|
+
path = path.encode("utf-8")
|
|
49
122
|
|
|
50
123
|
if not os.path.exists(path):
|
|
51
124
|
# Nothing to do!
|
|
@@ -107,7 +180,7 @@ def atomic_tmp_file(final_path: str) -> str:
|
|
|
107
180
|
as finalPath. It the final path is in /dev (/dev/null, /dev/stdout), it is
|
|
108
181
|
returned unchanged and atomic_tmp_install will do nothing."""
|
|
109
182
|
final_dir = os.path.dirname(os.path.normpath(final_path)) # can be empty
|
|
110
|
-
if final_dir ==
|
|
183
|
+
if final_dir == "/dev":
|
|
111
184
|
return final_path
|
|
112
185
|
final_basename = os.path.basename(final_path)
|
|
113
186
|
final_ext = os.path.splitext(final_path)[1]
|
|
@@ -117,14 +190,15 @@ def atomic_tmp_file(final_path: str) -> str:
|
|
|
117
190
|
|
|
118
191
|
def atomic_install(tmp_path, final_path) -> None:
|
|
119
192
|
"""atomic install of tmp_path as final_path"""
|
|
120
|
-
if os.path.dirname(os.path.normpath(final_path)) !=
|
|
193
|
+
if os.path.dirname(os.path.normpath(final_path)) != "/dev":
|
|
121
194
|
os.rename(tmp_path, final_path)
|
|
122
195
|
|
|
196
|
+
|
|
123
197
|
@contextmanager
|
|
124
198
|
def AtomicFileCreate(final_path: str, keep: bool = False) -> Iterator[str]:
|
|
125
199
|
"""Context manager to create a temporary file. Entering returns path to
|
|
126
200
|
the temporary file in the same directory as finalPath. If the code in
|
|
127
|
-
context succeeds, the file renamed to its
|
|
201
|
+
context succeeds, the file renamed to its actual name. If an error
|
|
128
202
|
occurs, the file is not installed and is removed unless keep is specified.
|
|
129
203
|
"""
|
|
130
204
|
tmp_path = atomic_tmp_file(final_path)
|
|
@@ -140,7 +214,9 @@ def AtomicFileCreate(final_path: str, keep: bool = False) -> Iterator[str]:
|
|
|
140
214
|
raise
|
|
141
215
|
|
|
142
216
|
|
|
143
|
-
def atomic_copy(
|
|
217
|
+
def atomic_copy(
|
|
218
|
+
src_path: str, dest_path: str, executable: Optional[bool] = None
|
|
219
|
+
) -> None:
|
|
144
220
|
"""Copy a file using posix atomic creations semantics."""
|
|
145
221
|
if executable is None:
|
|
146
222
|
executable = os.stat(src_path).st_mode & stat.S_IXUSR != 0
|
|
@@ -150,10 +226,12 @@ def atomic_copy(src_path: str, dest_path: str, executable: Optional[bool] = None
|
|
|
150
226
|
os.chmod(dest_path_tmp, os.stat(dest_path_tmp).st_mode | stat.S_IXUSR)
|
|
151
227
|
|
|
152
228
|
|
|
153
|
-
def atomic_copyobj(
|
|
229
|
+
def atomic_copyobj(
|
|
230
|
+
src_fh: BytesIO, dest_path: str, length: int = 16384, executable: bool = False
|
|
231
|
+
) -> None:
|
|
154
232
|
"""Copy an open file using posix atomic creations semantics."""
|
|
155
233
|
with AtomicFileCreate(dest_path) as dest_path_tmp:
|
|
156
|
-
with open(dest_path_tmp,
|
|
234
|
+
with open(dest_path_tmp, "wb") as dest_path_fh:
|
|
157
235
|
shutil.copyfileobj(src_fh, dest_path_fh, length=length)
|
|
158
236
|
if executable:
|
|
159
237
|
os.chmod(dest_path_tmp, os.stat(dest_path_tmp).st_mode | stat.S_IXUSR)
|
|
@@ -179,9 +257,11 @@ def make_public_dir(in_directory: str, suggested_name: Optional[str] = None) ->
|
|
|
179
257
|
return generated_dir_path
|
|
180
258
|
except FileExistsError:
|
|
181
259
|
pass
|
|
182
|
-
for i in range(
|
|
260
|
+
for i in range(
|
|
261
|
+
4, 32 + 1
|
|
262
|
+
): # make random uuids and truncate to lengths starting at 4 and working up to max 32
|
|
183
263
|
for _ in range(10): # make 10 attempts for each length
|
|
184
|
-
truncated_uuid: str = str(uuid.uuid4()).replace(
|
|
264
|
+
truncated_uuid: str = str(uuid.uuid4()).replace("-", "")[:i]
|
|
185
265
|
generated_dir_path: str = os.path.join(in_directory, truncated_uuid)
|
|
186
266
|
try:
|
|
187
267
|
os.mkdir(generated_dir_path)
|
|
@@ -194,6 +274,7 @@ def make_public_dir(in_directory: str, suggested_name: Optional[str] = None) ->
|
|
|
194
274
|
os.chmod(this_should_never_happen, 0o777)
|
|
195
275
|
return this_should_never_happen
|
|
196
276
|
|
|
277
|
+
|
|
197
278
|
def try_path(path: str, min_size: int = 100 * 1024 * 1024) -> Optional[str]:
|
|
198
279
|
"""
|
|
199
280
|
Try to use the given path. Return it if it exists or can be made,
|
|
@@ -291,3 +372,31 @@ class WriteWatchingStream:
|
|
|
291
372
|
"""
|
|
292
373
|
|
|
293
374
|
self.backingStream.close()
|
|
375
|
+
|
|
376
|
+
class ReadableFileObj(Protocol):
|
|
377
|
+
"""
|
|
378
|
+
Protocol that is more specific than what file_digest takes as an argument.
|
|
379
|
+
Also guarantees a read() method.
|
|
380
|
+
Would extend the protocol from Typeshed for hashlib but those are only
|
|
381
|
+
declared for 3.11+.
|
|
382
|
+
"""
|
|
383
|
+
def readinto(self, buf: bytearray, /) -> int: ...
|
|
384
|
+
def readable(self) -> bool: ...
|
|
385
|
+
def read(self, number: int) -> bytes: ...
|
|
386
|
+
|
|
387
|
+
# hashlib._Hash seems to not appear at runtime
|
|
388
|
+
def file_digest(f: ReadableFileObj, alg_name: str) -> "hashlib._Hash":
|
|
389
|
+
"""
|
|
390
|
+
Polyfilled hashlib.file_digest that works on Python <3.11.
|
|
391
|
+
"""
|
|
392
|
+
if sys.version_info >= (3, 11):
|
|
393
|
+
return hashlib.file_digest(f, alg_name)
|
|
394
|
+
BUFFER_SIZE = 1024 * 1024
|
|
395
|
+
hasher = hashlib.new(alg_name)
|
|
396
|
+
buffer = f.read(BUFFER_SIZE)
|
|
397
|
+
while buffer:
|
|
398
|
+
hasher.update(buffer)
|
|
399
|
+
buffer = f.read(BUFFER_SIZE)
|
|
400
|
+
return hasher
|
|
401
|
+
|
|
402
|
+
|
toil/lib/iterables.py
CHANGED
|
@@ -12,8 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from collections.abc import Iterable, Iterator
|
|
16
|
+
|
|
15
17
|
# 5.14.2018: copied into Toil from https://github.com/BD2KGenomics/bd2k-python-lib
|
|
16
|
-
from typing import Any,
|
|
18
|
+
from typing import Any, TypeVar
|
|
17
19
|
|
|
18
20
|
IT = TypeVar("IT")
|
|
19
21
|
|
|
@@ -102,7 +104,7 @@ class concat:
|
|
|
102
104
|
try:
|
|
103
105
|
i = x.__iter__()
|
|
104
106
|
except AttributeError:
|
|
105
|
-
i = x,
|
|
107
|
+
i = (x,)
|
|
106
108
|
else:
|
|
107
109
|
i = x
|
|
108
110
|
return i
|
toil/lib/memoize.py
CHANGED
|
@@ -17,7 +17,7 @@ import datetime
|
|
|
17
17
|
import re
|
|
18
18
|
from functools import lru_cache, wraps
|
|
19
19
|
from threading import Lock
|
|
20
|
-
from typing import Any, Callable,
|
|
20
|
+
from typing import Any, Callable, TypeVar
|
|
21
21
|
|
|
22
22
|
memoize = lru_cache(maxsize=None)
|
|
23
23
|
"""
|
|
@@ -31,13 +31,14 @@ more than once with the same arguments.
|
|
|
31
31
|
MAT = TypeVar("MAT")
|
|
32
32
|
MRT = TypeVar("MRT")
|
|
33
33
|
|
|
34
|
+
|
|
34
35
|
def sync_memoize(f: Callable[[MAT], MRT]) -> Callable[[MAT], MRT]:
|
|
35
36
|
"""
|
|
36
37
|
Like memoize, but guarantees that decorated function is only called once, even when multiple
|
|
37
38
|
threads are calling the decorating function with multiple parameters.
|
|
38
39
|
"""
|
|
39
40
|
# TODO: Think about an f that is recursive
|
|
40
|
-
memory:
|
|
41
|
+
memory: dict[tuple[Any, ...], Any] = {}
|
|
41
42
|
lock = Lock()
|
|
42
43
|
|
|
43
44
|
@wraps(f)
|
|
@@ -53,13 +54,14 @@ def sync_memoize(f: Callable[[MAT], MRT]) -> Callable[[MAT], MRT]:
|
|
|
53
54
|
r = f(*args)
|
|
54
55
|
memory[args] = r
|
|
55
56
|
return r
|
|
57
|
+
|
|
56
58
|
return new_f
|
|
57
59
|
|
|
58
60
|
|
|
59
61
|
def parse_iso_utc(s: str) -> datetime.datetime:
|
|
60
62
|
"""
|
|
61
63
|
Parses an ISO time with a hard-coded Z for zulu-time (UTC) at the end. Other timezones are
|
|
62
|
-
not supported. Returns a timezone-naive datetime object.
|
|
64
|
+
not supported. Returns a timezone-naive datetime object.
|
|
63
65
|
|
|
64
66
|
:param s: The ISO-formatted time
|
|
65
67
|
|
|
@@ -74,20 +76,22 @@ def parse_iso_utc(s: str) -> datetime.datetime:
|
|
|
74
76
|
...
|
|
75
77
|
ValueError: Not a valid ISO datetime in UTC: 2016-04-27T00:28:04X
|
|
76
78
|
"""
|
|
77
|
-
rfc3339_datetime = re.compile(
|
|
79
|
+
rfc3339_datetime = re.compile(
|
|
80
|
+
r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})$"
|
|
81
|
+
)
|
|
78
82
|
m = rfc3339_datetime.match(s)
|
|
79
83
|
if not m:
|
|
80
|
-
raise ValueError(f
|
|
84
|
+
raise ValueError(f"Not a valid ISO datetime in UTC: {s}")
|
|
81
85
|
else:
|
|
82
|
-
fmt =
|
|
86
|
+
fmt = "%Y-%m-%dT%H:%M:%S" + (".%f" if m.group(7) else "") + "Z"
|
|
83
87
|
return datetime.datetime.strptime(s, fmt)
|
|
84
88
|
|
|
85
89
|
|
|
86
90
|
def strict_bool(s: str) -> bool:
|
|
87
91
|
"""Variant of bool() that only accepts two possible string values."""
|
|
88
|
-
if s ==
|
|
92
|
+
if s == "True":
|
|
89
93
|
return True
|
|
90
|
-
elif s ==
|
|
94
|
+
elif s == "False":
|
|
91
95
|
return False
|
|
92
96
|
else:
|
|
93
97
|
raise ValueError(s)
|
toil/lib/misc.py
CHANGED
|
@@ -7,9 +7,9 @@ import socket
|
|
|
7
7
|
import subprocess
|
|
8
8
|
import sys
|
|
9
9
|
import time
|
|
10
|
-
import
|
|
10
|
+
from collections.abc import Iterator
|
|
11
11
|
from contextlib import closing
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import Optional
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -21,19 +21,20 @@ def get_public_ip() -> str:
|
|
|
21
21
|
try:
|
|
22
22
|
# Try to get the internet-facing IP by attempting a connection
|
|
23
23
|
# to a non-existent server and reading what IP was used.
|
|
24
|
-
ip =
|
|
24
|
+
ip = "127.0.0.1"
|
|
25
25
|
with closing(socket.socket(socket.AF_INET, socket.SOCK_DGRAM)) as sock:
|
|
26
26
|
# 203.0.113.0/24 is reserved as TEST-NET-3 by RFC 5737, so
|
|
27
27
|
# there is guaranteed to be no one listening on the other
|
|
28
28
|
# end (and we won't accidentally DOS anyone).
|
|
29
|
-
sock.connect((
|
|
29
|
+
sock.connect(("203.0.113.1", 1))
|
|
30
30
|
ip = sock.getsockname()[0]
|
|
31
31
|
return ip
|
|
32
32
|
except:
|
|
33
33
|
# Something went terribly wrong. Just give loopback rather
|
|
34
34
|
# than killing everything, because this is often called just
|
|
35
35
|
# to provide a default argument
|
|
36
|
-
return
|
|
36
|
+
return "127.0.0.1"
|
|
37
|
+
|
|
37
38
|
|
|
38
39
|
def get_user_name() -> str:
|
|
39
40
|
"""
|
|
@@ -46,20 +47,41 @@ def get_user_name() -> str:
|
|
|
46
47
|
except KeyError:
|
|
47
48
|
# This is expected if the user isn't in /etc/passwd, such as in a
|
|
48
49
|
# Docker container when running as a weird UID. Make something up.
|
|
49
|
-
return
|
|
50
|
+
return "UnknownUser" + str(os.getuid())
|
|
50
51
|
except Exception as e:
|
|
51
52
|
# We can't get the UID, or something weird has gone wrong.
|
|
52
|
-
logger.error(
|
|
53
|
-
return
|
|
53
|
+
logger.error("Unexpected error getting user name: %s", e)
|
|
54
|
+
return "UnknownUser"
|
|
55
|
+
|
|
54
56
|
|
|
55
57
|
def utc_now() -> datetime.datetime:
|
|
56
58
|
"""Return a datetime in the UTC timezone corresponding to right now."""
|
|
57
59
|
return datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
|
|
58
60
|
|
|
61
|
+
|
|
59
62
|
def unix_now_ms() -> float:
|
|
60
63
|
"""Return the current time in milliseconds since the Unix epoch."""
|
|
61
64
|
return time.time() * 1000
|
|
62
65
|
|
|
66
|
+
def unix_seconds_to_timestamp(timestamp: float) -> str:
|
|
67
|
+
"""
|
|
68
|
+
Convert a time in seconds since the Unix epoch to an ISO 8601 string.
|
|
69
|
+
"""
|
|
70
|
+
return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
|
|
71
|
+
|
|
72
|
+
def unix_seconds_to_local_time(timestamp: float) -> datetime.datetime:
|
|
73
|
+
"""
|
|
74
|
+
Returns a local time corresponding to the given number of seconds since the Unix epoch.
|
|
75
|
+
"""
|
|
76
|
+
return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).astimezone()
|
|
77
|
+
|
|
78
|
+
def seconds_to_duration(time_difference: float) -> str:
|
|
79
|
+
"""
|
|
80
|
+
Convert a time difference in seconds to an ISO 8601 duration string.
|
|
81
|
+
"""
|
|
82
|
+
return f"PT{time_difference:.3f}S"
|
|
83
|
+
|
|
84
|
+
|
|
63
85
|
def slow_down(seconds: float) -> float:
|
|
64
86
|
"""
|
|
65
87
|
Toil jobs that have completed are not allowed to have taken 0 seconds, but
|
|
@@ -77,9 +99,25 @@ def slow_down(seconds: float) -> float:
|
|
|
77
99
|
|
|
78
100
|
return max(seconds, sys.float_info.epsilon)
|
|
79
101
|
|
|
80
|
-
|
|
102
|
+
|
|
103
|
+
def printq(msg: str, quiet: bool, log: bool = False) -> None:
|
|
104
|
+
"""
|
|
105
|
+
This is for functions used simultaneously in Toil proper and in the admin scripts.
|
|
106
|
+
|
|
107
|
+
Our admin scripts "print" to stdout, while Toil proper uses logging. For a script that,
|
|
108
|
+
for example, cleans up IAM, EC2, etc. cruft leftover after failed CI runs, we can call
|
|
109
|
+
an AWS delete IAM role function, and this prints or logs progress (unless quiet is True),
|
|
110
|
+
depending on whether the function is called in, say, the jobstore or a script.
|
|
111
|
+
|
|
112
|
+
:param msg: The string to print or log to stdout.
|
|
113
|
+
:param quiet: Silent output to stdout.
|
|
114
|
+
:param log: Use logging (else "print" to the screen).
|
|
115
|
+
"""
|
|
81
116
|
if not quiet:
|
|
82
|
-
|
|
117
|
+
if not log:
|
|
118
|
+
print(msg)
|
|
119
|
+
else:
|
|
120
|
+
logger.debug(msg)
|
|
83
121
|
|
|
84
122
|
|
|
85
123
|
def truncExpBackoff() -> Iterator[float]:
|
|
@@ -102,12 +140,23 @@ class CalledProcessErrorStderr(subprocess.CalledProcessError):
|
|
|
102
140
|
if (self.returncode < 0) or (self.stderr is None):
|
|
103
141
|
return str(super())
|
|
104
142
|
else:
|
|
105
|
-
err =
|
|
143
|
+
err = (
|
|
144
|
+
self.stderr
|
|
145
|
+
if isinstance(self.stderr, str)
|
|
146
|
+
else self.stderr.decode("ascii", errors="replace")
|
|
147
|
+
)
|
|
106
148
|
return "Command '%s' exit status %d: %s" % (self.cmd, self.returncode, err)
|
|
107
149
|
|
|
108
150
|
|
|
109
|
-
def call_command(
|
|
110
|
-
|
|
151
|
+
def call_command(
|
|
152
|
+
cmd: list[str],
|
|
153
|
+
*args: str,
|
|
154
|
+
input: Optional[str] = None,
|
|
155
|
+
timeout: Optional[float] = None,
|
|
156
|
+
useCLocale: bool = True,
|
|
157
|
+
env: Optional[dict[str, str]] = None,
|
|
158
|
+
quiet: Optional[bool] = False
|
|
159
|
+
) -> str:
|
|
111
160
|
"""
|
|
112
161
|
Simplified calling of external commands.
|
|
113
162
|
|
|
@@ -138,14 +187,30 @@ def call_command(cmd: List[str], *args: str, input: Optional[str] = None, timeou
|
|
|
138
187
|
|
|
139
188
|
logger.debug("run command: {}".format(" ".join(cmd)))
|
|
140
189
|
start_time = datetime.datetime.now()
|
|
141
|
-
proc = subprocess.Popen(
|
|
142
|
-
|
|
190
|
+
proc = subprocess.Popen(
|
|
191
|
+
cmd,
|
|
192
|
+
stdout=subprocess.PIPE,
|
|
193
|
+
stderr=subprocess.PIPE,
|
|
194
|
+
encoding="utf-8",
|
|
195
|
+
errors="replace",
|
|
196
|
+
env=env,
|
|
197
|
+
)
|
|
143
198
|
stdout, stderr = proc.communicate(input=input, timeout=timeout)
|
|
144
199
|
end_time = datetime.datetime.now()
|
|
145
200
|
runtime = (end_time - start_time).total_seconds()
|
|
146
201
|
sys.stderr.write(stderr)
|
|
147
202
|
if proc.returncode != 0:
|
|
148
|
-
logger.debug(
|
|
149
|
-
|
|
150
|
-
|
|
203
|
+
logger.debug(
|
|
204
|
+
"command failed in {}s: {}: {}".format(
|
|
205
|
+
runtime, " ".join(cmd), stderr.rstrip()
|
|
206
|
+
)
|
|
207
|
+
)
|
|
208
|
+
raise CalledProcessErrorStderr(
|
|
209
|
+
proc.returncode, cmd, output=stdout, stderr=stderr
|
|
210
|
+
)
|
|
211
|
+
logger.debug(
|
|
212
|
+
"command succeeded in {}s: {}{}".format(
|
|
213
|
+
runtime, " ".join(cmd), (": " + stdout.rstrip()) if not quiet else ""
|
|
214
|
+
)
|
|
215
|
+
)
|
|
151
216
|
return stdout
|
toil/lib/objects.py
CHANGED
|
@@ -126,10 +126,10 @@ class InnerClass:
|
|
|
126
126
|
if instance is None:
|
|
127
127
|
return self.inner_class
|
|
128
128
|
else:
|
|
129
|
-
return self._bind(
|
|
129
|
+
return self._bind(instance)
|
|
130
130
|
|
|
131
131
|
@sync_memoize
|
|
132
|
-
def _bind(
|
|
132
|
+
def _bind(self, _outer):
|
|
133
133
|
class BoundInner(self.inner_class):
|
|
134
134
|
outer = _outer
|
|
135
135
|
|
toil/lib/resources.py
CHANGED
|
@@ -12,11 +12,11 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import fnmatch
|
|
15
|
-
import os
|
|
16
15
|
import math
|
|
17
|
-
import
|
|
16
|
+
import os
|
|
18
17
|
import resource
|
|
19
|
-
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
20
|
|
|
21
21
|
class ResourceMonitor:
|
|
22
22
|
"""
|
|
@@ -52,14 +52,20 @@ class ResourceMonitor:
|
|
|
52
52
|
cls._extra_cpu_seconds += seconds
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
|
-
def get_total_cpu_time_and_memory_usage(cls) ->
|
|
55
|
+
def get_total_cpu_time_and_memory_usage(cls) -> tuple[float, int]:
|
|
56
56
|
"""
|
|
57
57
|
Gives the total cpu time of itself and all its children, and the maximum RSS memory usage of
|
|
58
58
|
itself and its single largest child (in kibibytes).
|
|
59
59
|
"""
|
|
60
60
|
me = resource.getrusage(resource.RUSAGE_SELF)
|
|
61
61
|
children = resource.getrusage(resource.RUSAGE_CHILDREN)
|
|
62
|
-
total_cpu_time =
|
|
62
|
+
total_cpu_time = (
|
|
63
|
+
me.ru_utime
|
|
64
|
+
+ me.ru_stime
|
|
65
|
+
+ children.ru_utime
|
|
66
|
+
+ children.ru_stime
|
|
67
|
+
+ cls._extra_cpu_seconds
|
|
68
|
+
)
|
|
63
69
|
total_memory_usage = me.ru_maxrss + children.ru_maxrss
|
|
64
70
|
if sys.platform == "darwin":
|
|
65
71
|
# On Linux, getrusage works in "kilobytes" (really kibibytes), but on
|
|
@@ -74,10 +80,16 @@ class ResourceMonitor:
|
|
|
74
80
|
"""Gives the total cpu time, including the children."""
|
|
75
81
|
me = resource.getrusage(resource.RUSAGE_SELF)
|
|
76
82
|
childs = resource.getrusage(resource.RUSAGE_CHILDREN)
|
|
77
|
-
return
|
|
83
|
+
return (
|
|
84
|
+
me.ru_utime
|
|
85
|
+
+ me.ru_stime
|
|
86
|
+
+ childs.ru_utime
|
|
87
|
+
+ childs.ru_stime
|
|
88
|
+
+ cls._extra_cpu_seconds
|
|
89
|
+
)
|
|
78
90
|
|
|
79
91
|
|
|
80
|
-
def glob(glob_pattern: str, directoryname: str) ->
|
|
92
|
+
def glob(glob_pattern: str, directoryname: str) -> list[str]:
|
|
81
93
|
"""
|
|
82
94
|
Walks through a directory and its subdirectories looking for files matching
|
|
83
95
|
the glob_pattern and returns a list=[].
|