toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +19 -4
- toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/slurm.py +85 -14
- toil/bus.py +38 -0
- toil/common.py +20 -18
- toil/cwl/cwltoil.py +81 -63
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +53 -4
- toil/fileStores/cachingFileStore.py +4 -20
- toil/fileStores/nonCachingFileStore.py +5 -14
- toil/job.py +46 -30
- toil/jobStores/abstractJobStore.py +21 -23
- toil/jobStores/aws/utils.py +5 -4
- toil/jobStores/fileJobStore.py +1 -1
- toil/leader.py +17 -14
- toil/lib/conversions.py +19 -0
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +28 -2
- toil/lib/resources.py +8 -1
- toil/lib/threading.py +27 -12
- toil/options/common.py +5 -7
- toil/options/wdl.py +5 -0
- toil/provisioners/abstractProvisioner.py +8 -0
- toil/statsAndLogging.py +36 -8
- toil/test/batchSystems/test_slurm.py +21 -6
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +243 -151
- toil/test/docs/scriptsTest.py +2 -2
- toil/test/jobStores/jobStoreTest.py +7 -5
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/clusterTest.py +9 -8
- toil/test/utils/toilDebugTest.py +1 -1
- toil/test/utils/utilsTest.py +3 -3
- toil/test/wdl/wdltoil_test.py +91 -16
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilStats.py +309 -266
- toil/utils/toilStatus.py +1 -1
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +341 -189
- toil/worker.py +31 -16
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/jobStores/aws/utils.py
CHANGED
|
@@ -17,13 +17,12 @@ import logging
|
|
|
17
17
|
import os
|
|
18
18
|
import types
|
|
19
19
|
from ssl import SSLError
|
|
20
|
-
from typing import Optional, cast
|
|
20
|
+
from typing import Optional, cast, TYPE_CHECKING
|
|
21
21
|
|
|
22
22
|
from boto3.s3.transfer import TransferConfig
|
|
23
23
|
from boto.exception import SDBResponseError
|
|
24
24
|
from botocore.client import Config
|
|
25
25
|
from botocore.exceptions import ClientError
|
|
26
|
-
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
27
26
|
|
|
28
27
|
from toil.lib.aws import session
|
|
29
28
|
from toil.lib.aws.utils import connection_reset, get_bucket_region
|
|
@@ -36,6 +35,8 @@ from toil.lib.retry import (DEFAULT_DELAYS,
|
|
|
36
35
|
get_error_status,
|
|
37
36
|
old_retry,
|
|
38
37
|
retry)
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
39
40
|
|
|
40
41
|
logger = logging.getLogger(__name__)
|
|
41
42
|
|
|
@@ -290,7 +291,7 @@ class ServerSideCopyProhibitedError(RuntimeError):
|
|
|
290
291
|
error=ClientError,
|
|
291
292
|
error_codes=[404, 500, 502, 503, 504]
|
|
292
293
|
)])
|
|
293
|
-
def copyKeyMultipart(resource: S3ServiceResource,
|
|
294
|
+
def copyKeyMultipart(resource: "S3ServiceResource",
|
|
294
295
|
srcBucketName: str,
|
|
295
296
|
srcKeyName: str,
|
|
296
297
|
srcKeyVersion: str,
|
|
@@ -346,7 +347,7 @@ def copyKeyMultipart(resource: S3ServiceResource,
|
|
|
346
347
|
# not wherever the bucket virtual hostnames go.
|
|
347
348
|
source_region = get_bucket_region(srcBucketName)
|
|
348
349
|
source_client = cast(
|
|
349
|
-
S3Client,
|
|
350
|
+
"S3Client",
|
|
350
351
|
session.client(
|
|
351
352
|
's3',
|
|
352
353
|
region_name=source_region,
|
toil/jobStores/fileJobStore.py
CHANGED
|
@@ -920,7 +920,7 @@ class FileJobStore(AbstractJobStore):
|
|
|
920
920
|
:raise NoSuchFileException: if the file with ID jobStoreFileID does
|
|
921
921
|
not exist or is not a file
|
|
922
922
|
"""
|
|
923
|
-
if not self.file_exists(
|
|
923
|
+
if not self.file_exists(jobStoreFileID):
|
|
924
924
|
raise NoSuchFileException(jobStoreFileID)
|
|
925
925
|
|
|
926
926
|
def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
|
toil/leader.py
CHANGED
|
@@ -28,14 +28,16 @@ import enlighten
|
|
|
28
28
|
from toil import resolveEntryPoint
|
|
29
29
|
from toil.batchSystems import DeadlockException
|
|
30
30
|
from toil.batchSystems.abstractBatchSystem import (AbstractBatchSystem,
|
|
31
|
-
BatchJobExitReason
|
|
31
|
+
BatchJobExitReason,
|
|
32
|
+
EXIT_STATUS_UNAVAILABLE_VALUE)
|
|
32
33
|
from toil.bus import (JobCompletedMessage,
|
|
33
34
|
JobFailedMessage,
|
|
34
35
|
JobIssuedMessage,
|
|
35
36
|
JobMissingMessage,
|
|
36
37
|
JobUpdatedMessage,
|
|
37
38
|
QueueSizeMessage,
|
|
38
|
-
gen_message_bus_path
|
|
39
|
+
gen_message_bus_path,
|
|
40
|
+
get_job_kind)
|
|
39
41
|
from toil.common import Config, ToilMetrics
|
|
40
42
|
from toil.cwl.utils import CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
41
43
|
from toil.exceptions import FailedJobsException
|
|
@@ -705,8 +707,9 @@ class Leader:
|
|
|
705
707
|
if exitStatus == 0:
|
|
706
708
|
logger.debug('Job ended: %s', updatedJob)
|
|
707
709
|
else:
|
|
708
|
-
|
|
709
|
-
|
|
710
|
+
status_string = str(exitStatus) if exitStatus != EXIT_STATUS_UNAVAILABLE_VALUE else "<UNAVAILABLE>"
|
|
711
|
+
logger.warning(f'Job failed with exit value {status_string}: {updatedJob}\n'
|
|
712
|
+
f'Exit reason: {BatchJobExitReason.to_string(exitReason)}')
|
|
710
713
|
if exitStatus == CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE:
|
|
711
714
|
# This is a CWL job informing us that the workflow is
|
|
712
715
|
# asking things of us that Toil can't do. When we raise an
|
|
@@ -715,7 +718,7 @@ class Leader:
|
|
|
715
718
|
logger.warning("This indicates an unsupported CWL requirement!")
|
|
716
719
|
self.recommended_fail_exit_code = CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE
|
|
717
720
|
# Tell everyone it stopped running.
|
|
718
|
-
self._messages.publish(JobCompletedMessage(updatedJob.
|
|
721
|
+
self._messages.publish(JobCompletedMessage(get_job_kind(updatedJob.get_names()), updatedJob.jobStoreID, exitStatus))
|
|
719
722
|
self.process_finished_job(bsID, exitStatus, wall_time=wallTime, exit_reason=exitReason)
|
|
720
723
|
|
|
721
724
|
def _processLostJobs(self):
|
|
@@ -921,7 +924,7 @@ class Leader:
|
|
|
921
924
|
"%s and %s",
|
|
922
925
|
jobNode, str(jobBatchSystemID), jobNode.requirements_string())
|
|
923
926
|
# Tell everyone it is issued and the queue size changed
|
|
924
|
-
self._messages.publish(JobIssuedMessage(jobNode.
|
|
927
|
+
self._messages.publish(JobIssuedMessage(get_job_kind(jobNode.get_names()), jobNode.jobStoreID, jobBatchSystemID))
|
|
925
928
|
self._messages.publish(QueueSizeMessage(self.getNumberOfJobsIssued()))
|
|
926
929
|
# Tell the user there's another job to do
|
|
927
930
|
self.progress_overall.total += 1
|
|
@@ -1157,7 +1160,7 @@ class Leader:
|
|
|
1157
1160
|
self.progress_overall.update(incr=-1)
|
|
1158
1161
|
self.progress_failed.update(incr=1)
|
|
1159
1162
|
|
|
1160
|
-
# Delegate to the
|
|
1163
|
+
# Delegate to the version that uses a JobDescription
|
|
1161
1164
|
return self.process_finished_job_description(issued_job, result_status, wall_time, exit_reason, batch_system_id)
|
|
1162
1165
|
|
|
1163
1166
|
def process_finished_job_description(self, finished_job: JobDescription, result_status: int,
|
|
@@ -1208,11 +1211,12 @@ class Leader:
|
|
|
1208
1211
|
# more memory efficient than read().striplines() while leaving off the
|
|
1209
1212
|
# trailing \n left when using readlines()
|
|
1210
1213
|
# http://stackoverflow.com/a/15233739
|
|
1211
|
-
StatsAndLogging.logWithFormatting(job_store_id, log_stream, method=logger.warning,
|
|
1214
|
+
StatsAndLogging.logWithFormatting(f'Log from job "{job_store_id}"', log_stream, method=logger.warning,
|
|
1212
1215
|
message='The job seems to have left a log file, indicating failure: %s' % replacement_job)
|
|
1213
1216
|
if self.config.writeLogs or self.config.writeLogsGzip:
|
|
1214
1217
|
with replacement_job.getLogFileHandle(self.jobStore) as log_stream:
|
|
1215
|
-
|
|
1218
|
+
# Send log data from the job store to each per-job log file involved.
|
|
1219
|
+
StatsAndLogging.writeLogFiles([names.stats_name for names in replacement_job.get_chain()], log_stream, self.config, failed=True)
|
|
1216
1220
|
if result_status != 0:
|
|
1217
1221
|
# If the batch system returned a non-zero exit code then the worker
|
|
1218
1222
|
# is assumed not to have captured the failure of the job, so we
|
|
@@ -1236,13 +1240,12 @@ class Leader:
|
|
|
1236
1240
|
else:
|
|
1237
1241
|
with log_stream:
|
|
1238
1242
|
if os.path.getsize(log_file) > 0:
|
|
1239
|
-
StatsAndLogging.logWithFormatting(job_store_id, log_stream, method=logger.warning,
|
|
1243
|
+
StatsAndLogging.logWithFormatting(f'Log from job "{job_store_id}"', log_stream, method=logger.warning,
|
|
1240
1244
|
message='The batch system left a non-empty file %s:' % log_file)
|
|
1241
1245
|
if self.config.writeLogs or self.config.writeLogsGzip:
|
|
1242
1246
|
file_root, _ = os.path.splitext(os.path.basename(log_file))
|
|
1243
|
-
job_names = replacement_job.
|
|
1244
|
-
|
|
1245
|
-
job_names = [str(replacement_job)]
|
|
1247
|
+
job_names = [names.stats_name for names in replacement_job.get_chain()]
|
|
1248
|
+
# Tack the batch system log file name onto each job's name
|
|
1246
1249
|
job_names = [j + '_' + file_root for j in job_names]
|
|
1247
1250
|
log_stream.seek(0)
|
|
1248
1251
|
StatsAndLogging.writeLogFiles(job_names, log_stream, self.config, failed=True)
|
|
@@ -1309,7 +1312,7 @@ class Leader:
|
|
|
1309
1312
|
|
|
1310
1313
|
# Tell everyone it failed
|
|
1311
1314
|
|
|
1312
|
-
self._messages.publish(JobFailedMessage(job_desc.
|
|
1315
|
+
self._messages.publish(JobFailedMessage(get_job_kind(job_desc.get_names()), job_id))
|
|
1313
1316
|
|
|
1314
1317
|
if job_id in self.toilState.service_to_client:
|
|
1315
1318
|
# Is a service job
|
toil/lib/conversions.py
CHANGED
|
@@ -128,3 +128,22 @@ def hms_duration_to_seconds(hms: str) -> float:
|
|
|
128
128
|
seconds += float(vals_to_convert[2])
|
|
129
129
|
|
|
130
130
|
return seconds
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def strtobool(val: str) -> bool:
    """
    Make a human-readable string into a bool.

    Convert a string along the lines of "y", "1", "ON", "TrUe", or
    "Yes" to True, and the corresponding false-ish values to False.
    Matching is case-insensitive and ignores surrounding whitespace.

    :param val: The text to interpret.
    :return: True for true-ish text, False for false-ish text.
    :raises ValueError: If val does not begin with a recognized prefix
            (this includes empty and whitespace-only strings).
    """
    # We only track prefixes, so "y" covers "y", "yes",
    # and "yeah no" and makes them all True.
    true_prefixes = ("1", "on", "y", "t")
    false_prefixes = ("0", "off", "n", "f")
    # Strip before matching: prefix matching already tolerated trailing
    # whitespace, so leading whitespace should not be an error either.
    lowered = val.strip().lower()
    # str.startswith accepts a tuple, so each group is a single check.
    if lowered.startswith(true_prefixes):
        return True
    if lowered.startswith(false_prefixes):
        return False
    # Report the caller's original text, not the normalized form.
    raise ValueError(f"Cannot convert \"{val}\" to a bool")
|
|
149
|
+
|