toil 8.2.0__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +13 -5
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -5
- toil/batchSystems/kubernetes.py +13 -2
- toil/batchSystems/mesos/batchSystem.py +33 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/slurm.py +191 -16
- toil/common.py +20 -1
- toil/cwl/cwltoil.py +97 -119
- toil/cwl/utils.py +103 -3
- toil/fileStores/__init__.py +1 -1
- toil/fileStores/abstractFileStore.py +5 -2
- toil/fileStores/cachingFileStore.py +1 -1
- toil/job.py +30 -14
- toil/jobStores/abstractJobStore.py +35 -255
- toil/jobStores/aws/jobStore.py +864 -1964
- toil/jobStores/aws/utils.py +24 -270
- toil/jobStores/fileJobStore.py +2 -1
- toil/jobStores/googleJobStore.py +32 -13
- toil/jobStores/utils.py +0 -327
- toil/leader.py +27 -22
- toil/lib/accelerators.py +1 -1
- toil/lib/aws/config.py +22 -0
- toil/lib/aws/s3.py +477 -9
- toil/lib/aws/utils.py +22 -33
- toil/lib/checksum.py +88 -0
- toil/lib/conversions.py +33 -31
- toil/lib/directory.py +217 -0
- toil/lib/ec2.py +97 -29
- toil/lib/exceptions.py +2 -1
- toil/lib/expando.py +2 -2
- toil/lib/generatedEC2Lists.py +138 -19
- toil/lib/io.py +33 -2
- toil/lib/memoize.py +21 -7
- toil/lib/misc.py +1 -1
- toil/lib/pipes.py +385 -0
- toil/lib/plugins.py +106 -0
- toil/lib/retry.py +1 -1
- toil/lib/threading.py +1 -1
- toil/lib/url.py +320 -0
- toil/lib/web.py +4 -5
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +12 -1
- toil/provisioners/__init__.py +5 -2
- toil/provisioners/aws/__init__.py +43 -36
- toil/provisioners/aws/awsProvisioner.py +47 -15
- toil/provisioners/node.py +60 -12
- toil/resource.py +3 -13
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +2 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +16 -18
- toil/test/batchSystems/batchSystemTest.py +2 -9
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +103 -14
- toil/test/cwl/cwlTest.py +181 -8
- toil/test/cwl/staging_cat.cwl +27 -0
- toil/test/cwl/staging_make_file.cwl +25 -0
- toil/test/cwl/staging_workflow.cwl +43 -0
- toil/test/cwl/zero_default.cwl +61 -0
- toil/test/docs/scripts/tutorial_staging.py +17 -8
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/jobStores/jobStoreTest.py +23 -133
- toil/test/lib/aws/test_iam.py +7 -7
- toil/test/lib/aws/test_s3.py +30 -33
- toil/test/lib/aws/test_utils.py +9 -9
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +60 -7
- toil/test/provisioners/clusterTest.py +15 -2
- toil/test/provisioners/gceProvisionerTest.py +1 -1
- toil/test/server/serverTest.py +78 -36
- toil/test/src/autoDeploymentTest.py +2 -3
- toil/test/src/fileStoreTest.py +89 -87
- toil/test/utils/ABCWorkflowDebug/ABC.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +4 -4
- toil/test/utils/toilKillTest.py +35 -28
- toil/test/wdl/md5sum/md5sum-gs.json +1 -1
- toil/test/wdl/md5sum/md5sum.json +1 -1
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
- toil/test/wdl/wdltoil_test.py +171 -162
- toil/test/wdl/wdltoil_test_kubernetes.py +9 -0
- toil/utils/toilDebugFile.py +6 -3
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilStats.py +17 -2
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +1179 -825
- toil/worker.py +16 -8
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/METADATA +32 -32
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/RECORD +97 -85
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/WHEEL +1 -1
- toil/lib/iterables.py +0 -112
- toil/test/docs/scripts/stagingExampleFiles/in.txt +0 -1
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/entry_points.txt +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {toil-8.2.0.dist-info → toil-9.1.0.dist-info}/top_level.txt +0 -0
toil/cwl/utils.py
CHANGED
@@ -20,11 +20,26 @@ import posixpath
 import stat
 from collections.abc import Iterable, MutableMapping, MutableSequence
 from pathlib import PurePosixPath
-from typing import
-
+from typing import (
+    Any,
+    Callable,
+    TypeVar,
+    Union,
+    Optional,
+    cast,
+    MutableSequence,
+    MutableMapping,
+    TYPE_CHECKING,
+)
+from urllib.parse import unquote, urlparse
+
+if TYPE_CHECKING:
+    # This module needs to be importable even if cwltool is not installed.
+    from cwltool.utils import CWLObjectType, CWLOutputType
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.jobStores.abstractJobStore import AbstractJobStore
+from toil.lib.url import URLAccess
 
 logger = logging.getLogger(__name__)
 
@@ -208,7 +223,7 @@ def download_structure(
             )
         else:
             # We need to download from some other kind of URL.
-            size, executable =
+            size, executable = URLAccess.read_from_url(
                 value, open(dest_path, "wb")
             )
             if executable:
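A minimal usage sketch of the call introduced above, for orientation only: the `URLAccess.read_from_url(url, writable)` form and its `(size, executable)` return value are taken from this hunk, while the URL and destination path below are made up.

    from toil.lib.url import URLAccess

    # Hypothetical source URL and destination path, purely for illustration.
    with open("/tmp/example.dat", "wb") as out:
        size, executable = URLAccess.read_from_url("https://example.com/data.bin", out)
    print(size, executable)  # bytes downloaded, and whether the source was marked executable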
@@ -219,3 +234,88 @@ def download_structure(
             # TODO: why?
             index[dest_path] = value
             existing[value] = dest_path
+
+
+def trim_mounts_op_down(file_or_directory: "CWLObjectType") -> None:
+    """
+    No-op function for mount-point trimming.
+    """
+    return
+
+
+def sniff_location(file_or_directory: "CWLObjectType") -> Optional[str]:
+    """
+    Get the local bare path for a CWL file or directory, or None.
+
+    :return: None if we don't have a local path or file URI
+    """
+    if file_or_directory.get('location') is None and file_or_directory.get('path') is None:
+        # file or directory is defined by contents or listing respectively, this is not redundant
+        return None
+    # Since we only consider mountable paths, if path is not a file URI or bare path, don't consider it
+    path_or_url = cast(str, file_or_directory.get('location') or file_or_directory.get('path'))
+    parsed = urlparse(path_or_url)
+    if parsed.scheme == 'file':
+        return unquote(parsed.path)
+    elif parsed.scheme == '':
+        return path_or_url
+    else:
+        return None
+
+
+def trim_mounts_op_up(file_or_directory: "CWLObjectType", op_down_ret: None, child_results: list[bool]) -> bool:
+    """
+    Remove subtrees of the CWL file or directory object tree that only have redundant stuff in them.
+
+    Nonredundant for something in a directory means its path or location is not within the parent directory or doesn't match its basename
+    Nonredundant for something in a secondary file means its path or location is not adjacent to the primary file or doesn't match its basename
+
+    If on a File:
+        Returns True if anything in secondary files is nonredundant or has nonredundant children to this file, false otherwise
+    If on a Directory:
+        Returns True if anything in top level listing is nonredundant or has nonredundant children, otherwise false.
+        If something in the listing is redundant and all children are redundant, then delete it
+    :param file_or_directory: CWL file or CWL directory type
+    :return: boolean
+    """
+    own_path = sniff_location(file_or_directory)
+    if own_path is None:
+        return True
+    # basename should be set as we are the implementation
+    own_basename = cast(str, file_or_directory['basename'])
+
+    # If the basename does not match the path, then this is nonredundant
+    if not own_path.endswith("/" + own_basename):
+        return True
+
+    if file_or_directory['class'] == 'File':
+        if any(child_results):
+            # one of the children was detected as not redundant
+            return True
+        for secondary in cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('secondaryFiles', [])):
+            # secondary files should already be flagged nonredundant if they don't have either a path or location
+            secondary_path = sniff_location(secondary)
+            secondary_basename = cast(str, secondary['basename'])
+            # If we swap the secondary basename for the primary basename in the primary path, and they don't match, then they are nonredundant
+            if os.path.join(own_path[:-len(own_basename)], secondary_basename) != secondary_path:
+                return True
+    else:
+        listings = cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('listing', []))
+        if len(listings) == 0:
+            return False
+        # We assume child_results is in the same order as the directory listing
+        # iterate backwards to avoid iteration issues
+        for i in range(len(listings) - 1, -1, -1):
+            if child_results[i] is False:
+                if os.path.join(own_path, cast(str, listings[i]['basename'])) == sniff_location(listings[i]):
+                    del listings[i]
+        # If one of the listings was nonredundant, then this directory is also nonredundant
+        if any(child_results):
+            return True
+    return False
+
+def remove_redundant_mounts(cwljob: "CWLObjectType") -> None:
+    """
+    Remove any redundant mount points from the listing. Modifies the CWL object in place.
+    """
+    visit_cwl_class_and_reduce(cwljob, ["Directory", "File"], trim_mounts_op_down, trim_mounts_op_up)
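To see what the new trimming pass treats as redundant, here is a small self-contained sketch (the example paths are made up, and the helper is re-implemented standalone rather than imported from the wheel) that mirrors the `sniff_location` check added above: a listing entry whose location is exactly the parent path joined with its basename can be dropped, because it is already reachable once the parent directory is mounted.

    import os
    from urllib.parse import unquote, urlparse

    def sniff_location(obj):
        # Mirrors the helper above: bare local path for a file:// URI or bare path, else None.
        if obj.get('location') is None and obj.get('path') is None:
            return None
        path_or_url = obj.get('location') or obj.get('path')
        parsed = urlparse(path_or_url)
        if parsed.scheme == 'file':
            return unquote(parsed.path)
        if parsed.scheme == '':
            return path_or_url
        return None

    # Hypothetical CWL Directory with one redundant and one nonredundant entry.
    directory = {
        'class': 'Directory',
        'location': 'file:///data/run1',
        'basename': 'run1',
        'listing': [
            {'class': 'File', 'location': 'file:///data/run1/reads.fq', 'basename': 'reads.fq'},
            {'class': 'File', 'location': 'file:///scratch/tmp123/reads2.fq', 'basename': 'reads2.fq'},
        ],
    }

    parent = sniff_location(directory)
    for child in directory['listing']:
        redundant = os.path.join(parent, child['basename']) == sniff_location(child)
        print(child['basename'], 'redundant' if redundant else 'needs its own mount')
    # reads.fq is redundant; reads2.fq is not, so remove_redundant_mounts would keep it in the listing.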
toil/fileStores/__init__.py
CHANGED
@@ -28,7 +28,7 @@ class FileID(str):
     the job store if unavailable in the ID.
     """
 
-    def __new__(cls, fileStoreID: str, *args: Any) -> "FileID":
+    def __new__(cls, fileStoreID: str, *args: Any, **kwargs: dict[str, Any]) -> "FileID":
         return super().__new__(cls, fileStoreID)
 
     def __init__(self, fileStoreID: str, size: int, executable: bool = False) -> None:

toil/fileStores/abstractFileStore.py
CHANGED

@@ -671,13 +671,16 @@ class AbstractFileStore(ABC):
         Send a logging message to the leader. The message will also be \
         logged by the worker at the same level.
 
+        Does not depend on the commit system, so this is safe to use during an
+        asynchronous commit, or without a commit afterward.
+
         :param text: The string to log.
         :param level: The logging level.
         """
-        logger.log(level=level, msg=("LOG-TO-
+        logger.log(level=level, msg=("LOG-TO-LEADER: " + text))
         self.logging_messages.append(dict(text=text, level=level))
 
-    @deprecated(new_function_name="
+    @deprecated(new_function_name="log_to_leader")
     def logToMaster(self, text: str, level: int = logging.INFO) -> None:
         self.log_to_leader(text, level)
 

toil/fileStores/cachingFileStore.py
CHANGED

@@ -1207,7 +1207,7 @@ class CachingFileStore(AbstractFileStore):
         # its temp dir and database entry.
         self._deallocateSpaceForJob()
 
-    def writeGlobalFile(self, localFileName, cleanup=False
+    def writeGlobalFile(self, localFileName, cleanup=False):
         """
         Creates a file in the jobstore and returns a FileID reference.
         """
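The `FileID.__new__` change in this group exists because `str` is immutable: every constructor argument, positional or keyword, is passed to `__new__` first, so `__new__` has to accept and discard anything beyond the ID text while `__init__` consumes the extra metadata. A minimal sketch of the same pattern (the class name and ID string here are illustrative, not Toil's):

    from typing import Any

    class TaggedID(str):
        # Accept and ignore extra arguments; str.__new__ only wants the ID text.
        def __new__(cls, identifier: str, *args: Any, **kwargs: Any) -> "TaggedID":
            return super().__new__(cls, identifier)

        # Consume the extra metadata here.
        def __init__(self, identifier: str, size: int, executable: bool = False) -> None:
            super().__init__()
            self.size = size
            self.executable = executable

    fid = TaggedID("file-abc123", size=1024, executable=True)
    print(fid, fid.size, fid.executable)  # still usable anywhere a str is expected

Without `**kwargs` in `__new__`, a call such as `TaggedID("file-abc123", size=1024)` would raise a TypeError before `__init__` ever ran, which appears to be the failure mode the wheel's change guards against.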
toil/job.py
CHANGED
@@ -236,16 +236,16 @@ def parse_accelerator(
     {'count': 1, 'kind': 'gpu'}
 
     >>> parse_accelerator("nvidia-tesla-k80")
-    {'count': 1, 'kind': 'gpu', '
+    {'count': 1, 'kind': 'gpu', 'model': 'nvidia-tesla-k80', 'brand': 'nvidia'}
 
     >>> parse_accelerator("nvidia-tesla-k80:2")
-    {'count': 2, 'kind': 'gpu', '
+    {'count': 2, 'kind': 'gpu', 'model': 'nvidia-tesla-k80', 'brand': 'nvidia'}
 
     >>> parse_accelerator("gpu")
     {'count': 1, 'kind': 'gpu'}
 
     >>> parse_accelerator("cuda:1")
-    {'count': 1, 'kind': 'gpu', '
+    {'count': 1, 'kind': 'gpu', 'api': 'cuda', 'brand': 'nvidia'}
 
     >>> parse_accelerator({"kind": "gpu"})
     {'count': 1, 'kind': 'gpu'}

@@ -581,8 +581,8 @@ class Requirer:
         >>> Requirer._parseResource('cores', 1), Requirer._parseResource('disk', 1), \
         Requirer._parseResource('memory', 1)
         (1, 1, 1)
-        >>> Requirer._parseResource('cores', '
-        Requirer._parseResource('memory', '
+        >>> Requirer._parseResource('cores', '1Gi'), Requirer._parseResource('disk', '1Gi'), \
+        Requirer._parseResource('memory', '1Gi')
         (1073741824, 1073741824, 1073741824)
         >>> Requirer._parseResource('cores', 1.1)
         1.1

@@ -813,7 +813,6 @@ class JobDescription(Requirer):
     Subclassed into variants for checkpoint jobs and service jobs that have
     their specific parameters.
     """
-
     def __init__(
         self,
         requirements: Mapping[str, Union[int, str, float, bool, list]],

@@ -3146,9 +3145,8 @@
 
         Will modify the job's description with changes that need to be committed back to the JobStore.
         """
-
-
-        startClock = ResourceMonitor.get_total_cpu_time()
+        startTime = time.time()
+        startClock = ResourceMonitor.get_total_cpu_time()
         baseDir = os.getcwd()
 
         succeeded = False

@@ -3180,18 +3178,36 @@
         # Change dir back to cwd dir, if changed by job (this is a safety issue)
         if os.getcwd() != baseDir:
             os.chdir(baseDir)
+
+        totalCpuTime, total_memory_kib = (
+            ResourceMonitor.get_total_cpu_time_and_memory_usage()
+        )
+        job_time = time.time() - startTime
+        job_cpu_time = totalCpuTime - startClock
+        allocated_cpu_time = job_time * self.cores
+
+        if job_cpu_time > allocated_cpu_time and allocated_cpu_time > 0:
+            # Too much CPU was used by this job! Maybe we're using a batch
+            # system that doesn't/can't sandbox us and we started too many
+            # threads. Complain to the user!
+            excess_factor = job_cpu_time / allocated_cpu_time
+            fileStore.log_to_leader(
+                f"Job {self.description} used {excess_factor:.2f}x more "
+                f"CPU than the requested {self.cores} cores. Consider "
+                f"increasing the job's required CPU cores or limiting the "
+                f"number of processes/threads launched.",
+                level=logging.WARNING
+            )
+
         # Finish up the stats
         if stats is not None:
-            totalCpuTime, total_memory_kib = (
-                ResourceMonitor.get_total_cpu_time_and_memory_usage()
-            )
             stats.jobs.append(
                 # TODO: We represent everything as strings in the stats
                 # even though the JSON transport can take bools and floats.
                 Expando(
                     start=str(startTime),
-                    time=str(
-                    clock=str(
+                    time=str(job_time),
+                    clock=str(job_cpu_time),
                     class_name=self._jobName(),
                     memory=str(total_memory_kib),
                     requested_cores=str(self.cores), # TODO: Isn't this really consumed cores?