toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- toil/__init__.py +1 -232
- toil/batchSystems/abstractBatchSystem.py +41 -17
- toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +4 -5
- toil/batchSystems/gridengine.py +1 -1
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +25 -11
- toil/batchSystems/local_support.py +3 -3
- toil/batchSystems/lsf.py +9 -9
- toil/batchSystems/mesos/batchSystem.py +4 -4
- toil/batchSystems/mesos/executor.py +3 -2
- toil/batchSystems/options.py +9 -0
- toil/batchSystems/singleMachine.py +11 -10
- toil/batchSystems/slurm.py +129 -16
- toil/batchSystems/torque.py +1 -1
- toil/bus.py +45 -3
- toil/common.py +56 -31
- toil/cwl/cwltoil.py +442 -371
- toil/deferred.py +1 -1
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +69 -20
- toil/fileStores/cachingFileStore.py +6 -22
- toil/fileStores/nonCachingFileStore.py +6 -15
- toil/job.py +270 -86
- toil/jobStores/abstractJobStore.py +37 -31
- toil/jobStores/aws/jobStore.py +280 -218
- toil/jobStores/aws/utils.py +60 -31
- toil/jobStores/conftest.py +2 -2
- toil/jobStores/fileJobStore.py +3 -3
- toil/jobStores/googleJobStore.py +3 -4
- toil/leader.py +89 -38
- toil/lib/aws/__init__.py +26 -10
- toil/lib/aws/iam.py +2 -2
- toil/lib/aws/session.py +62 -22
- toil/lib/aws/utils.py +73 -37
- toil/lib/conversions.py +24 -1
- toil/lib/ec2.py +118 -69
- toil/lib/expando.py +1 -1
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +42 -4
- toil/lib/misc.py +1 -3
- toil/lib/resources.py +57 -16
- toil/lib/retry.py +12 -5
- toil/lib/threading.py +29 -14
- toil/lib/throttle.py +1 -1
- toil/options/common.py +31 -30
- toil/options/wdl.py +5 -0
- toil/provisioners/__init__.py +9 -3
- toil/provisioners/abstractProvisioner.py +12 -2
- toil/provisioners/aws/__init__.py +20 -15
- toil/provisioners/aws/awsProvisioner.py +406 -329
- toil/provisioners/gceProvisioner.py +2 -2
- toil/provisioners/node.py +13 -5
- toil/server/app.py +1 -1
- toil/statsAndLogging.py +93 -23
- toil/test/__init__.py +27 -12
- toil/test/batchSystems/batchSystemTest.py +40 -33
- toil/test/batchSystems/batch_system_plugin_test.py +79 -0
- toil/test/batchSystems/test_slurm.py +22 -7
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +245 -236
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +11 -14
- toil/test/jobStores/jobStoreTest.py +40 -54
- toil/test/lib/aws/test_iam.py +2 -2
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
- toil/test/provisioners/clusterTest.py +99 -16
- toil/test/server/serverTest.py +2 -2
- toil/test/src/autoDeploymentTest.py +1 -1
- toil/test/src/dockerCheckTest.py +2 -1
- toil/test/src/environmentTest.py +125 -0
- toil/test/src/fileStoreTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +18 -8
- toil/test/src/jobTest.py +1 -1
- toil/test/src/realtimeLoggerTest.py +4 -0
- toil/test/src/workerTest.py +52 -19
- toil/test/utils/toilDebugTest.py +62 -4
- toil/test/utils/utilsTest.py +23 -21
- toil/test/wdl/wdltoil_test.py +49 -21
- toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
- toil/toilState.py +68 -9
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilDebugJob.py +153 -26
- toil/utils/toilLaunchCluster.py +12 -2
- toil/utils/toilRsyncCluster.py +7 -2
- toil/utils/toilSshCluster.py +7 -3
- toil/utils/toilStats.py +310 -266
- toil/utils/toilStatus.py +98 -52
- toil/version.py +11 -11
- toil/wdl/wdltoil.py +644 -225
- toil/worker.py +125 -83
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
- toil-7.0.0.dist-info/METADATA +158 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/common.py
CHANGED
|
@@ -23,6 +23,7 @@ import tempfile
|
|
|
23
23
|
import time
|
|
24
24
|
import uuid
|
|
25
25
|
import warnings
|
|
26
|
+
from io import StringIO
|
|
26
27
|
|
|
27
28
|
from ruamel.yaml import YAML
|
|
28
29
|
from ruamel.yaml.comments import CommentedMap
|
|
@@ -63,8 +64,7 @@ else:
|
|
|
63
64
|
from typing_extensions import Literal
|
|
64
65
|
|
|
65
66
|
from toil import logProcessContext, lookupEnvVar
|
|
66
|
-
from toil.batchSystems.options import
|
|
67
|
-
set_batchsystem_options)
|
|
67
|
+
from toil.batchSystems.options import set_batchsystem_options
|
|
68
68
|
from toil.bus import (ClusterDesiredSizeMessage,
|
|
69
69
|
ClusterSizeMessage,
|
|
70
70
|
JobCompletedMessage,
|
|
@@ -74,17 +74,15 @@ from toil.bus import (ClusterDesiredSizeMessage,
|
|
|
74
74
|
MessageBus,
|
|
75
75
|
QueueSizeMessage)
|
|
76
76
|
from toil.fileStores import FileID
|
|
77
|
-
from toil.lib.aws import zone_to_region, build_tag_dict_from_env
|
|
78
77
|
from toil.lib.compatibility import deprecated
|
|
79
78
|
from toil.lib.io import try_path, AtomicFileCreate
|
|
80
79
|
from toil.lib.retry import retry
|
|
81
80
|
from toil.provisioners import (add_provisioner_options,
|
|
82
|
-
cluster_factory
|
|
83
|
-
parse_node_types)
|
|
81
|
+
cluster_factory)
|
|
84
82
|
from toil.realtimeLogger import RealtimeLogger
|
|
85
83
|
from toil.statsAndLogging import (add_logging_options,
|
|
86
84
|
set_logging_from_options)
|
|
87
|
-
from toil.version import dockerRegistry, dockerTag, version
|
|
85
|
+
from toil.version import dockerRegistry, dockerTag, version
|
|
88
86
|
|
|
89
87
|
if TYPE_CHECKING:
|
|
90
88
|
from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
|
|
@@ -124,6 +122,7 @@ class Config:
|
|
|
124
122
|
kubernetes_owner: Optional[str]
|
|
125
123
|
kubernetes_service_account: Optional[str]
|
|
126
124
|
kubernetes_pod_timeout: float
|
|
125
|
+
kubernetes_privileged: bool
|
|
127
126
|
tes_endpoint: str
|
|
128
127
|
tes_user: str
|
|
129
128
|
tes_password: str
|
|
@@ -137,6 +136,7 @@ class Config:
|
|
|
137
136
|
"""The backing scheduler will be instructed, if possible, to save logs
|
|
138
137
|
to this directory, where the leader can read them."""
|
|
139
138
|
statePollingWait: int
|
|
139
|
+
state_polling_timeout: int
|
|
140
140
|
disableAutoDeployment: bool
|
|
141
141
|
|
|
142
142
|
# Core options
|
|
@@ -148,6 +148,7 @@ class Config:
|
|
|
148
148
|
workflowAttemptNumber: int
|
|
149
149
|
jobStore: str
|
|
150
150
|
logLevel: str
|
|
151
|
+
colored_logs: bool
|
|
151
152
|
workDir: Optional[str]
|
|
152
153
|
coordination_dir: Optional[str]
|
|
153
154
|
noStdOutErr: bool
|
|
@@ -208,6 +209,7 @@ class Config:
|
|
|
208
209
|
doubleMem: bool
|
|
209
210
|
maxJobDuration: int
|
|
210
211
|
rescueJobsFrequency: int
|
|
212
|
+
job_store_timeout: float
|
|
211
213
|
|
|
212
214
|
# Log management
|
|
213
215
|
maxLogFileSize: int
|
|
@@ -285,8 +287,6 @@ class Config:
|
|
|
285
287
|
|
|
286
288
|
1. options object under option_name
|
|
287
289
|
2. options object under old_names
|
|
288
|
-
3. environment variables in env
|
|
289
|
-
4. provided default value
|
|
290
290
|
|
|
291
291
|
Selected option value is run through parsing_function if it is set.
|
|
292
292
|
Then the parsed value is run through check_function to check it for
|
|
@@ -375,6 +375,7 @@ class Config:
|
|
|
375
375
|
set_option("doubleMem")
|
|
376
376
|
set_option("maxJobDuration")
|
|
377
377
|
set_option("rescueJobsFrequency")
|
|
378
|
+
set_option("job_store_timeout")
|
|
378
379
|
|
|
379
380
|
# Log management
|
|
380
381
|
set_option("maxLogFileSize")
|
|
@@ -401,10 +402,17 @@ class Config:
|
|
|
401
402
|
set_option("badWorker")
|
|
402
403
|
set_option("badWorkerFailInterval")
|
|
403
404
|
set_option("logLevel")
|
|
405
|
+
set_option("colored_logs")
|
|
404
406
|
|
|
405
|
-
|
|
407
|
+
# Apply overrides as highest priority
|
|
408
|
+
# Override workDir with value of TOIL_WORKDIR_OVERRIDE if it exists
|
|
409
|
+
if os.getenv('TOIL_WORKDIR_OVERRIDE') is not None:
|
|
410
|
+
self.workDir = os.getenv('TOIL_WORKDIR_OVERRIDE')
|
|
411
|
+
# Override workDir with value of TOIL_COORDINATION_DIR_OVERRIDE if it exists (NOTE(review): probably meant to set coordination_dir - confirm)
|
|
412
|
+
if os.getenv('TOIL_COORDINATION_DIR_OVERRIDE') is not None:
|
|
413
|
+
self.workDir = os.getenv('TOIL_COORDINATION_DIR_OVERRIDE')
|
|
406
414
|
|
|
407
|
-
|
|
415
|
+
self.check_configuration_consistency()
|
|
408
416
|
|
|
409
417
|
def check_configuration_consistency(self) -> None:
|
|
410
418
|
"""Old checks that cannot be fit into an action class for argparse"""
|
|
@@ -580,19 +588,23 @@ def generate_config(filepath: str) -> None:
|
|
|
580
588
|
with AtomicFileCreate(filepath) as temp_path:
|
|
581
589
|
with open(temp_path, "w") as f:
|
|
582
590
|
f.write("config_version: 1.0\n")
|
|
583
|
-
yaml = YAML(typ=
|
|
591
|
+
yaml = YAML(typ='rt')
|
|
584
592
|
for data in all_data:
|
|
585
593
|
if "config_version" in data:
|
|
586
594
|
del data["config_version"]
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
595
|
+
with StringIO() as data_string:
|
|
596
|
+
yaml.dump(data, data_string)
|
|
597
|
+
for line in data_string.readline():
|
|
598
|
+
if line:
|
|
599
|
+
f.write("#")
|
|
600
|
+
f.write(f"{line}\n")
|
|
592
601
|
|
|
593
602
|
|
|
594
603
|
def parser_with_common_options(
|
|
595
|
-
|
|
604
|
+
provisioner_options: bool = False,
|
|
605
|
+
jobstore_option: bool = True,
|
|
606
|
+
prog: Optional[str] = None,
|
|
607
|
+
default_log_level: Optional[int] = None
|
|
596
608
|
) -> ArgParser:
|
|
597
609
|
parser = ArgParser(prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter)
|
|
598
610
|
|
|
@@ -603,7 +615,7 @@ def parser_with_common_options(
|
|
|
603
615
|
parser.add_argument('jobStore', type=str, help=JOBSTORE_HELP)
|
|
604
616
|
|
|
605
617
|
# always add these
|
|
606
|
-
add_logging_options(parser)
|
|
618
|
+
add_logging_options(parser, default_log_level)
|
|
607
619
|
parser.add_argument("--version", action='version', version=version)
|
|
608
620
|
parser.add_argument("--tempDirRoot", dest="tempDirRoot", type=str, default=tempfile.gettempdir(),
|
|
609
621
|
help="Path to where temporary directory containing all temp files are created, "
|
|
@@ -701,7 +713,7 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
|
|
|
701
713
|
help="WDL document URI")
|
|
702
714
|
parser.add_argument("inputs_uri", type=str, nargs='?',
|
|
703
715
|
help="WDL input JSON URI")
|
|
704
|
-
parser.add_argument("--input", "-i", dest="inputs_uri", type=str,
|
|
716
|
+
parser.add_argument("--input", "--inputs", "-i", dest="inputs_uri", type=str,
|
|
705
717
|
help="WDL input JSON URI")
|
|
706
718
|
check_arguments(typ="wdl")
|
|
707
719
|
|
|
@@ -709,7 +721,7 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool
|
|
|
709
721
|
@lru_cache(maxsize=None)
|
|
710
722
|
def getNodeID() -> str:
|
|
711
723
|
"""
|
|
712
|
-
Return unique ID of the current node (host). The resulting string will be
|
|
724
|
+
Return unique ID of the current node (host). The resulting string will be convertible to a uuid.UUID.
|
|
713
725
|
|
|
714
726
|
Tries several methods until success. The returned ID should be identical across calls from different processes on
|
|
715
727
|
the same node at least until the next OS reboot.
|
|
@@ -757,7 +769,7 @@ def getNodeID() -> str:
|
|
|
757
769
|
"experience cryptic job failures")
|
|
758
770
|
if len(nodeID.replace('-', '')) < UUID_LENGTH:
|
|
759
771
|
# Some platforms (Mac) give us too few actual hex characters.
|
|
760
|
-
# Repeat them so the result is
|
|
772
|
+
# Repeat them so the result is convertible to a uuid.UUID
|
|
761
773
|
nodeID = nodeID.replace('-', '')
|
|
762
774
|
num_repeats = UUID_LENGTH // len(nodeID) + 1
|
|
763
775
|
nodeID = nodeID * num_repeats
|
|
@@ -801,6 +813,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
801
813
|
set_logging_from_options(self.options)
|
|
802
814
|
config = Config()
|
|
803
815
|
config.setOptions(self.options)
|
|
816
|
+
logger.debug("Loaded configuration: %s", vars(self.options))
|
|
804
817
|
if config.jobStore is None:
|
|
805
818
|
raise RuntimeError("No jobstore provided!")
|
|
806
819
|
jobStore = self.getJobStore(config.jobStore)
|
|
@@ -875,6 +888,16 @@ class Toil(ContextManager["Toil"]):
|
|
|
875
888
|
"""
|
|
876
889
|
self._assertContextManagerUsed()
|
|
877
890
|
|
|
891
|
+
from toil.job import Job
|
|
892
|
+
|
|
893
|
+
# Check that the rootJob is an instance of the Job class
|
|
894
|
+
if not isinstance(rootJob, Job):
|
|
895
|
+
raise RuntimeError("The type of the root job is not a job.")
|
|
896
|
+
|
|
897
|
+
# Check that the rootJob has been initialized
|
|
898
|
+
rootJob.check_initialized()
|
|
899
|
+
|
|
900
|
+
|
|
878
901
|
# Write shared files to the job store
|
|
879
902
|
self._jobStore.write_leader_pid()
|
|
880
903
|
self._jobStore.write_leader_node_id()
|
|
@@ -1255,18 +1278,14 @@ class Toil(ContextManager["Toil"]):
|
|
|
1255
1278
|
--workDir flag
|
|
1256
1279
|
:param config_coordination_dir: Value passed to the program using the
|
|
1257
1280
|
--coordinationDir flag
|
|
1281
|
+
:param workflow_id: Used if a tmpdir_prefix exists to create full
|
|
1282
|
+
directory paths unique per workflow
|
|
1258
1283
|
|
|
1259
1284
|
:return: Path to the Toil coordination directory. Ought to be on a
|
|
1260
1285
|
POSIX filesystem that allows directories containing open files to be
|
|
1261
1286
|
deleted.
|
|
1262
1287
|
"""
|
|
1263
1288
|
|
|
1264
|
-
if 'XDG_RUNTIME_DIR' in os.environ and not os.path.exists(os.environ['XDG_RUNTIME_DIR']):
|
|
1265
|
-
# Slurm has been observed providing this variable but not keeping
|
|
1266
|
-
# the directory live as long as we run for.
|
|
1267
|
-
logger.warning('XDG_RUNTIME_DIR is set to nonexistent directory %s; your environment may be out of spec!',
|
|
1268
|
-
os.environ['XDG_RUNTIME_DIR'])
|
|
1269
|
-
|
|
1270
1289
|
# Go get a coordination directory, using a lot of short-circuiting of
|
|
1271
1290
|
# or and the fact that and returns its second argument when it
|
|
1272
1291
|
# succeeds.
|
|
@@ -1289,6 +1308,9 @@ class Toil(ContextManager["Toil"]):
|
|
|
1289
1308
|
os.path.join(os.environ['XDG_RUNTIME_DIR'], 'toil'))) or
|
|
1290
1309
|
# Try under /run/lock. It might be a temp dir style sticky directory.
|
|
1291
1310
|
try_path('/run/lock') or
|
|
1311
|
+
# Try all possible temp directories, falling back to the current working
|
|
1312
|
+
# directory
|
|
1313
|
+
tempfile.gettempdir() or
|
|
1292
1314
|
# Finally, fall back on the work dir and hope it's a legit filesystem.
|
|
1293
1315
|
cls.getToilWorkDir(config_work_dir)
|
|
1294
1316
|
)
|
|
@@ -1299,7 +1321,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
1299
1321
|
return coordination_dir
|
|
1300
1322
|
|
|
1301
1323
|
@staticmethod
|
|
1302
|
-
def
|
|
1324
|
+
def get_workflow_path_component(workflow_id: str) -> str:
|
|
1303
1325
|
"""
|
|
1304
1326
|
Get a safe filesystem path component for a workflow.
|
|
1305
1327
|
|
|
@@ -1308,7 +1330,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
1308
1330
|
|
|
1309
1331
|
:param workflow_id: The ID of the current Toil workflow.
|
|
1310
1332
|
"""
|
|
1311
|
-
return str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace('-', '')
|
|
1333
|
+
return "toilwf-" + str(uuid.uuid5(uuid.UUID(getNodeID()), workflow_id)).replace('-', '')
|
|
1312
1334
|
|
|
1313
1335
|
@classmethod
|
|
1314
1336
|
def getLocalWorkflowDir(
|
|
@@ -1325,7 +1347,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
1325
1347
|
|
|
1326
1348
|
# Create a directory unique to each host in case workDir is on a shared FS.
|
|
1327
1349
|
# This prevents workers on different nodes from erasing each other's directories.
|
|
1328
|
-
workflowDir: str = os.path.join(base, cls.
|
|
1350
|
+
workflowDir: str = os.path.join(base, cls.get_workflow_path_component(workflowID))
|
|
1329
1351
|
try:
|
|
1330
1352
|
# Directory creation is atomic
|
|
1331
1353
|
os.mkdir(workflowDir)
|
|
@@ -1367,7 +1389,8 @@ class Toil(ContextManager["Toil"]):
|
|
|
1367
1389
|
base = cls.get_toil_coordination_dir(config_work_dir, config_coordination_dir)
|
|
1368
1390
|
|
|
1369
1391
|
# Make a per-workflow and node subdirectory
|
|
1370
|
-
subdir = os.path.join(base, cls.
|
|
1392
|
+
subdir = os.path.join(base, cls.get_workflow_path_component(workflow_id))
|
|
1393
|
+
|
|
1371
1394
|
# Make it exist
|
|
1372
1395
|
os.makedirs(subdir, exist_ok=True)
|
|
1373
1396
|
# TODO: May interfere with workflow directory creation logging if it's the same directory.
|
|
@@ -1425,6 +1448,8 @@ class ToilMetrics:
|
|
|
1425
1448
|
clusterName = str(provisioner.clusterName)
|
|
1426
1449
|
if provisioner._zone is not None:
|
|
1427
1450
|
if provisioner.cloud == 'aws':
|
|
1451
|
+
# lazy import to avoid AWS dependency if the aws extra is not installed
|
|
1452
|
+
from toil.lib.aws import zone_to_region
|
|
1428
1453
|
# Remove AZ name
|
|
1429
1454
|
region = zone_to_region(provisioner._zone)
|
|
1430
1455
|
else:
|