toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -13,16 +13,16 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import logging
|
|
15
15
|
from types import TracebackType
|
|
16
|
-
from typing import Any, ContextManager,
|
|
16
|
+
from typing import Any, ContextManager, Optional
|
|
17
17
|
|
|
18
|
-
from toil.batchSystems.abstractBatchSystem import
|
|
19
|
-
WorkerCleanupInfo)
|
|
18
|
+
from toil.batchSystems.abstractBatchSystem import BatchSystemSupport, WorkerCleanupInfo
|
|
20
19
|
from toil.batchSystems.local_support import BatchSystemLocalSupport
|
|
21
20
|
from toil.common import Config, Toil
|
|
22
21
|
from toil.lib.threading import LastProcessStandingArena
|
|
23
22
|
|
|
24
23
|
logger = logging.getLogger(__name__)
|
|
25
24
|
|
|
25
|
+
|
|
26
26
|
class BatchSystemCleanupSupport(BatchSystemLocalSupport):
|
|
27
27
|
"""
|
|
28
28
|
Adds cleanup support when the last running job leaves a node, for batch
|
|
@@ -33,7 +33,7 @@ class BatchSystemCleanupSupport(BatchSystemLocalSupport):
|
|
|
33
33
|
def supportsWorkerCleanup(cls) -> bool:
|
|
34
34
|
return True
|
|
35
35
|
|
|
36
|
-
def getWorkerContexts(self) ->
|
|
36
|
+
def getWorkerContexts(self) -> list[ContextManager[Any]]:
|
|
37
37
|
# Tell worker to register for and invoke cleanup
|
|
38
38
|
|
|
39
39
|
# Create a context manager that has a copy of our cleanup info
|
|
@@ -44,9 +44,12 @@ class BatchSystemCleanupSupport(BatchSystemLocalSupport):
|
|
|
44
44
|
contexts.append(context)
|
|
45
45
|
return contexts
|
|
46
46
|
|
|
47
|
-
def __init__(
|
|
47
|
+
def __init__(
|
|
48
|
+
self, config: Config, maxCores: float, maxMemory: int, maxDisk: int
|
|
49
|
+
) -> None:
|
|
48
50
|
super().__init__(config, maxCores, maxMemory, maxDisk)
|
|
49
51
|
|
|
52
|
+
|
|
50
53
|
class WorkerCleanupContext:
|
|
51
54
|
"""
|
|
52
55
|
Context manager used by :class:`BatchSystemCleanupSupport` to implement
|
|
@@ -63,31 +66,38 @@ class WorkerCleanupContext:
|
|
|
63
66
|
the last to exit the context manager.
|
|
64
67
|
"""
|
|
65
68
|
|
|
66
|
-
|
|
67
69
|
self.workerCleanupInfo = workerCleanupInfo
|
|
68
70
|
# Don't set self.arena or MyPy will be upset that sometimes it doesn't have the right type.
|
|
69
71
|
|
|
70
72
|
def __enter__(self) -> None:
|
|
71
73
|
# Set up an arena so we know who is the last worker to leave
|
|
72
|
-
self.arena = LastProcessStandingArena(
|
|
73
|
-
|
|
74
|
-
|
|
74
|
+
self.arena = LastProcessStandingArena(
|
|
75
|
+
Toil.get_toil_coordination_dir(
|
|
76
|
+
self.workerCleanupInfo.work_dir, self.workerCleanupInfo.coordination_dir
|
|
77
|
+
),
|
|
78
|
+
Toil.get_workflow_path_component(self.workerCleanupInfo.workflow_id)
|
|
79
|
+
+ "-cleanup",
|
|
80
|
+
)
|
|
81
|
+
logger.debug("Entering cleanup arena")
|
|
75
82
|
self.arena.enter()
|
|
76
|
-
logger.debug(
|
|
83
|
+
logger.debug("Cleanup arena entered")
|
|
77
84
|
|
|
78
85
|
# This is exactly the signature MyPy demands.
|
|
79
86
|
# Also, it demands we not say we can return a bool if we return False
|
|
80
87
|
# always, because it can be smarter about reachability if it knows what
|
|
81
88
|
# context managers never eat exceptions. So it decides any context manager
|
|
82
89
|
# that is always falsey but claims to return a bool is an error.
|
|
83
|
-
def __exit__(
|
|
84
|
-
|
|
90
|
+
def __exit__(
|
|
91
|
+
self,
|
|
92
|
+
type: Optional[type[BaseException]],
|
|
93
|
+
value: Optional[BaseException],
|
|
94
|
+
traceback: Optional[TracebackType],
|
|
95
|
+
) -> None:
|
|
96
|
+
logger.debug("Leaving cleanup arena")
|
|
85
97
|
for _ in self.arena.leave():
|
|
86
98
|
# We are the last concurrent worker to finish.
|
|
87
99
|
# Do batch system cleanup.
|
|
88
|
-
logger.debug(
|
|
100
|
+
logger.debug("Cleaning up worker")
|
|
89
101
|
BatchSystemSupport.workerCleanup(self.workerCleanupInfo)
|
|
90
102
|
# Now the coordination_dir is allowed to no longer exist on the node.
|
|
91
|
-
logger.debug(
|
|
92
|
-
|
|
93
|
-
|
|
103
|
+
logger.debug("Cleanup arena left")
|
|
@@ -22,21 +22,24 @@ import os
|
|
|
22
22
|
import pickle
|
|
23
23
|
import subprocess
|
|
24
24
|
import sys
|
|
25
|
-
from typing import Any,
|
|
25
|
+
from typing import Any, Optional
|
|
26
26
|
|
|
27
27
|
from toil.batchSystems.abstractBatchSystem import EXIT_STATUS_UNAVAILABLE_VALUE
|
|
28
|
-
from toil.job import JobDescription
|
|
29
28
|
from toil.resource import Resource
|
|
30
29
|
from toil.statsAndLogging import configure_root_logger, set_log_level
|
|
31
30
|
|
|
32
31
|
logger = logging.getLogger(__name__)
|
|
33
32
|
|
|
34
33
|
|
|
35
|
-
def pack_job(
|
|
34
|
+
def pack_job(
|
|
35
|
+
command: str,
|
|
36
|
+
user_script: Optional[Resource] = None,
|
|
37
|
+
environment: Optional[dict[str, str]] = None,
|
|
38
|
+
) -> list[str]:
|
|
36
39
|
"""
|
|
37
|
-
Create a command that
|
|
40
|
+
Create a command that runs the given command in an environment.
|
|
38
41
|
|
|
39
|
-
:param
|
|
42
|
+
:param command: Worker command to run to run the job.
|
|
40
43
|
:param user_script: User script that will be loaded before the job is run.
|
|
41
44
|
:param environment: Environment variable dict that will be applied before
|
|
42
45
|
the job is run.
|
|
@@ -46,19 +49,21 @@ def pack_job(job_desc: JobDescription, user_script: Optional[Resource] = None, e
|
|
|
46
49
|
"""
|
|
47
50
|
# Make a job dict to send to the executor.
|
|
48
51
|
# TODO: Factor out executor setup from here and Kubernetes and TES
|
|
49
|
-
job:
|
|
52
|
+
job: dict[str, Any] = {"command": command}
|
|
50
53
|
if user_script is not None:
|
|
51
54
|
# If there's a user script resource be sure to send it along
|
|
52
|
-
job[
|
|
55
|
+
job["userScript"] = user_script
|
|
53
56
|
if environment is not None:
|
|
54
57
|
# We also may have an environment to send.
|
|
55
|
-
job[
|
|
58
|
+
job["environment"] = environment
|
|
56
59
|
# Encode it in a form we can send in a command-line argument. Pickle in
|
|
57
60
|
# the highest protocol to prevent mixed-Python-version workflows from
|
|
58
61
|
# trying to work. Make sure it is text so we can ship it via JSON.
|
|
59
|
-
encoded_job = base64.b64encode(pickle.dumps(job, pickle.HIGHEST_PROTOCOL)).decode(
|
|
62
|
+
encoded_job = base64.b64encode(pickle.dumps(job, pickle.HIGHEST_PROTOCOL)).decode(
|
|
63
|
+
"utf-8"
|
|
64
|
+
)
|
|
60
65
|
# Make a command to run it in the executor
|
|
61
|
-
command_list = [
|
|
66
|
+
command_list = ["_toil_contained_executor", encoded_job]
|
|
62
67
|
|
|
63
68
|
return command_list
|
|
64
69
|
|
|
@@ -82,53 +87,51 @@ def executor() -> None:
|
|
|
82
87
|
exit_code = EXIT_STATUS_UNAVAILABLE_VALUE
|
|
83
88
|
|
|
84
89
|
if len(sys.argv) != 2:
|
|
85
|
-
logger.error(
|
|
90
|
+
logger.error("Executor requires exactly one base64-encoded argument")
|
|
86
91
|
sys.exit(exit_code)
|
|
87
92
|
|
|
88
93
|
# Take in a base64-encoded pickled dict as our first argument and decode it
|
|
89
94
|
try:
|
|
90
95
|
# Make sure to encode the text arguments to bytes before base 64 decoding
|
|
91
|
-
job = pickle.loads(base64.b64decode(sys.argv[1].encode(
|
|
96
|
+
job = pickle.loads(base64.b64decode(sys.argv[1].encode("utf-8")))
|
|
92
97
|
except:
|
|
93
98
|
exc_info = sys.exc_info()
|
|
94
|
-
logger.error(
|
|
99
|
+
logger.error("Exception while unpickling task: ", exc_info=exc_info)
|
|
95
100
|
sys.exit(exit_code)
|
|
96
101
|
|
|
97
|
-
if
|
|
102
|
+
if "environment" in job:
|
|
98
103
|
# Adopt the job environment into the executor.
|
|
99
104
|
# This lets us use things like TOIL_WORKDIR when figuring out how to talk to other executors.
|
|
100
|
-
logger.debug(
|
|
101
|
-
for var, value in job[
|
|
105
|
+
logger.debug("Adopting environment: %s", str(job["environment"].keys()))
|
|
106
|
+
for var, value in job["environment"].items():
|
|
102
107
|
os.environ[var] = value
|
|
103
108
|
|
|
104
109
|
# Set JTRES_ROOT and other global state needed for resource
|
|
105
110
|
# downloading/deployment to work.
|
|
106
111
|
# TODO: Every worker downloads resources independently.
|
|
107
112
|
# We should have a way to share a resource directory.
|
|
108
|
-
logger.debug(
|
|
113
|
+
logger.debug("Preparing system for resource download")
|
|
109
114
|
Resource.prepareSystem()
|
|
110
115
|
try:
|
|
111
|
-
if
|
|
112
|
-
job[
|
|
116
|
+
if "userScript" in job:
|
|
117
|
+
job["userScript"].register()
|
|
113
118
|
|
|
114
119
|
# Start the child process
|
|
115
|
-
logger.debug("Invoking command: '%s'", job[
|
|
116
|
-
child = subprocess.Popen(
|
|
117
|
-
|
|
118
|
-
|
|
120
|
+
logger.debug("Invoking command: '%s'", job["command"])
|
|
121
|
+
child = subprocess.Popen(
|
|
122
|
+
job["command"], preexec_fn=lambda: os.setpgrp(), shell=True
|
|
123
|
+
)
|
|
119
124
|
|
|
120
125
|
# Reproduce child's exit code
|
|
121
126
|
exit_code = child.wait()
|
|
122
127
|
except:
|
|
123
128
|
# This will print a traceback for us, since exit() in the finally
|
|
124
129
|
# will bypass the normal way of getting one.
|
|
125
|
-
logger.exception(
|
|
130
|
+
logger.exception("Encountered exception running child")
|
|
126
131
|
finally:
|
|
127
|
-
logger.debug(
|
|
132
|
+
logger.debug("Cleaning up resources")
|
|
128
133
|
# TODO: Change resource system to use a shared resource directory for everyone.
|
|
129
134
|
# Then move this into worker cleanup somehow
|
|
130
135
|
Resource.cleanSystem()
|
|
131
|
-
logger.debug(
|
|
136
|
+
logger.debug("Shutting down")
|
|
132
137
|
sys.exit(exit_code)
|
|
133
|
-
|
|
134
|
-
|
toil/batchSystems/gridengine.py
CHANGED
|
@@ -17,10 +17,11 @@ import os
|
|
|
17
17
|
import shlex
|
|
18
18
|
import time
|
|
19
19
|
from shlex import quote
|
|
20
|
-
from typing import
|
|
20
|
+
from typing import Optional
|
|
21
21
|
|
|
22
|
-
from toil.batchSystems.abstractGridEngineBatchSystem import
|
|
23
|
-
AbstractGridEngineBatchSystem
|
|
22
|
+
from toil.batchSystems.abstractGridEngineBatchSystem import (
|
|
23
|
+
AbstractGridEngineBatchSystem,
|
|
24
|
+
)
|
|
24
25
|
from toil.lib.misc import CalledProcessErrorStderr, call_command
|
|
25
26
|
|
|
26
27
|
logger = logging.getLogger(__name__)
|
|
@@ -28,37 +29,42 @@ logger = logging.getLogger(__name__)
|
|
|
28
29
|
|
|
29
30
|
class GridEngineBatchSystem(AbstractGridEngineBatchSystem):
|
|
30
31
|
|
|
31
|
-
class
|
|
32
|
+
class GridEngineThread(AbstractGridEngineBatchSystem.GridEngineThread):
|
|
32
33
|
"""
|
|
33
34
|
Grid Engine-specific AbstractGridEngineWorker methods
|
|
34
35
|
"""
|
|
36
|
+
|
|
35
37
|
def getRunningJobIDs(self):
|
|
36
38
|
times = {}
|
|
37
39
|
with self.runningJobsLock:
|
|
38
40
|
currentjobs = {str(self.batchJobIDs[x][0]): x for x in self.runningJobs}
|
|
39
41
|
stdout = call_command(["qstat"])
|
|
40
42
|
|
|
41
|
-
for currline in stdout.split(
|
|
43
|
+
for currline in stdout.split("\n"):
|
|
42
44
|
items = currline.strip().split()
|
|
43
45
|
if items:
|
|
44
|
-
if items[0] in currentjobs and items[4] ==
|
|
46
|
+
if items[0] in currentjobs and items[4] == "r":
|
|
45
47
|
jobstart = " ".join(items[5:7])
|
|
46
|
-
jobstart = time.mktime(
|
|
48
|
+
jobstart = time.mktime(
|
|
49
|
+
time.strptime(jobstart, "%m/%d/%Y %H:%M:%S")
|
|
50
|
+
)
|
|
47
51
|
times[currentjobs[items[0]]] = time.time() - jobstart
|
|
48
52
|
|
|
49
53
|
return times
|
|
50
54
|
|
|
51
55
|
def killJob(self, jobID):
|
|
52
|
-
call_command([
|
|
53
|
-
|
|
54
|
-
def prepareSubmission(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
56
|
+
call_command(["qdel", self.getBatchSystemID(jobID)])
|
|
57
|
+
|
|
58
|
+
def prepareSubmission(
|
|
59
|
+
self,
|
|
60
|
+
cpu: int,
|
|
61
|
+
memory: int,
|
|
62
|
+
jobID: int,
|
|
63
|
+
command: str,
|
|
64
|
+
jobName: str,
|
|
65
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
66
|
+
gpus: Optional[int] = None,
|
|
67
|
+
):
|
|
62
68
|
# POSIX qsub
|
|
63
69
|
# <https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/qsub.html>
|
|
64
70
|
# expects a single script argument, which is supposed to be a file.
|
|
@@ -67,11 +73,13 @@ class GridEngineBatchSystem(AbstractGridEngineBatchSystem):
|
|
|
67
73
|
# hope that the qsub we are using is clever enough to forward along
|
|
68
74
|
# arguments. Otherwise, some qsubs will go looking for the full
|
|
69
75
|
# Toil command string as a file.
|
|
70
|
-
return self.prepareQsub(cpu, memory, jobID, job_environment) + shlex.split(
|
|
76
|
+
return self.prepareQsub(cpu, memory, jobID, job_environment) + shlex.split(
|
|
77
|
+
command
|
|
78
|
+
)
|
|
71
79
|
|
|
72
80
|
def submitJob(self, subLine):
|
|
73
81
|
stdout = call_command(subLine)
|
|
74
|
-
output = stdout.split(
|
|
82
|
+
output = stdout.split("\n")[0].strip()
|
|
75
83
|
result = int(output)
|
|
76
84
|
return result
|
|
77
85
|
|
|
@@ -84,8 +92,8 @@ class GridEngineBatchSystem(AbstractGridEngineBatchSystem):
|
|
|
84
92
|
"""
|
|
85
93
|
# the task is set as part of the job ID if using getBatchSystemID()
|
|
86
94
|
job, task = (sgeJobID, None)
|
|
87
|
-
if
|
|
88
|
-
job, task = sgeJobID.split(
|
|
95
|
+
if "." in sgeJobID:
|
|
96
|
+
job, task = sgeJobID.split(".", 1)
|
|
89
97
|
assert task is None, "task ids not currently support by qstat logic below"
|
|
90
98
|
|
|
91
99
|
# First try qstat to see if job is still running, if not get the
|
|
@@ -101,66 +109,94 @@ class GridEngineBatchSystem(AbstractGridEngineBatchSystem):
|
|
|
101
109
|
if task is not None:
|
|
102
110
|
args.extend(["-t", str(task)])
|
|
103
111
|
stdout = call_command(args)
|
|
104
|
-
for line in stdout.split(
|
|
112
|
+
for line in stdout.split("\n"):
|
|
105
113
|
if line.startswith("failed") and int(line.split()[1]) == 1:
|
|
106
114
|
return 1
|
|
107
115
|
elif line.startswith("exit_status"):
|
|
108
|
-
logger.debug(
|
|
116
|
+
logger.debug("Exit Status: %r", line.split()[1])
|
|
109
117
|
return int(line.split()[1])
|
|
110
118
|
return None
|
|
111
119
|
|
|
112
120
|
"""
|
|
113
121
|
Implementation-specific helper methods
|
|
114
122
|
"""
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
123
|
+
|
|
124
|
+
def prepareQsub(
|
|
125
|
+
self,
|
|
126
|
+
cpu: int,
|
|
127
|
+
mem: int,
|
|
128
|
+
jobID: int,
|
|
129
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
130
|
+
) -> list[str]:
|
|
131
|
+
qsubline = [
|
|
132
|
+
"qsub",
|
|
133
|
+
"-V",
|
|
134
|
+
"-b",
|
|
135
|
+
"y",
|
|
136
|
+
"-terse",
|
|
137
|
+
"-j",
|
|
138
|
+
"y",
|
|
139
|
+
"-cwd",
|
|
140
|
+
"-N",
|
|
141
|
+
"toil_job_" + str(jobID),
|
|
142
|
+
]
|
|
122
143
|
|
|
123
144
|
environment = self.boss.environment.copy()
|
|
124
145
|
if job_environment:
|
|
125
146
|
environment.update(job_environment)
|
|
126
147
|
|
|
127
148
|
if environment:
|
|
128
|
-
qsubline.append(
|
|
129
|
-
qsubline.append(
|
|
130
|
-
|
|
149
|
+
qsubline.append("-v")
|
|
150
|
+
qsubline.append(
|
|
151
|
+
",".join(
|
|
152
|
+
k + "=" + quote(os.environ[k] if v is None else v)
|
|
153
|
+
for k, v in environment.items()
|
|
154
|
+
)
|
|
155
|
+
)
|
|
131
156
|
|
|
132
157
|
reqline = list()
|
|
133
|
-
sgeArgs = os.getenv(
|
|
158
|
+
sgeArgs = os.getenv("TOIL_GRIDENGINE_ARGS")
|
|
134
159
|
if mem is not None:
|
|
135
|
-
memStr = str(mem // 1024) +
|
|
160
|
+
memStr = str(mem // 1024) + "K"
|
|
136
161
|
if not self.boss.config.manualMemArgs:
|
|
137
162
|
# for UGE instead of SGE; see #2309
|
|
138
|
-
reqline += [
|
|
163
|
+
reqline += ["vf=" + memStr, "h_vmem=" + memStr]
|
|
139
164
|
elif self.boss.config.manualMemArgs and not sgeArgs:
|
|
140
|
-
raise ValueError(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
165
|
+
raise ValueError(
|
|
166
|
+
"--manualMemArgs set to True, but TOIL_GRIDGENGINE_ARGS is not set."
|
|
167
|
+
"Please set TOIL_GRIDGENGINE_ARGS to specify memory allocation for "
|
|
168
|
+
"your system. Default adds the arguments: vf=<mem> h_vmem=<mem> "
|
|
169
|
+
"to qsub."
|
|
170
|
+
)
|
|
144
171
|
if len(reqline) > 0:
|
|
145
|
-
qsubline.extend([
|
|
172
|
+
qsubline.extend(["-hard", "-l", ",".join(reqline)])
|
|
146
173
|
if sgeArgs:
|
|
147
174
|
sgeArgs = sgeArgs.split()
|
|
148
175
|
for arg in sgeArgs:
|
|
149
176
|
if arg.startswith(("vf=", "h_vmem=", "-pe")):
|
|
150
|
-
raise ValueError(
|
|
177
|
+
raise ValueError(
|
|
178
|
+
"Unexpected CPU, memory or pe specifications in TOIL_GRIDGENGINE_ARGs: %s"
|
|
179
|
+
% arg
|
|
180
|
+
)
|
|
151
181
|
qsubline.extend(sgeArgs)
|
|
152
182
|
# If cpu == 1 (or None) then don't add PE env variable to the qsub command.
|
|
153
183
|
# This will allow for use of the serial queue for these jobs.
|
|
154
|
-
if (
|
|
184
|
+
if (
|
|
185
|
+
(os.getenv("TOIL_GRIDENGINE_PE") is not None)
|
|
186
|
+
and (cpu is not None)
|
|
187
|
+
and (cpu > 1)
|
|
188
|
+
):
|
|
155
189
|
peCpu = int(math.ceil(cpu))
|
|
156
|
-
qsubline.extend([
|
|
190
|
+
qsubline.extend(["-pe", os.getenv("TOIL_GRIDENGINE_PE"), str(peCpu)])
|
|
157
191
|
elif (cpu is not None) and (cpu > 1):
|
|
158
|
-
raise RuntimeError(
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
192
|
+
raise RuntimeError(
|
|
193
|
+
"must specify PE in TOIL_GRIDENGINE_PE environment variable when using multiple CPUs. "
|
|
194
|
+
"Run qconf -spl and your local documentation for possible values"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
stdoutfile: str = self.boss.format_std_out_err_path(jobID, "$JOB_ID", "out")
|
|
198
|
+
stderrfile: str = self.boss.format_std_out_err_path(jobID, "$JOB_ID", "err")
|
|
199
|
+
qsubline.extend(["-o", stdoutfile, "-e", stderrfile])
|
|
164
200
|
|
|
165
201
|
return qsubline
|
|
166
202
|
|