toil-7.0.0-py3-none-any.whl → toil-8.1.0b1-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/provisioners/node.py
CHANGED

@@ -13,12 +13,12 @@
 # limitations under the License.
 import datetime
 import logging
-import pipes
 import socket
 import subprocess
 import time
 from itertools import count
-from
+from shlex import quote
+from typing import Any, Optional, Union
 
 from toil.lib.memoize import parse_iso_utc
 
@@ -30,12 +30,21 @@ logger = logging.getLogger(__name__)
 class Node:
     maxWaitTime = 7 * 60
 
-    def __init__(
-
+    def __init__(
+        self,
+        publicIP: str,
+        privateIP: str,
+        name: str,
+        launchTime: Union[datetime.datetime, str],
+        nodeType: Optional[str],
+        preemptible: bool,
+        tags: Optional[dict[str, str]] = None,
+        use_private_ip: Optional[bool] = None,
+    ) -> None:
         self.publicIP = publicIP
         self.privateIP = privateIP
         if use_private_ip:
-            self.effectiveIP = self.privateIP
+            self.effectiveIP = self.privateIP  # or self.publicIP?
         else:
             self.effectiveIP = self.publicIP or self.privateIP
         self.name = name
@@ -78,7 +87,7 @@ class Node:
         else:
             return 1
 
-    def waitForNode(self, role: str, keyName: str=
+    def waitForNode(self, role: str, keyName: str = "core") -> None:
         self._waitForSSHPort()
         # wait here so docker commands can be used reliably afterwards
         self._waitForSSHKeys(keyName=keyName)
@@ -86,8 +95,8 @@
         self._waitForAppliance(role=role, keyName=keyName)
 
     def copySshKeys(self, keyName):
-        """
-        if keyName ==
+        """Copy authorized_keys file to the core user from the keyName user."""
+        if keyName == "core":
             return  # No point.
 
         # Make sure that keys are there.
@@ -96,9 +105,17 @@
         # copy keys to core user so that the ssh calls will work
         # - normal mechanism failed unless public key was in the google-ssh format
         # - even so, the key wasn't copied correctly to the core account
-        keyFile =
-        self.sshInstance(
-
+        keyFile = "/home/%s/.ssh/authorized_keys" % keyName
+        self.sshInstance(
+            "/usr/bin/sudo", "/usr/bin/cp", keyFile, "/home/core/.ssh", user=keyName
+        )
+        self.sshInstance(
+            "/usr/bin/sudo",
+            "/usr/bin/chown",
+            "core",
+            "/home/core/.ssh/authorized_keys",
+            user=keyName,
+        )
 
     def injectFile(self, fromFile, toFile, role):
         """
@@ -110,9 +127,13 @@
                 self.coreRsync([fromFile, ":" + toFile], applianceName=role)
                 return True
             except Exception as e:
-                logger.debug(
+                logger.debug(
+                    "Rsync to new node failed, trying again. Error message: %s" % e
+                )
                 time.sleep(10 * retry)
-        raise RuntimeError(
+        raise RuntimeError(
+            f"Failed to inject file {fromFile} to {role} with ip {self.effectiveIP}"
+        )
 
     def extractFile(self, fromFile, toFile, role):
         """
@@ -124,74 +145,111 @@
                 self.coreRsync([":" + fromFile, toFile], applianceName=role)
                 return True
             except Exception as e:
-                logger.debug(
+                logger.debug(
+                    "Rsync from new node failed, trying again. Error message: %s" % e
+                )
                 time.sleep(10 * retry)
-        raise RuntimeError(
+        raise RuntimeError(
+            f"Failed to extract file {fromFile} from {role} with ip {self.effectiveIP}"
+        )
 
-    def _waitForSSHKeys(self, keyName=
+    def _waitForSSHKeys(self, keyName="core"):
         # the propagation of public ssh keys vs. opening the SSH port is racey, so this method blocks until
         # the keys are propagated and the instance can be SSH into
         start_time = time.time()
         last_error = None
         while True:
             if time.time() - start_time > self.maxWaitTime:
-                raise RuntimeError(
-
-
-
-
-
+                raise RuntimeError(
+                    f"Key propagation failed on machine with ip {self.effectiveIP}."
+                    + (
+                        "\n\nMake sure that your public key is attached to your account and you are using "
+                        "the correct private key. If you are using a key with a passphrase, be sure to "
+                        "set up ssh-agent. For details, refer to "
+                        "https://toil.readthedocs.io/en/latest/running/cloud/cloud.html."
+                        if last_error and "Permission denied" in last_error
+                        else ""
+                    )
+                )
             try:
-                logger.info(
-
+                logger.info(
+                    "Attempting to establish SSH connection to %s@%s...",
+                    keyName,
+                    self.effectiveIP,
+                )
+                self.sshInstance("ps", sshOptions=["-oBatchMode=yes"], user=keyName)
             except RuntimeError as err:
                 last_error = str(err)
-                logger.info(
+                logger.info(
+                    "Connection rejected, waiting for public SSH key to be propagated. Trying again in 10s."
+                )
                 time.sleep(10)
             else:
-                logger.info(
+                logger.info("...SSH connection established.")
                 return
 
-    def _waitForDockerDaemon(self, keyName=
-        logger.info(
+    def _waitForDockerDaemon(self, keyName="core"):
+        logger.info("Waiting for docker on %s to start...", self.effectiveIP)
         sleepTime = 10
         startTime = time.time()
         while True:
             if time.time() - startTime > self.maxWaitTime:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Docker daemon failed to start on machine with ip %s"
+                    % self.effectiveIP
+                )
             try:
-                output = self.sshInstance(
-
+                output = self.sshInstance(
+                    "/usr/bin/ps", "auxww", sshOptions=["-oBatchMode=yes"], user=keyName
+                )
+                if b"dockerd" in output:
                     # docker daemon has started
-                    logger.info(
+                    logger.info("Docker daemon running")
                     break
                 else:
-                    logger.info(
+                    logger.info(
+                        "... Still waiting for docker daemon, trying in %s sec..."
+                        % sleepTime
+                    )
                     time.sleep(sleepTime)
             except RuntimeError:
                 logger.info("Wait for docker daemon failed ssh, trying again.")
                 sleepTime += 20
 
-    def _waitForAppliance(self, role, keyName=
-        logger.info(
+    def _waitForAppliance(self, role, keyName="core"):
+        logger.info("Waiting for %s Toil appliance to start...", role)
        sleepTime = 20
         startTime = time.time()
         while True:
             if time.time() - startTime > self.maxWaitTime:
-                raise RuntimeError(
-
+                raise RuntimeError(
+                    "Appliance failed to start on machine with IP: "
+                    + self.effectiveIP
+                    + "\nCheck if TOIL_APPLIANCE_SELF is set correctly and the container exists."
+                )
             try:
-                output = self.sshInstance(
-
-
+                output = self.sshInstance(
+                    "/usr/bin/docker",
+                    "ps",
+                    sshOptions=["-oBatchMode=yes"],
+                    user=keyName,
+                )
+
+                role = (
+                    bytes(role, encoding="utf-8")
+                    if type(role) != type(output)
+                    else role
+                )
 
                 if role in output:
-                    logger.info(
+                    logger.info("...Toil appliance started")
                     break
                 else:
-                    logger.info(
-
-
+                    logger.info(
+                        "...Still waiting for appliance, trying again in %s sec..."
+                        % sleepTime
+                    )
+                    logger.debug(f"Role: {role}\n" f"Output: {output}\n\n")
                     time.sleep(sleepTime)
             except RuntimeError:
                 # ignore exceptions, keep trying
@@ -205,13 +263,13 @@
         :return: the number of unsuccessful attempts to connect to the port before a the first
         success
         """
-        logger.debug(
+        logger.debug("Waiting for ssh port on %s to open...", self.effectiveIP)
         for i in count():
             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             try:
                 s.settimeout(a_short_time)
                 s.connect((self.effectiveIP, 22))
-                logger.debug(
+                logger.debug("...ssh port open")
                 return i
             except OSError:
                 pass
@@ -225,7 +283,7 @@
         interactive SSHing. The default value is False. Input=string is passed as
         input to the Popen call.
         """
-        kwargs[
+        kwargs["appliance"] = True
         return self.coreSSH(*args, **kwargs)
 
     def sshInstance(self, *args, **kwargs):
@@ -233,7 +291,7 @@
         Run a command on the instance.
         Returns the binary output of the command.
         """
-        kwargs[
+        kwargs["collectStdout"] = True
         return self.coreSSH(*args, **kwargs)
 
     def coreSSH(self, *args, **kwargs):
@@ -249,64 +307,74 @@
         :param bytes input: UTF-8 encoded input bytes to send to the command
 
         """
-        commandTokens = [
-        if not kwargs.pop(
-            kwargs[
-
-
+        commandTokens = ["ssh", "-tt"]
+        if not kwargs.pop("strict", False):
+            kwargs["sshOptions"] = [
+                "-oUserKnownHostsFile=/dev/null",
+                "-oStrictHostKeyChecking=no",
+            ] + kwargs.get("sshOptions", [])
+        sshOptions = kwargs.pop("sshOptions", None)
         # Forward ports:
         # 5050 for Mesos dashboard (although to talk to agents you will need a proxy)
-        commandTokens.extend([
+        commandTokens.extend(["-L", "5050:localhost:5050"])
         if sshOptions:
             # add specified options to ssh command
             assert isinstance(sshOptions, list)
             commandTokens.extend(sshOptions)
         # specify host
-        user = kwargs.pop(
-        commandTokens.append(f
+        user = kwargs.pop("user", "core")  # CHANGED: Is this needed?
+        commandTokens.append(f"{user}@{str(self.effectiveIP)}")
 
-        inputString = kwargs.pop(
+        inputString = kwargs.pop("input", None)
         if inputString is not None:
-            kwargs[
+            kwargs["stdin"] = subprocess.PIPE
 
-        if kwargs.pop(
-            kwargs[
-            kwargs[
+        if kwargs.pop("collectStdout", None):
+            kwargs["stdout"] = subprocess.PIPE
+            kwargs["stderr"] = subprocess.PIPE
 
-        tty = kwargs.pop(
-        if kwargs.pop(
-            ttyFlag =
-            commandTokens += [
+        tty = kwargs.pop("tty", None)
+        if kwargs.pop("appliance", None):
+            ttyFlag = "-t" if tty else ""
+            commandTokens += ["docker", "exec", "-i", ttyFlag, "toil_leader"]
 
-        logger.debug(
-        args = list(map(
+        logger.debug("Node %s: %s", self.effectiveIP, " ".join(args))
+        args = list(map(quote, args))
         commandTokens += args
-        logger.debug(
+        logger.debug("Full command %s", " ".join(commandTokens))
         process = subprocess.Popen(commandTokens, **kwargs)
         stdout, stderr = process.communicate(input=inputString)
         # at this point the process has already exited, no need for a timeout
         exit_code = process.returncode
         # ssh has been throwing random 255 errors - why?
         if exit_code != 0:
-            logger.info(
-
-
-
-
-
+            logger.info(
+                'Executing the command "%s" on the appliance returned a non-zero '
+                "exit code %s with stdout %s and stderr %s"
+                % (" ".join(args), exit_code, stdout, stderr)
+            )
+            raise RuntimeError(
+                'Executing the command "%s" on the appliance returned a non-zero '
+                "exit code %s with stdout %s and stderr %s"
+                % (" ".join(args), exit_code, stdout, stderr)
+            )
         return stdout
 
-    def coreRsync(
-
+    def coreRsync(
+        self, args: list[str], applianceName: str = "toil_leader", **kwargs: Any
+    ) -> int:
+        remoteRsync = (
+            "docker exec -i %s rsync -v" % applianceName
+        )  # Access rsync inside appliance
         parsedArgs = []
         sshCommand = "ssh"
-        if not kwargs.pop(
+        if not kwargs.pop("strict", False):
            sshCommand = "ssh -oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no"
         hostInserted = False
         # Insert remote host address
         for i in args:
             if i.startswith(":") and not hostInserted:
-                user = kwargs.pop(
+                user = kwargs.pop("user", "core")  # CHANGED: Is this needed?
                 i = (f"{user}@{self.effectiveIP}") + i
                 hostInserted = True
             elif i.startswith(":") and hostInserted:
@@ -314,7 +382,7 @@
                 parsedArgs.append(i)
         if not hostInserted:
             raise ValueError("No remote host found in argument list")
-        command = [
+        command = ["rsync", "-e", sshCommand, "--rsync-path", remoteRsync]
         logger.debug("Running %r.", command + parsedArgs)
 
         return subprocess.check_call(command + parsedArgs)
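The most notable mechanical change in node.py above is the move from the long-deprecated pipes module (removed from the standard library in Python 3.13) to shlex.quote: coreSSH now quotes every remote argument via args = list(map(quote, args)) before appending it to the ssh command line. Below is a minimal sketch of that quoting pattern, not Toil's code; the host and paths are made up for illustration.

import subprocess
from shlex import quote


def run_over_ssh(host: str, *args: str) -> bytes:
    """Run a command on a remote host, quoting each argument for the remote shell."""
    # Without quoting, an argument like "file with spaces" would be split by the
    # remote shell into several words; quote() keeps each argument a single token.
    command = ["ssh", host] + [quote(arg) for arg in args]
    return subprocess.check_output(command)


# Example (hypothetical host and path):
# run_over_ssh("core@203.0.113.5", "ls", "-l", "/var/lib/toil/file with spaces")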
toil/realtimeLogger.py
CHANGED

@@ -20,7 +20,7 @@ import os.path
 import socketserver as SocketServer
 import threading
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 from toil.lib.misc import get_public_ip
 from toil.statsAndLogging import set_log_level
@@ -49,7 +49,7 @@ class LoggingDatagramHandler(SocketServer.BaseRequestHandler):
 
         try:
             # Parse it as JSON
-            message_attrs = json.loads(data.decode(
+            message_attrs = json.loads(data.decode("utf-8"))
             # Fluff it up into a proper logging record
             record = logging.makeLogRecord(message_attrs)
             if isinstance(record.args, list):
@@ -81,7 +81,7 @@ class JSONDatagramHandler(logging.handlers.DatagramHandler):
 
     def makePickle(self, record: logging.LogRecord) -> bytes:
         """Actually, encode the record as bare JSON instead."""
-        return json.dumps(record.__dict__).encode(
+        return json.dumps(record.__dict__).encode("utf-8")
 
 
 class RealtimeLoggerMetaclass(type):
@@ -113,7 +113,7 @@ class RealtimeLogger(metaclass=RealtimeLoggerMetaclass):
     envPrefix = "TOIL_RT_LOGGING_"
 
     # Avoid duplicating the default level everywhere
-    defaultLevel =
+    defaultLevel = "INFO"
 
     # State maintained on server and client
 
@@ -131,19 +131,24 @@
     logger = None
 
     @classmethod
-    def _startLeader(
+    def _startLeader(
+        cls, batchSystem: "AbstractBatchSystem", level: str = defaultLevel
+    ) -> None:
         with cls.lock:
             if cls.initialized == 0:
                 cls.initialized += 1
                 if level:
-                    logger.info(
+                    logger.info("Starting real-time logging.")
                     # Start up the logging server
                     cls.loggingServer = SocketServer.ThreadingUDPServer(
-
-
+                        server_address=("0.0.0.0", 0),
+                        RequestHandlerClass=LoggingDatagramHandler,
+                    )
 
                     # Set up a thread to do all the serving in the background and exit when we do
-                    cls.serverThread = threading.Thread(
+                    cls.serverThread = threading.Thread(
+                        target=cls.loggingServer.serve_forever
+                    )
                     cls.serverThread.daemon = True
                     cls.serverThread.start()
 
@@ -156,28 +161,30 @@
                         os.environ[name] = value
                         batchSystem.setEnv(name)
 
-                    _setEnv(
-                    _setEnv(
+                    _setEnv("ADDRESS", "%s:%i" % (ip, port))
+                    _setEnv("LEVEL", level)
                 else:
-                    logger.debug(
+                    logger.debug("Real-time logging disabled")
             else:
                 if level:
-                    logger.warning(
+                    logger.warning("Ignoring nested request to start real-time logging")
 
     @classmethod
     def _stopLeader(cls) -> None:
         """Stop the server on the leader."""
         with cls.lock:
             if cls.initialized == 0:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Can't stop the server on the leader as the leader was never initialized."
+                )
             cls.initialized -= 1
             if cls.initialized == 0:
                 if cls.loggingServer:
-                    logger.info(
+                    logger.info("Stopping real-time logging server.")
                     cls.loggingServer.shutdown()
                     cls.loggingServer = None
                 if cls.serverThread:
-                    logger.info(
+                    logger.info("Joining real-time logging server thread.")
                     cls.serverThread.join()
                     cls.serverThread = None
                 for k in list(os.environ.keys()):
@@ -198,9 +205,9 @@
         if cls.logger is None:
             with cls.lock:
                 if cls.logger is None:
-                    cls.logger = logging.getLogger(
+                    cls.logger = logging.getLogger("toil-rt")
                     try:
-                        level = os.environ[cls.envPrefix +
+                        level = os.environ[cls.envPrefix + "LEVEL"]
                     except KeyError:
                         # There is no server running on the leader, so suppress most log messages
                         # and skip the UDP stuff.
@@ -209,16 +216,16 @@
                         # Adopt the logging level set on the leader.
                         set_log_level(level, cls.logger)
                         try:
-                            address = os.environ[cls.envPrefix +
+                            address = os.environ[cls.envPrefix + "ADDRESS"]
                         except KeyError:
                             pass
                         else:
                             # We know where to send messages to, so send them.
-                            host, port = address.split(
+                            host, port = address.split(":")
                             cls.logger.addHandler(JSONDatagramHandler(host, int(port)))
         return cls.logger
 
-    def __init__(self, batchSystem:
+    def __init__(self, batchSystem: "AbstractBatchSystem", level: str = defaultLevel):
         """
         Create a context manager that starts up the UDP server.
 
@@ -237,5 +244,10 @@
         RealtimeLogger._startLeader(self.__batchSystem, level=self.__level)
 
     # noinspection PyUnusedLocal
-    def __exit__(
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
        RealtimeLogger._stopLeader()