toil-6.1.0a1-py3-none-any.whl → toil-8.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
--- toil/test/batchSystems/batchSystemTest.py (6.1.0a1)
+++ toil/test/batchSystems/batchSystemTest.py (8.0.0)
@@ -24,36 +24,43 @@ from abc import ABCMeta, abstractmethod
 from fractions import Fraction
 from unittest import skipIf

-from toil.batchSystems.abstractBatchSystem import (
-
-
+from toil.batchSystems.abstractBatchSystem import (
+    AbstractBatchSystem,
+    BatchSystemSupport,
+    InsufficientSystemResources,
+)
+
 # Don't import any batch systems here that depend on extras
 # in order to import properly. Import them later, in tests
 # protected by annotations.
 from toil.batchSystems.mesos.test import MesosTestSupport
-from toil.batchSystems.registry import (
-
-
-
-
+from toil.batchSystems.registry import (
+    add_batch_system_factory,
+    get_batch_system,
+    get_batch_systems,
+    restore_batch_system_plugin_state,
+    save_batch_system_plugin_state,
+)
 from toil.batchSystems.singleMachine import SingleMachineBatchSystem
 from toil.common import Config, Toil
 from toil.job import Job, JobDescription, Requirer
 from toil.lib.retry import retry_flaky_test
 from toil.lib.threading import cpu_count
-from toil.test import (
-
-
-
-
-
-
-
-
-
-
-
+from toil.test import (
+    ToilTest,
+    needs_aws_batch,
+    needs_aws_s3,
+    needs_fetchable_appliance,
+    needs_gridengine,
+    needs_htcondor,
+    needs_kubernetes,
+    needs_kubernetes_installed,
+    needs_lsf,
+    needs_mesos,
+    needs_slurm,
+    needs_torque,
+    slow,
+)

 logger = logging.getLogger(__name__)

@@ -66,7 +73,10 @@ preemptible = False

 # Since we aren't always attaching the config to the jobs for these tests, we
 # need to use fully specified requirements.
-defaultRequirements = dict(
+defaultRequirements = dict(
+    memory=int(100e6), cores=1, disk=1000, preemptible=preemptible, accelerators=[]
+)
+

 class BatchSystemPluginTest(ToilTest):
     """
@@ -91,9 +101,10 @@ class BatchSystemPluginTest(ToilTest):
             # add its arguments.
             return SingleMachineBatchSystem

-        add_batch_system_factory(
-        assert
-        assert get_batch_system(
+        add_batch_system_factory("testBatchSystem", test_batch_system_factory)
+        assert "testBatchSystem" in get_batch_systems()
+        assert get_batch_system("testBatchSystem") == SingleMachineBatchSystem
+

 class hidden:
     """
@@ -127,8 +138,9 @@ class hidden:
             """
             config = Config()
             from uuid import uuid4
+
             config.workflowID = str(uuid4())
-            config.cleanWorkDir =
+            config.cleanWorkDir = "always"
             return config

         def _createConfig(self):
@@ -140,19 +152,16 @@ class hidden:
             """
             return self.createConfig()

-        def _mockJobDescription(self, jobStoreID=None,
+        def _mockJobDescription(self, jobStoreID=None, **kwargs):
             """
-            Create a mock-up JobDescription with the given ID
+            Create a mock-up JobDescription with the given ID and other parameters.
             """

             # TODO: Use a real unittest.Mock? For now we make a real instance and just hack it up.

             desc = JobDescription(**kwargs)
-            # Normally we can't pass in
-            #
-            # here.
-            if command is not None:
-                desc.command = command
+            # Normally we can't pass in an ID, and the job serialization logic
+            # takes care of filling it in. We set it here.
             if jobStoreID is not None:
                 desc.jobStoreID = jobStoreID

@@ -167,7 +176,7 @@ class hidden:
             super().setUp()
             self.config = self._createConfig()
             self.batchSystem = self.createBatchSystem()
-            self.tempDir = self._createTempDir(
+            self.tempDir = self._createTempDir("testFiles")

         def tearDown(self):
             self.batchSystem.shutdown()
@@ -185,12 +194,20 @@ class hidden:

         @retry_flaky_test(prepare=[tearDown, setUp])
         def test_run_jobs(self):
-            jobDesc1 = self._mockJobDescription(
-
-
-
-
-
+            jobDesc1 = self._mockJobDescription(
+                jobName="test1",
+                unitName=None,
+                jobStoreID="1",
+                requirements=defaultRequirements,
+            )
+            jobDesc2 = self._mockJobDescription(
+                jobName="test2",
+                unitName=None,
+                jobStoreID="2",
+                requirements=defaultRequirements,
+            )
+            job1 = self.batchSystem.issueBatchJob("sleep 1000", jobDesc1)
+            job2 = self.batchSystem.issueBatchJob("sleep 1000", jobDesc2)

             issuedIDs = self._waitForJobsToIssue(2)
             self.assertEqual(set(issuedIDs), {job1, job2})
@@ -205,7 +222,9 @@ class hidden:
             # getUpdatedBatchJob, and the sleep time is longer than the time we
             # should spend waiting for both to start, so if our cluster can
             # only run one job at a time, we will fail the test.
-            runningJobIDs = self._waitForJobsToStart(
+            runningJobIDs = self._waitForJobsToStart(
+                2, tries=self.get_max_startup_seconds()
+            )
             self.assertEqual(set(runningJobIDs), {job1, job2})

             # Killing the jobs instead of allowing them to complete means this test can run very
@@ -219,13 +238,21 @@ class hidden:
             # then check for it having happened, but we can't guarantee that
             # the batch system will run against the same filesystem we are
             # looking at.
-            jobDesc3 = self._mockJobDescription(
-
-
+            jobDesc3 = self._mockJobDescription(
+                jobName="test3",
+                unitName=None,
+                jobStoreID="3",
+                requirements=defaultRequirements,
+            )
+            job3 = self.batchSystem.issueBatchJob("mktemp -d", jobDesc3)

             jobUpdateInfo = self.batchSystem.getUpdatedBatchJob(maxWait=1000)
-            jobID, exitStatus, wallTime =
-
+            jobID, exitStatus, wallTime = (
+                jobUpdateInfo.jobID,
+                jobUpdateInfo.exitStatus,
+                jobUpdateInfo.wallTime,
+            )
+            logger.info(f"Third job completed: {jobID} {exitStatus} {wallTime}")

             # Since the first two jobs were killed, the only job in the updated jobs queue should
             # be job 3. If the first two jobs were (incorrectly) added to the queue, this will
@@ -245,47 +272,69 @@ class hidden:

         def test_set_env(self):
             # Start with a relatively safe script
-            script_shell =
+            script_shell = (
+                'if [ "x${FOO}" == "xbar" ] ; then exit 23 ; else exit 42 ; fi'
+            )

             # Escape the semicolons
-            script_protected = script_shell.replace(
+            script_protected = script_shell.replace(";", r"\;")

             # Turn into a string which convinces bash to take all args and paste them back together and run them
-            command =
-            jobDesc4 = self._mockJobDescription(
-
-
+            command = 'bash -c "\\${@}" bash eval ' + script_protected
+            jobDesc4 = self._mockJobDescription(
+                jobName="test4",
+                unitName=None,
+                jobStoreID="4",
+                requirements=defaultRequirements,
+            )
+            job4 = self.batchSystem.issueBatchJob(command, jobDesc4)
             jobUpdateInfo = self.batchSystem.getUpdatedBatchJob(maxWait=1000)
-            jobID, exitStatus, wallTime =
+            jobID, exitStatus, wallTime = (
+                jobUpdateInfo.jobID,
+                jobUpdateInfo.exitStatus,
+                jobUpdateInfo.wallTime,
+            )
             self.assertEqual(exitStatus, 42)
             self.assertEqual(jobID, job4)
             # Now set the variable and ensure that it is present
-            self.batchSystem.setEnv(
-            jobDesc5 = self._mockJobDescription(
-
-
+            self.batchSystem.setEnv("FOO", "bar")
+            jobDesc5 = self._mockJobDescription(
+                jobName="test5",
+                unitName=None,
+                jobStoreID="5",
+                requirements=defaultRequirements,
+            )
+            job5 = self.batchSystem.issueBatchJob(command, jobDesc5)
             jobUpdateInfo = self.batchSystem.getUpdatedBatchJob(maxWait=1000)
             self.assertEqual(jobUpdateInfo.exitStatus, 23)
             self.assertEqual(jobUpdateInfo.jobID, job5)

         def test_set_job_env(self):
-            """
+            """Test the mechanism for setting per-job environment variables to batch system jobs."""
             script = 'if [ "x${FOO}" == "xbar" ] ; then exit 23 ; else exit 42 ; fi'
-            command =
+            command = 'bash -c "\\${@}" bash eval ' + script.replace(";", r"\;")

             # Issue a job with a job environment variable
-            job_desc_6 = self._mockJobDescription(
-
-
-
-
+            job_desc_6 = self._mockJobDescription(
+                jobName="test6",
+                unitName=None,
+                jobStoreID="6",
+                requirements=defaultRequirements,
+            )
+            job6 = self.batchSystem.issueBatchJob(
+                command, job_desc_6, job_environment={"FOO": "bar"}
+            )
             job_update_info = self.batchSystem.getUpdatedBatchJob(maxWait=1000)
             self.assertEqual(job_update_info.exitStatus, 23)  # this should succeed
             self.assertEqual(job_update_info.jobID, job6)
             # Now check that the environment variable doesn't exist for other jobs
-            job_desc_7 = self._mockJobDescription(
-
-
+            job_desc_7 = self._mockJobDescription(
+                jobName="test7",
+                unitName=None,
+                jobStoreID="7",
+                requirements=defaultRequirements,
+            )
+            job7 = self.batchSystem.issueBatchJob(command, job_desc_7)
             job_update_info = self.batchSystem.getUpdatedBatchJob(maxWait=1000)
             self.assertEqual(job_update_info.exitStatus, 42)
             self.assertEqual(job_update_info.jobID, job7)
@@ -294,34 +343,67 @@ class hidden:
             if isinstance(self.batchSystem, BatchSystemSupport):
                 check_resource_request = self.batchSystem.check_resource_request
                 # Assuming we have <2000 cores, this should be too many cores
-                self.assertRaises(
-
-
-
+                self.assertRaises(
+                    InsufficientSystemResources,
+                    check_resource_request,
+                    Requirer(dict(memory=1000, cores=2000, disk="1G", accelerators=[])),
+                )
+                self.assertRaises(
+                    InsufficientSystemResources,
+                    check_resource_request,
+                    Requirer(dict(memory=5, cores=2000, disk="1G", accelerators=[])),
+                )

                 # This should be too much memory
-                self.assertRaises(
-
+                self.assertRaises(
+                    InsufficientSystemResources,
+                    check_resource_request,
+                    Requirer(dict(memory="5000G", cores=1, disk="1G", accelerators=[])),
+                )

                 # This should be too much disk
-                self.assertRaises(
-
+                self.assertRaises(
+                    InsufficientSystemResources,
+                    check_resource_request,
+                    Requirer(dict(memory=5, cores=1, disk="2G", accelerators=[])),
+                )

                 # This should be an accelerator we don't have.
                 # All the batch systems need code to know they don't have these accelerators.
-                self.assertRaises(
-
+                self.assertRaises(
+                    InsufficientSystemResources,
+                    check_resource_request,
+                    Requirer(
+                        dict(
+                            memory=5,
+                            cores=1,
+                            disk=100,
+                            accelerators=[{"kind": "turbo-encabulator", "count": 1}],
+                        )
+                    ),
+                )

                 # These should be missing attributes
-                self.assertRaises(
-
-
-
-
-
+                self.assertRaises(
+                    AttributeError,
+                    check_resource_request,
+                    Requirer(dict(memory=5, cores=1, disk=1000)),
+                )
+                self.assertRaises(
+                    AttributeError,
+                    check_resource_request,
+                    Requirer(dict(cores=1, disk=1000, accelerators=[])),
+                )
+                self.assertRaises(
+                    AttributeError,
+                    check_resource_request,
+                    Requirer(dict(memory=10, disk=1000, accelerators=[])),
+                )

                 # This should actually work
-                check_resource_request(
+                check_resource_request(
+                    Requirer(dict(memory=10, cores=1, disk=100, accelerators=[]))
+                )

         def testScalableBatchSystem(self):
             # If instance of scalable batch system
@@ -348,7 +430,7 @@ class hidden:
             # prevent an endless loop, give it a few tries
             for it in range(tries):
                 running = self.batchSystem.getRunningBatchJobIDs()
-                logger.info(f
+                logger.info(f"Running jobs now: {running}")
                 runningIDs = list(running.keys())
                 if len(runningIDs) == numJobs:
                     break
@@ -363,7 +445,7 @@ class hidden:

         cpuCount = cpu_count()
         allocatedCores = sorted({1, 2, cpuCount})
-        sleepTime =
+        sleepTime = 30

         @abstractmethod
         def getBatchSystemName(self):
@@ -398,18 +480,26 @@ class hidden:
             Tests that the batch system is allocating core resources properly for concurrent tasks.
             """
             for coresPerJob in self.allocatedCores:
-                tempDir = self._createTempDir(
+                tempDir = self._createTempDir("testFiles")
                 options = self.getOptions(tempDir)

-                counterPath = os.path.join(tempDir,
+                counterPath = os.path.join(tempDir, "counter")
                 resetCounters(counterPath)
                 value, maxValue = getCounters(counterPath)
                 assert (value, maxValue) == (0, 0)

                 root = Job()
                 for _ in range(self.cpuCount):
-                    root.addFollowOn(
-
+                    root.addFollowOn(
+                        Job.wrapFn(
+                            measureConcurrency,
+                            counterPath,
+                            self.sleepTime,
+                            cores=coresPerJob,
+                            memory="1M",
+                            disk="1Mi",
+                        )
+                    )
                 with Toil(options) as toil:
                     toil.start(root)
                 _, maxValue = getCounters(counterPath)
@@ -423,18 +513,24 @@ class hidden:
                 # mapping of the number of cores to the OMP_NUM_THREADS value
                 0.1: "1",
                 1: "1",
-                2: "2"
+                2: "2",
             }

             temp_dir = self._createTempDir()
             options = self.getOptions(temp_dir)

             for cores, expected_omp_threads in test_cases.items():
-                if os.environ.get(
-                    expected_omp_threads = os.environ.get(
-                    logger.info(
+                if os.environ.get("OMP_NUM_THREADS"):
+                    expected_omp_threads = os.environ.get("OMP_NUM_THREADS")
+                    logger.info(
+                        f"OMP_NUM_THREADS is set. Using OMP_NUM_THREADS={expected_omp_threads} instead."
+                    )
                 with Toil(options) as toil:
-                    output = toil.start(
+                    output = toil.start(
+                        Job.wrapFn(
+                            get_omp_threads, memory="1Mi", cores=cores, disk="1Mi"
+                        )
+                    )
                 self.assertEqual(output, expected_omp_threads)

     class AbstractGridEngineBatchSystemTest(AbstractBatchSystemTest):
@@ -447,9 +543,10 @@ class hidden:
             config = super()._createConfig()
             config.statePollingWait = 0.5 # Reduce polling wait so tests run faster
             # can't use _getTestJobStorePath since that method removes the directory
-            config.jobStore =
+            config.jobStore = "file:" + self._createTempDir("jobStore")
             return config

+
 @needs_kubernetes
 @needs_aws_s3
 @needs_fetchable_appliance
@@ -464,8 +561,11 @@ class KubernetesBatchSystemTest(hidden.AbstractBatchSystemTest):
     def createBatchSystem(self):
         # We know we have Kubernetes so we can import the batch system
         from toil.batchSystems.kubernetes import KubernetesBatchSystem
-
-
+
+        return KubernetesBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1e9, maxDisk=2001
+        )
+

 @needs_kubernetes_installed
 class KubernetesBatchSystemBenchTest(ToilTest):
@@ -489,7 +589,9 @@ class KubernetesBatchSystemBenchTest(ToilTest):
         constraints = KubernetesBatchSystem.Placement()
         constraints.set_preemptible(False)
         constraints.apply(normal_spec)
-        self.assertEqual(
+        self.assertEqual(
+            textwrap.dedent(
+                """
 {'node_affinity': {'preferred_during_scheduling_ignored_during_execution': None,
 'required_during_scheduling_ignored_during_execution': {'node_selector_terms': [{'match_expressions': [{'key': 'eks.amazonaws.com/capacityType',
 'operator': 'NotIn',
@@ -500,14 +602,19 @@ class KubernetesBatchSystemBenchTest(ToilTest):
 'match_fields': None}]}},
 'pod_affinity': None,
 'pod_anti_affinity': None}
-            """
+                """
+            ).strip(),
+            str(normal_spec.affinity),
+        )
         self.assertEqual(str(normal_spec.tolerations), "None")

         spot_spec = V1PodSpec(containers=[])
         constraints = KubernetesBatchSystem.Placement()
         constraints.set_preemptible(True)
         constraints.apply(spot_spec)
-        self.assertEqual(
+        self.assertEqual(
+            textwrap.dedent(
+                """
 {'node_affinity': {'preferred_during_scheduling_ignored_during_execution': [{'preference': {'match_expressions': [{'key': 'eks.amazonaws.com/capacityType',
 'operator': 'In',
 'values': ['SPOT']}],
@@ -521,14 +628,22 @@ class KubernetesBatchSystemBenchTest(ToilTest):
 'required_during_scheduling_ignored_during_execution': None},
 'pod_affinity': None,
 'pod_anti_affinity': None}
-            """
-
+                """
+            ).strip(),
+            str(spot_spec.affinity),
+        )
+        self.assertEqual(
+            textwrap.dedent(
+                """
 [{'effect': None,
 'key': 'cloud.google.com/gke-preemptible',
 'operator': None,
 'toleration_seconds': None,
 'value': 'true'}]
-            """
+                """
+            ).strip(),
+            str(spot_spec.tolerations),
+        )

     def test_label_constraints(self):
         """
@@ -544,11 +659,13 @@ class KubernetesBatchSystemBenchTest(ToilTest):

         spec = V1PodSpec(containers=[])
         constraints = KubernetesBatchSystem.Placement()
-        constraints.required_labels = [(
-        constraints.desired_labels = [(
-        constraints.prohibited_labels = [(
+        constraints.required_labels = [("GottaBeSetTo", ["This"])]
+        constraints.desired_labels = [("OutghtToBeSetTo", ["That"])]
+        constraints.prohibited_labels = [("CannotBe", ["ABadThing"])]
         constraints.apply(spec)
-        self.assertEqual(
+        self.assertEqual(
+            textwrap.dedent(
+                """
 {'node_affinity': {'preferred_during_scheduling_ignored_during_execution': [{'preference': {'match_expressions': [{'key': 'OutghtToBeSetTo',
 'operator': 'In',
 'values': ['That']}],
@@ -563,7 +680,10 @@ class KubernetesBatchSystemBenchTest(ToilTest):
 'match_fields': None}]}},
 'pod_affinity': None,
 'pod_anti_affinity': None}
-            """
+                """
+            ).strip(),
+            str(spec.affinity),
+        )
         self.assertEqual(str(spec.tolerations), "None")


@@ -579,13 +699,16 @@ class AWSBatchBatchSystemTest(hidden.AbstractBatchSystemTest):

     def createBatchSystem(self):
         from toil.batchSystems.awsBatch import AWSBatchBatchSystem
-
-
+
+        return AWSBatchBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1e9, maxDisk=2001
+        )

     def get_max_startup_seconds(self) -> int:
         # AWS Batch may need to scale out the compute environment.
         return 300

+
 @slow
 @needs_mesos
 class MesosBatchSystemTest(hidden.AbstractBatchSystemTest, MesosTestSupport):
@@ -600,7 +723,7 @@ class MesosBatchSystemTest(hidden.AbstractBatchSystemTest, MesosTestSupport):
         private IP address
         """
         config = super().createConfig()
-        config.mesos_endpoint =
+        config.mesos_endpoint = "localhost:5050"
         return config

     def supportsWallTime(self):
@@ -609,19 +732,25 @@ class MesosBatchSystemTest(hidden.AbstractBatchSystemTest, MesosTestSupport):
     def createBatchSystem(self):
         # We know we have Mesos so we can import the batch system
         from toil.batchSystems.mesos.batchSystem import MesosBatchSystem
+
         self._startMesos(numCores)
-        return MesosBatchSystem(
-
+        return MesosBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1e9, maxDisk=1001
+        )

     def tearDown(self):
         self._stopMesos()
         super().tearDown()

     def testIgnoreNode(self):
-        self.batchSystem.ignoreNode(
-        jobDesc = self._mockJobDescription(
-
-
+        self.batchSystem.ignoreNode("localhost")
+        jobDesc = self._mockJobDescription(
+            jobName="test2",
+            unitName=None,
+            jobStoreID="1",
+            requirements=defaultRequirements,
+        )
+        job = self.batchSystem.issueBatchJob("sleep 1000", jobDesc)

         issuedID = self._waitForJobsToIssue(1)
         self.assertEqual(set(issuedID), {job})
@@ -638,7 +767,7 @@ def write_temp_file(s: str, temp_dir: str) -> str:
     """
     fd, path = tempfile.mkstemp(dir=temp_dir)
     try:
-        encoded = s.encode(
+        encoded = s.encode("utf-8")
         assert os.write(fd, encoded) == len(encoded)
     except:
         os.unlink(path)
@@ -658,8 +787,9 @@ class SingleMachineBatchSystemTest(hidden.AbstractBatchSystemTest):
         return True

     def createBatchSystem(self) -> AbstractBatchSystem:
-        return SingleMachineBatchSystem(
-
+        return SingleMachineBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1e9, maxDisk=2001
+        )

     def testProcessEscape(self, hide: bool = False) -> None:
         """
@@ -680,14 +810,18 @@ class SingleMachineBatchSystemTest(hidden.AbstractBatchSystemTest):
             from typing import Any

             def handle_signal(sig: Any, frame: Any) -> None:
-                sys.stderr.write(f
+                sys.stderr.write(f"{os.getpid()} ignoring signal {sig}\n")

-            if hasattr(signal,
+            if hasattr(signal, "valid_signals"):
                 # We can just ask about the signals
                 all_signals = signal.valid_signals()
             else:
                 # Fish them out by name
-                all_signals = [
+                all_signals = [
+                    getattr(signal, n)
+                    for n in dir(signal)
+                    if n.startswith("SIG") and not n.startswith("SIG_")
+                ]

             for sig in all_signals:
                 # Set up to ignore all signals we can and generally be obstinate
@@ -709,7 +843,7 @@ class SingleMachineBatchSystemTest(hidden.AbstractBatchSystemTest):
             fd = os.open(sys.argv[1], os.O_RDONLY)
             fcntl.lockf(fd, fcntl.LOCK_SH)

-            sys.stderr.write(f
+            sys.stderr.write(f"{os.getpid()} waiting...\n")

             while True:
                 # Wait around forever
@@ -721,22 +855,26 @@ class SingleMachineBatchSystemTest(hidden.AbstractBatchSystemTest):
         script_path = write_temp_file(self._getScriptSource(script), temp_dir)

         # We will have all the job processes try and lock this file shared while they are alive.
-        lockable_path = write_temp_file(
+        lockable_path = write_temp_file("", temp_dir)

         try:
-            command = f
+            command = f"{sys.executable} {script_path} {lockable_path}"
             if hide:
                 # Tell the children to stop the first child and hide out in the
                 # process group it made.
-                command +=
+                command += " hide"

             # Start the job
-            self.batchSystem.issueBatchJob(
-
+            self.batchSystem.issueBatchJob(
+                command,
+                self._mockJobDescription(
+                    jobName="fork", jobStoreID="1", requirements=defaultRequirements
+                ),
+            )
             # Wait
             time.sleep(10)

-            lockfile = open(lockable_path,
+            lockfile = open(lockable_path, "w")

             if not hide:
                 # In hiding mode the job will finish, and the batch system will
@@ -791,13 +929,14 @@ class MaxCoresSingleMachineBatchSystemTest(ToilTest):

         # Write initial value of counter file containing a tuple of two integers (i, n) where i
         # is the number of currently executing tasks and n the maximum observed value of i
-        self.counterPath = write_temp_file(
+        self.counterPath = write_temp_file("0,0", temp_dir)

         def script() -> None:
             import fcntl
             import os
             import sys
             import time
+
             def count(delta: int) -> None:
                 """
                 Adjust the first integer value in a file by the given amount. If the result
@@ -807,13 +946,14 @@ class MaxCoresSingleMachineBatchSystemTest(ToilTest):
                 try:
                     fcntl.flock(fd, fcntl.LOCK_EX)
                     try:
-                        s = os.read(fd, 10).decode(
-                        value, maxValue = list(map(int, s.split(
+                        s = os.read(fd, 10).decode("utf-8")
+                        value, maxValue = list(map(int, s.split(",")))
                         value += delta
-                        if value > maxValue:
+                        if value > maxValue:
+                            maxValue = value
                         os.lseek(fd, 0, 0)
                         os.ftruncate(fd, 0)
-                        os.write(fd, f
+                        os.write(fd, f"{value},{maxValue}".encode())
                     finally:
                         fcntl.flock(fd, fcntl.LOCK_UN)
                 finally:
@@ -837,7 +977,7 @@ class MaxCoresSingleMachineBatchSystemTest(ToilTest):
         os.unlink(self.counterPath)

     def scriptCommand(self) -> str:
-        return
+        return " ".join([sys.executable, self.scriptPath, self.counterPath])

     @retry_flaky_test(prepare=[tearDown, setUp])
     def test(self):
@@ -849,7 +989,13 @@ class MaxCoresSingleMachineBatchSystemTest(ToilTest):
         minCores = F(1, 10)
         self.assertEqual(float(minCores), SingleMachineBatchSystem.minCores)
         for maxCores in {F(minCores), minCores * 10, F(1), F(numCores, 2), F(numCores)}:
-            for coresPerJob in {
+            for coresPerJob in {
+                F(minCores),
+                F(minCores * 10),
+                F(1),
+                F(maxCores, 2),
+                F(maxCores),
+            }:
                 for load in (F(1, 10), F(1), F(10)):
                     jobs = int(maxCores / coresPerJob * load)
                     if jobs >= 1 and minCores <= coresPerJob < maxCores:
@@ -859,53 +1005,73 @@ class MaxCoresSingleMachineBatchSystemTest(ToilTest):
                             maxCores=float(maxCores),
                             # Ensure that memory or disk requirements don't get in the way.
                             maxMemory=jobs * 10,
-                            maxDisk=jobs * 10
+                            maxDisk=jobs * 10,
+                        )
                         try:
                             jobIds = set()
                             for i in range(0, int(jobs)):
-
-
-
-
-
-
-
+                                desc = JobDescription(
+                                    requirements=dict(
+                                        cores=float(coresPerJob),
+                                        memory=1,
+                                        disk=1,
+                                        accelerators=[],
+                                        preemptible=preemptible,
+                                    ),
+                                    jobName=str(i),
+                                    unitName="",
+                                )
+                                jobIds.add(bs.issueBatchJob(self.scriptCommand(), desc))
                             self.assertEqual(len(jobIds), jobs)
                             while jobIds:
                                 job = bs.getUpdatedBatchJob(maxWait=10)
                                 self.assertIsNotNone(job)
-                                jobId, status, wallTime =
+                                jobId, status, wallTime = (
+                                    job.jobID,
+                                    job.exitStatus,
+                                    job.wallTime,
+                                )
                                 self.assertEqual(status, 0)
                                 # would raise KeyError on absence
                                 jobIds.remove(jobId)
                         finally:
                             bs.shutdown()
-                    concurrentTasks, maxConcurrentTasks = getCounters(
+                    concurrentTasks, maxConcurrentTasks = getCounters(
+                        self.counterPath
+                    )
                     self.assertEqual(concurrentTasks, 0)
-                    logger.info(
-
-
+                    logger.info(
+                        f"maxCores: {maxCores}, "
+                        f"coresPerJob: {coresPerJob}, "
+                        f"load: {load}"
+                    )
                     # This is the key assertion: we shouldn't run too many jobs.
                     # Because of nondeterminism we can't guarantee hitting the limit.
                     expectedMaxConcurrentTasks = min(maxCores // coresPerJob, jobs)
-                    self.assertLessEqual(
+                    self.assertLessEqual(
+                        maxConcurrentTasks, expectedMaxConcurrentTasks
+                    )
                     resetCounters(self.counterPath)

-    @skipIf(
+    @skipIf(
+        SingleMachineBatchSystem.numCores < 3,
+        "Need at least three cores to run this test",
+    )
     def testServices(self):
         options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
-        options.
+        options.logLevel = "DEBUG"
         options.maxCores = 3
         self.assertTrue(options.maxCores <= SingleMachineBatchSystem.numCores)
         Job.Runner.startToil(Job.wrapJobFn(parentJob, self.scriptCommand()), options)
-        with open(self.counterPath,
+        with open(self.counterPath, "r+") as f:
             s = f.read()
-            logger.info(
+            logger.info("Counter is %s", s)
         self.assertEqual(getCounters(self.counterPath), (0, 3))


 # Toil can use only top-level functions so we have to add them here:

+
 def parentJob(job, cmd):
     job.addChildJobFn(childJob, cmd)

@@ -932,13 +1098,13 @@ class Service(Job.Service):
         self.cmd = cmd

     def start(self, fileStore):
-        subprocess.check_call(self.cmd +
+        subprocess.check_call(self.cmd + " 1", shell=True)

     def check(self):
         return True

     def stop(self, fileStore):
-        subprocess.check_call(self.cmd +
+        subprocess.check_call(self.cmd + " -1", shell=True)


 @slow
@@ -950,14 +1116,17 @@ class GridEngineBatchSystemTest(hidden.AbstractGridEngineBatchSystemTest):

     def createBatchSystem(self) -> AbstractBatchSystem:
         from toil.batchSystems.gridengine import GridEngineBatchSystem
-
-
+
+        return GridEngineBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1000e9, maxDisk=1e9
+        )

     def tearDown(self):
         super().tearDown()
         # Cleanup GridEngine output log file from qsub
         from glob import glob
-
+
+        for f in glob("toil_job*.o*"):
             os.unlink(f)


@@ -970,14 +1139,17 @@ class SlurmBatchSystemTest(hidden.AbstractGridEngineBatchSystemTest):

     def createBatchSystem(self) -> AbstractBatchSystem:
         from toil.batchSystems.slurm import SlurmBatchSystem
-
-
+
+        return SlurmBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1000e9, maxDisk=1e9
+        )

     def tearDown(self):
         super().tearDown()
         # Cleanup 'slurm-%j.out' produced by sbatch
         from glob import glob
-
+
+        for f in glob("slurm-*.out"):
             os.unlink(f)


@@ -987,10 +1159,13 @@ class LSFBatchSystemTest(hidden.AbstractGridEngineBatchSystemTest):
     """
     Tests against the LSF batch system
     """
+
     def createBatchSystem(self) -> AbstractBatchSystem:
         from toil.batchSystems.lsf import LSFBatchSystem
-
-
+
+        return LSFBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1000e9, maxDisk=1e9
+        )


 @slow
@@ -1003,19 +1178,22 @@ class TorqueBatchSystemTest(hidden.AbstractGridEngineBatchSystemTest):
     def _createDummyConfig(self):
         config = super()._createDummyConfig()
         # can't use _getTestJobStorePath since that method removes the directory
-        config.jobStore = self._createTempDir(
+        config.jobStore = self._createTempDir("jobStore")
         return config

     def createBatchSystem(self) -> AbstractBatchSystem:
         from toil.batchSystems.torque import TorqueBatchSystem
-
-
+
+        return TorqueBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1000e9, maxDisk=1e9
+        )

     def tearDown(self):
         super().tearDown()
         # Cleanup 'toil_job-%j.out' produced by sbatch
         from glob import glob
-
+
+        for f in glob("toil_job_*.[oe]*"):
             os.unlink(f)


@@ -1028,8 +1206,10 @@ class HTCondorBatchSystemTest(hidden.AbstractGridEngineBatchSystemTest):

     def createBatchSystem(self) -> AbstractBatchSystem:
         from toil.batchSystems.htcondor import HTCondorBatchSystem
-
-
+
+        return HTCondorBatchSystem(
+            config=self.config, maxCores=numCores, maxMemory=1000e9, maxDisk=1e9
+        )

     def tearDown(self):
         super().tearDown()
@@ -1044,46 +1224,71 @@ class SingleMachineBatchSystemJobTest(hidden.AbstractBatchSystemJobTest):
         return "single_machine"

     @slow
-    @retry_flaky_test(
+    @retry_flaky_test(
+        prepare=[
+            hidden.AbstractBatchSystemJobTest.tearDown,
+            hidden.AbstractBatchSystemJobTest.setUp,
+        ]
+    )
     def testConcurrencyWithDisk(self):
         """
         Tests that the batch system is allocating disk resources properly
         """
-        tempDir = self._createTempDir(
+        tempDir = self._createTempDir("testFiles")

         options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
         options.workDir = tempDir
         from toil import physicalDisk
+
         availableDisk = physicalDisk(options.workDir)
-        logger.info(
+        logger.info("Testing disk concurrency limits with %s disk space", availableDisk)
         # More disk might become available by the time Toil starts, so we limit it here
         options.maxDisk = availableDisk
         options.batchSystem = self.batchSystemName

-        counterPath = os.path.join(tempDir,
+        counterPath = os.path.join(tempDir, "counter")
         resetCounters(counterPath)
         value, maxValue = getCounters(counterPath)
         assert (value, maxValue) == (0, 0)

         half_disk = availableDisk // 2
         more_than_half_disk = half_disk + 500
-        logger.info(
+        logger.info("Dividing into parts of %s and %s", half_disk, more_than_half_disk)

         root = Job()
         # Physically, we're asking for 50% of disk and 50% of disk + 500bytes in the two jobs. The
         # batchsystem should not allow the 2 child jobs to run concurrently.
-        root.addChild(
-
-
-
+        root.addChild(
+            Job.wrapFn(
+                measureConcurrency,
+                counterPath,
+                self.sleepTime,
+                cores=1,
+                memory="1M",
+                disk=half_disk,
+            )
+        )
+        root.addChild(
+            Job.wrapFn(
+                measureConcurrency,
+                counterPath,
+                self.sleepTime,
+                cores=1,
+                memory="1M",
+                disk=more_than_half_disk,
+            )
+        )
         Job.Runner.startToil(root, options)
         _, maxValue = getCounters(counterPath)

-        logger.info(
+        logger.info("After run: %s disk space", physicalDisk(options.workDir))

         self.assertEqual(maxValue, 1)

-    @skipIf(
+    @skipIf(
+        SingleMachineBatchSystem.numCores < 4,
+        "Need at least four cores to run this test",
+    )
     @slow
     def testNestedResourcesDoNotBlock(self):
         """
@@ -1091,39 +1296,80 @@ class SingleMachineBatchSystemJobTest(hidden.AbstractBatchSystemJobTest):
         Test that unavailability of cpus for one job that is scheduled does not block another job
         that can run.
         """
-        tempDir = self._createTempDir(
+        tempDir = self._createTempDir("testFiles")

         options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
         options.workDir = tempDir
         options.maxCores = 4
         from toil import physicalMemory
+
         availableMemory = physicalMemory()
         options.batchSystem = self.batchSystemName

-        outFile = os.path.join(tempDir,
-        open(outFile,
+        outFile = os.path.join(tempDir, "counter")
+        open(outFile, "w").close()

         root = Job()

-        blocker = Job.wrapFn(
-
-
-
-
-
+        blocker = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=30,
+            writeVal="b",
+            cores=2,
+            memory="1M",
+            disk="1M",
+        )
+        firstJob = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=5,
+            writeVal="fJ",
+            cores=1,
+            memory="1M",
+            disk="1M",
+        )
+        secondJob = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=10,
+            writeVal="sJ",
+            cores=1,
+            memory="1M",
+            disk="1M",
+        )

         # Should block off 50% of memory while waiting for it's 3 cores
-        firstJobChild = Job.wrapFn(
-
+        firstJobChild = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=0,
+            writeVal="fJC",
+            cores=3,
+            memory=int(availableMemory // 2),
+            disk="1M",
+        )

         # These two shouldn't be able to run before B because there should be only
         # (50% of memory - 1M) available (firstJobChild should be blocking 50%)
-        secondJobChild = Job.wrapFn(
-
-
-
-
-
+        secondJobChild = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=5,
+            writeVal="sJC",
+            cores=2,
+            memory=int(availableMemory // 1.5),
+            disk="1M",
+        )
+        secondJobGrandChild = Job.wrapFn(
+            _resourceBlockTestAuxFn,
+            outFile=outFile,
+            sleepTime=5,
+            writeVal="sJGC",
+            cores=2,
+            memory=int(availableMemory // 1.5),
+            disk="1M",
+        )

         root.addChild(blocker)
         root.addChild(firstJob)
@@ -1153,9 +1399,11 @@ class SingleMachineBatchSystemJobTest(hidden.AbstractBatchSystemJobTest):
             outString = oFH.read()
         # The ordering of b, fJ and sJ is non-deterministic since they are scheduled at the same
         # time. We look for all possible permutations.
-        possibleStarts = tuple(
+        possibleStarts = tuple(
+            "".join(x) for x in itertools.permutations(["b", "fJ", "sJ"])
+        )
         assert outString.startswith(possibleStarts)
-        assert outString.endswith(
+        assert outString.endswith("sJCsJGCfJC")


 def _resourceBlockTestAuxFn(outFile, sleepTime, writeVal):
@@ -1165,7 +1413,7 @@ def _resourceBlockTestAuxFn(outFile, sleepTime, writeVal):
     :param int sleepTime: Time to sleep for
     :param str writeVal: Character to write
     """
-    with open(outFile,
+    with open(outFile, "a") as oFH:
         fcntl.flock(oFH, fcntl.LOCK_EX)
         oFH.write(writeVal)
         time.sleep(sleepTime)
@@ -1177,9 +1425,10 @@ class MesosBatchSystemJobTest(hidden.AbstractBatchSystemJobTest, MesosTestSupport):
     """
     Tests Toil workflow against the Mesos batch system
     """
+
     def getOptions(self, tempDir):
         options = super().getOptions(tempDir)
-        options.mesos_endpoint =
+        options.mesos_endpoint = "localhost:5050"
         return options

     def getBatchSystemName(self):
@@ -1220,12 +1469,13 @@ def count(delta, file_path):
         fcntl.flock(fd, fcntl.LOCK_EX)
         try:
             s = os.read(fd, 10)
-            value, maxValue = (int(i) for i in s.decode(
+            value, maxValue = (int(i) for i in s.decode("utf-8").split(","))
             value += delta
-            if value > maxValue:
+            if value > maxValue:
+                maxValue = value
             os.lseek(fd, 0, 0)
             os.ftruncate(fd, 0)
-            os.write(fd, f
+            os.write(fd, f"{value},{maxValue}".encode())
         finally:
             fcntl.flock(fd, fcntl.LOCK_UN)
     finally:
@@ -1234,8 +1484,8 @@ def count(delta, file_path):


 def getCounters(path):
-    with open(path,
-        concurrentTasks, maxConcurrentTasks = (int(i) for i in f.read().split(
+    with open(path, "r+") as f:
+        concurrentTasks, maxConcurrentTasks = (int(i) for i in f.read().split(","))
     return concurrentTasks, maxConcurrentTasks


@@ -1246,4 +1496,4 @@ def resetCounters(path):


 def get_omp_threads() -> str:
-    return os.environ[
+    return os.environ["OMP_NUM_THREADS"]