toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -19,13 +19,15 @@ from uuid import uuid4
|
|
|
19
19
|
|
|
20
20
|
import pytest
|
|
21
21
|
|
|
22
|
-
from toil.test import (
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
22
|
+
from toil.test import (
|
|
23
|
+
ToilTest,
|
|
24
|
+
integrative,
|
|
25
|
+
needs_fetchable_appliance,
|
|
26
|
+
needs_google_project,
|
|
27
|
+
needs_google_storage,
|
|
28
|
+
slow,
|
|
29
|
+
timeLimit,
|
|
30
|
+
)
|
|
29
31
|
from toil.version import exactPython
|
|
30
32
|
|
|
31
33
|
log = logging.getLogger(__name__)
|
|
@@ -37,50 +39,62 @@ log = logging.getLogger(__name__)
|
|
|
37
39
|
@needs_fetchable_appliance
|
|
38
40
|
@slow
|
|
39
41
|
class AbstractGCEAutoscaleTest(ToilTest):
|
|
40
|
-
projectID = os.getenv(
|
|
42
|
+
projectID = os.getenv("TOIL_GOOGLE_PROJECTID")
|
|
41
43
|
|
|
42
44
|
def sshUtil(self, command):
|
|
43
|
-
baseCommand = [
|
|
45
|
+
baseCommand = ["toil", "ssh-cluster", "--insecure", "-p=gce", self.clusterName]
|
|
44
46
|
callCommand = baseCommand + command
|
|
45
47
|
subprocess.check_call(callCommand)
|
|
46
48
|
|
|
47
49
|
def rsyncUtil(self, src, dest):
|
|
48
|
-
baseCommand = [
|
|
50
|
+
baseCommand = [
|
|
51
|
+
"toil",
|
|
52
|
+
"rsync-cluster",
|
|
53
|
+
"--insecure",
|
|
54
|
+
"-p=gce",
|
|
55
|
+
self.clusterName,
|
|
56
|
+
]
|
|
49
57
|
callCommand = baseCommand + [src, dest]
|
|
50
58
|
subprocess.check_call(callCommand)
|
|
51
59
|
|
|
52
60
|
def destroyClusterUtil(self):
|
|
53
|
-
callCommand = [
|
|
61
|
+
callCommand = ["toil", "destroy-cluster", "-p=gce", self.clusterName]
|
|
54
62
|
subprocess.check_call(callCommand)
|
|
55
63
|
|
|
56
64
|
def createClusterUtil(self, args=None):
|
|
57
65
|
if args is None:
|
|
58
66
|
args = []
|
|
59
|
-
callCommand = [
|
|
60
|
-
|
|
67
|
+
callCommand = [
|
|
68
|
+
"toil",
|
|
69
|
+
"launch-cluster",
|
|
70
|
+
self.clusterName,
|
|
71
|
+
"-p=gce",
|
|
72
|
+
"--keyPairName=%s" % self.keyName,
|
|
73
|
+
"--leaderNodeType=%s" % self.leaderInstanceType,
|
|
74
|
+
"--zone=%s" % self.googleZone,
|
|
75
|
+
]
|
|
61
76
|
if self.botoDir is not None:
|
|
62
|
-
callCommand += [
|
|
77
|
+
callCommand += ["--boto=%s" % self.botoDir]
|
|
63
78
|
callCommand = callCommand + args if args else callCommand
|
|
64
|
-
log.info("createClusterUtil: %s" %
|
|
79
|
+
log.info("createClusterUtil: %s" % "".join(callCommand))
|
|
65
80
|
subprocess.check_call(callCommand)
|
|
66
81
|
|
|
67
82
|
def cleanJobStoreUtil(self):
|
|
68
|
-
callCommand = [
|
|
83
|
+
callCommand = ["toil", "clean", self.jobStore]
|
|
69
84
|
subprocess.check_call(callCommand)
|
|
70
85
|
|
|
71
86
|
def __init__(self, methodName):
|
|
72
87
|
super().__init__(methodName=methodName)
|
|
73
88
|
# TODO: add TOIL_GOOGLE_KEYNAME to needs_google_project or ssh with SA account
|
|
74
|
-
self.keyName = os.getenv(
|
|
89
|
+
self.keyName = os.getenv("TOIL_GOOGLE_KEYNAME")
|
|
75
90
|
# TODO: remove this when switching to google jobstore
|
|
76
|
-
self.botoDir = os.getenv(
|
|
91
|
+
self.botoDir = os.getenv("TOIL_BOTO_DIR")
|
|
77
92
|
# TODO: get this from SA account or add an environment variable
|
|
78
|
-
self.googleZone =
|
|
93
|
+
self.googleZone = "us-west1-a"
|
|
79
94
|
|
|
80
|
-
|
|
81
|
-
self.leaderInstanceType = 'n1-standard-1'
|
|
95
|
+
self.leaderInstanceType = "n1-standard-1"
|
|
82
96
|
self.instanceTypes = ["n1-standard-2"]
|
|
83
|
-
self.numWorkers = [
|
|
97
|
+
self.numWorkers = ["2"]
|
|
84
98
|
self.numSamples = 2
|
|
85
99
|
self.spotBid = 0.15
|
|
86
100
|
|
|
@@ -92,7 +106,7 @@ class AbstractGCEAutoscaleTest(ToilTest):
|
|
|
92
106
|
self.destroyClusterUtil()
|
|
93
107
|
self.cleanJobStoreUtil()
|
|
94
108
|
|
|
95
|
-
#def getMatchingRoles(self, clusterName):
|
|
109
|
+
# def getMatchingRoles(self, clusterName):
|
|
96
110
|
# ctx = AWSProvisioner._buildContext(clusterName)
|
|
97
111
|
# roles = list(ctx.local_roles())
|
|
98
112
|
# return roles
|
|
@@ -107,7 +121,6 @@ class AbstractGCEAutoscaleTest(ToilTest):
|
|
|
107
121
|
"""
|
|
108
122
|
raise NotImplementedError()
|
|
109
123
|
|
|
110
|
-
|
|
111
124
|
@abstractmethod
|
|
112
125
|
def _runScript(self, toilOptions):
|
|
113
126
|
"""
|
|
@@ -127,82 +140,103 @@ class AbstractGCEAutoscaleTest(ToilTest):
|
|
|
127
140
|
self.launchCluster()
|
|
128
141
|
|
|
129
142
|
# TODO: What is the point of this test?
|
|
130
|
-
#assert len(self.getMatchingRoles(self.clusterName)) == 1
|
|
143
|
+
# assert len(self.getMatchingRoles(self.clusterName)) == 1
|
|
131
144
|
|
|
132
145
|
# TODO: Add a check of leader and node storage size if set.
|
|
133
146
|
|
|
134
147
|
# --never-download prevents silent upgrades to pip, wheel and setuptools
|
|
135
|
-
venv_command = [
|
|
136
|
-
|
|
148
|
+
venv_command = [
|
|
149
|
+
"virtualenv",
|
|
150
|
+
"--system-site-packages",
|
|
151
|
+
"--never-download",
|
|
152
|
+
"--python",
|
|
153
|
+
exactPython,
|
|
154
|
+
"/home/venv",
|
|
155
|
+
]
|
|
137
156
|
self.sshUtil(venv_command)
|
|
138
157
|
|
|
139
158
|
self._getScript()
|
|
140
159
|
|
|
141
|
-
toilOptions = [
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
160
|
+
toilOptions = [
|
|
161
|
+
self.jobStore,
|
|
162
|
+
"--batchSystem=mesos",
|
|
163
|
+
"--workDir=/var/lib/toil",
|
|
164
|
+
"--clean=always",
|
|
165
|
+
"--retryCount=2",
|
|
166
|
+
"--clusterStats=/home/",
|
|
167
|
+
"--logDebug",
|
|
168
|
+
"--logFile=/home/sort.log",
|
|
169
|
+
"--provisioner=gce",
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
toilOptions.extend(
|
|
173
|
+
[
|
|
174
|
+
"--nodeTypes=" + ",".join(self.instanceTypes),
|
|
175
|
+
"--maxNodes=%s" % ",".join(self.numWorkers),
|
|
176
|
+
]
|
|
177
|
+
)
|
|
153
178
|
if preemptibleJobs:
|
|
154
|
-
toilOptions.extend([
|
|
179
|
+
toilOptions.extend(["--defaultPreemptible"])
|
|
155
180
|
|
|
156
181
|
self._runScript(toilOptions)
|
|
157
182
|
|
|
158
|
-
#TODO: Does this just check if it is still running?
|
|
159
|
-
#assert len(self.getMatchingRoles(self.clusterName)) == 1
|
|
183
|
+
# TODO: Does this just check if it is still running?
|
|
184
|
+
# assert len(self.getMatchingRoles(self.clusterName)) == 1
|
|
160
185
|
|
|
161
|
-
checkStatsCommand = [
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
186
|
+
checkStatsCommand = [
|
|
187
|
+
"/home/venv/bin/python",
|
|
188
|
+
"-c",
|
|
189
|
+
"import json; import os; "
|
|
190
|
+
'json.load(open("/home/" + [f for f in os.listdir("/home/") '
|
|
191
|
+
'if f.endswith(".json")].pop()))',
|
|
192
|
+
]
|
|
166
193
|
|
|
167
194
|
self.sshUtil(checkStatsCommand)
|
|
168
195
|
|
|
169
|
-
|
|
170
196
|
# TODO: Add a check to make sure everything is cleaned up.
|
|
171
197
|
|
|
172
198
|
|
|
173
|
-
|
|
174
199
|
@pytest.mark.timeout(1600)
|
|
175
200
|
class GCEAutoscaleTest(AbstractGCEAutoscaleTest):
|
|
176
201
|
|
|
177
202
|
def __init__(self, name):
|
|
178
203
|
super().__init__(name)
|
|
179
|
-
self.clusterName =
|
|
204
|
+
self.clusterName = "provisioner-test-" + str(uuid4())
|
|
180
205
|
self.requestedLeaderStorage = 80
|
|
181
206
|
|
|
182
207
|
def setUp(self):
|
|
183
208
|
super().setUp()
|
|
184
|
-
self.jobStore = f
|
|
209
|
+
self.jobStore = f"google:{self.projectID}:autoscale-{uuid4()}"
|
|
185
210
|
|
|
186
211
|
def _getScript(self):
|
|
187
212
|
# TODO: Isn't this the key file?
|
|
188
213
|
fileToSort = os.path.join(os.getcwd(), str(uuid4()))
|
|
189
|
-
with open(fileToSort,
|
|
214
|
+
with open(fileToSort, "w") as f:
|
|
190
215
|
# Fixme: making this file larger causes the test to hang
|
|
191
|
-
f.write(
|
|
192
|
-
self.rsyncUtil(
|
|
193
|
-
|
|
216
|
+
f.write("01234567890123456789012345678901")
|
|
217
|
+
self.rsyncUtil(
|
|
218
|
+
os.path.join(self._projectRootPath(), "src/toil/test/sort/sort.py"),
|
|
219
|
+
":/home/sort.py",
|
|
220
|
+
)
|
|
221
|
+
self.rsyncUtil(fileToSort, ":/home/sortFile")
|
|
194
222
|
os.unlink(fileToSort)
|
|
195
223
|
|
|
196
224
|
def _runScript(self, toilOptions):
|
|
197
|
-
runCommand = [
|
|
225
|
+
runCommand = [
|
|
226
|
+
"/home/venv/bin/python",
|
|
227
|
+
"/home/sort.py",
|
|
228
|
+
"--fileToSort=/home/sortFile",
|
|
229
|
+
]
|
|
198
230
|
#'--sseKey=/home/sortFile']
|
|
199
231
|
runCommand.extend(toilOptions)
|
|
200
|
-
log.info("_runScript: %s" %
|
|
232
|
+
log.info("_runScript: %s" % "".join(runCommand))
|
|
201
233
|
self.sshUtil(runCommand)
|
|
202
234
|
|
|
203
235
|
def launchCluster(self):
|
|
204
236
|
# add arguments to test that we can specify leader storage
|
|
205
|
-
self.createClusterUtil(
|
|
237
|
+
self.createClusterUtil(
|
|
238
|
+
args=["--leaderStorage", str(self.requestedLeaderStorage)]
|
|
239
|
+
)
|
|
206
240
|
|
|
207
241
|
# TODO: aren't these checks inherited?
|
|
208
242
|
@integrative
|
|
@@ -210,7 +244,7 @@ class GCEAutoscaleTest(AbstractGCEAutoscaleTest):
|
|
|
210
244
|
@needs_google_storage
|
|
211
245
|
def testAutoScale(self):
|
|
212
246
|
self.instanceTypes = ["n1-standard-2"]
|
|
213
|
-
self.numWorkers = [
|
|
247
|
+
self.numWorkers = ["2"]
|
|
214
248
|
self._test()
|
|
215
249
|
|
|
216
250
|
@integrative
|
|
@@ -220,7 +254,7 @@ class GCEAutoscaleTest(AbstractGCEAutoscaleTest):
|
|
|
220
254
|
self.instanceTypes = ["n1-standard-2:%f" % self.spotBid]
|
|
221
255
|
# Some spot workers have a stopped state after being started, strangely.
|
|
222
256
|
# This could be the natural preemption process, but it seems too rapid.
|
|
223
|
-
self.numWorkers = [
|
|
257
|
+
self.numWorkers = ["3"] # Try 3 to account for a stopped node.
|
|
224
258
|
self._test(preemptibleJobs=True)
|
|
225
259
|
|
|
226
260
|
|
|
@@ -229,35 +263,49 @@ class GCEStaticAutoscaleTest(GCEAutoscaleTest):
|
|
|
229
263
|
"""
|
|
230
264
|
Runs the tests on a statically provisioned cluster with autoscaling enabled.
|
|
231
265
|
"""
|
|
266
|
+
|
|
232
267
|
def __init__(self, name):
|
|
233
268
|
super().__init__(name)
|
|
234
269
|
self.requestedNodeStorage = 20
|
|
235
270
|
|
|
236
271
|
def launchCluster(self):
|
|
237
|
-
self.createClusterUtil(
|
|
238
|
-
|
|
239
|
-
|
|
272
|
+
self.createClusterUtil(
|
|
273
|
+
args=[
|
|
274
|
+
"--leaderStorage",
|
|
275
|
+
str(self.requestedLeaderStorage),
|
|
276
|
+
"--nodeTypes",
|
|
277
|
+
",".join(self.instanceTypes),
|
|
278
|
+
"-w",
|
|
279
|
+
",".join(self.numWorkers),
|
|
280
|
+
"--nodeStorage",
|
|
281
|
+
str(self.requestedLeaderStorage),
|
|
282
|
+
]
|
|
283
|
+
)
|
|
240
284
|
|
|
241
285
|
# TODO: check the number of workers and their storage
|
|
242
|
-
#nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
|
|
243
|
-
#nodes.sort(key=lambda x: x.launch_time)
|
|
286
|
+
# nodes = AWSProvisioner._getNodesInCluster(ctx, self.clusterName, both=True)
|
|
287
|
+
# nodes.sort(key=lambda x: x.launch_time)
|
|
244
288
|
# assuming that leader is first
|
|
245
|
-
#workers = nodes[1:]
|
|
289
|
+
# workers = nodes[1:]
|
|
246
290
|
# test that two worker nodes were created
|
|
247
|
-
#self.assertEqual(2, len(workers))
|
|
291
|
+
# self.assertEqual(2, len(workers))
|
|
248
292
|
# test that workers have expected storage size
|
|
249
293
|
# just use the first worker
|
|
250
|
-
#worker = workers[0]
|
|
251
|
-
#worker = next(wait_instances_running(ctx.ec2, [worker]))
|
|
252
|
-
#rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
|
|
253
|
-
#self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
|
|
254
|
-
#rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
|
|
255
|
-
#self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
|
|
294
|
+
# worker = workers[0]
|
|
295
|
+
# worker = next(wait_instances_running(ctx.ec2, [worker]))
|
|
296
|
+
# rootBlockDevice = worker.block_device_mapping["/dev/xvda"]
|
|
297
|
+
# self.assertTrue(isinstance(rootBlockDevice, BlockDeviceType))
|
|
298
|
+
# rootVolume = ctx.ec2.get_all_volumes(volume_ids=[rootBlockDevice.volume_id])[0]
|
|
299
|
+
# self.assertGreaterEqual(rootVolume.size, self.requestedNodeStorage)
|
|
256
300
|
|
|
257
301
|
def _runScript(self, toilOptions):
|
|
258
|
-
runCommand = [
|
|
302
|
+
runCommand = [
|
|
303
|
+
"/home/venv/bin/python",
|
|
304
|
+
"/home/sort.py",
|
|
305
|
+
"--fileToSort=/home/sortFile",
|
|
306
|
+
]
|
|
259
307
|
runCommand.extend(toilOptions)
|
|
260
|
-
log.info("_runScript: %s" %
|
|
308
|
+
log.info("_runScript: %s" % "".join(runCommand))
|
|
261
309
|
self.sshUtil(runCommand)
|
|
262
310
|
|
|
263
311
|
|
|
@@ -266,28 +314,37 @@ class GCEAutoscaleTestMultipleNodeTypes(AbstractGCEAutoscaleTest):
|
|
|
266
314
|
|
|
267
315
|
def __init__(self, name):
|
|
268
316
|
super().__init__(name)
|
|
269
|
-
self.clusterName =
|
|
317
|
+
self.clusterName = "provisioner-test-" + str(uuid4())
|
|
270
318
|
|
|
271
319
|
def setUp(self):
|
|
272
320
|
super().setUp()
|
|
273
|
-
self.jobStore = f
|
|
321
|
+
self.jobStore = f"google:{self.projectID}:multinode-{uuid4()}"
|
|
274
322
|
|
|
275
323
|
def _getScript(self):
|
|
276
|
-
sseKeyFile = os.path.join(os.getcwd(),
|
|
277
|
-
with open(sseKeyFile,
|
|
278
|
-
f.write(
|
|
279
|
-
self.rsyncUtil(
|
|
280
|
-
|
|
324
|
+
sseKeyFile = os.path.join(os.getcwd(), "keyFile")
|
|
325
|
+
with open(sseKeyFile, "w") as f:
|
|
326
|
+
f.write("01234567890123456789012345678901")
|
|
327
|
+
self.rsyncUtil(
|
|
328
|
+
os.path.join(self._projectRootPath(), "src/toil/test/sort/sort.py"),
|
|
329
|
+
":/home/sort.py",
|
|
330
|
+
)
|
|
331
|
+
self.rsyncUtil(sseKeyFile, ":/home/keyFile")
|
|
281
332
|
os.unlink(sseKeyFile)
|
|
282
333
|
|
|
283
334
|
def _runScript(self, toilOptions):
|
|
284
|
-
#Set memory requirements so that sort jobs can be run
|
|
335
|
+
# Set memory requirements so that sort jobs can be run
|
|
285
336
|
# on small instances, but merge jobs must be run on large
|
|
286
337
|
# instances
|
|
287
|
-
runCommand = [
|
|
338
|
+
runCommand = [
|
|
339
|
+
"/home/venv/bin/python",
|
|
340
|
+
"/home/sort.py",
|
|
341
|
+
"--fileToSort=/home/s3am/bin/asadmin",
|
|
342
|
+
"--sortMemory=0.6G",
|
|
343
|
+
"--mergeMemory=3.0G",
|
|
344
|
+
]
|
|
288
345
|
runCommand.extend(toilOptions)
|
|
289
|
-
#runCommand.append('--sseKey=/home/keyFile')
|
|
290
|
-
log.info("_runScript: %s" %
|
|
346
|
+
# runCommand.append('--sseKey=/home/keyFile')
|
|
347
|
+
log.info("_runScript: %s" % "".join(runCommand))
|
|
291
348
|
self.sshUtil(runCommand)
|
|
292
349
|
|
|
293
350
|
@integrative
|
|
@@ -295,9 +352,10 @@ class GCEAutoscaleTestMultipleNodeTypes(AbstractGCEAutoscaleTest):
|
|
|
295
352
|
@needs_google_storage
|
|
296
353
|
def testAutoScale(self):
|
|
297
354
|
self.instanceTypes = ["n1-standard-2", "n1-standard-4"]
|
|
298
|
-
self.numWorkers = [
|
|
355
|
+
self.numWorkers = ["2", "1"]
|
|
299
356
|
self._test()
|
|
300
357
|
|
|
358
|
+
|
|
301
359
|
@pytest.mark.timeout(1800)
|
|
302
360
|
class GCERestartTest(AbstractGCEAutoscaleTest):
|
|
303
361
|
"""
|
|
@@ -306,37 +364,53 @@ class GCERestartTest(AbstractGCEAutoscaleTest):
|
|
|
306
364
|
|
|
307
365
|
def __init__(self, name):
|
|
308
366
|
super().__init__(name)
|
|
309
|
-
self.clusterName =
|
|
367
|
+
self.clusterName = "restart-test-" + str(uuid4())
|
|
310
368
|
|
|
311
369
|
def setUp(self):
|
|
312
370
|
super().setUp()
|
|
313
|
-
self.instanceTypes = [
|
|
314
|
-
self.numWorkers = [
|
|
371
|
+
self.instanceTypes = ["n1-standard-1"]
|
|
372
|
+
self.numWorkers = ["1"]
|
|
315
373
|
self.scriptName = "/home/restartScript.py"
|
|
316
374
|
# TODO: replace this with a google job store
|
|
317
|
-
zone =
|
|
318
|
-
self.jobStore = f
|
|
375
|
+
zone = "us-west-2"
|
|
376
|
+
self.jobStore = f"google:{self.projectID}:restart-{uuid4()}"
|
|
319
377
|
|
|
320
378
|
def _getScript(self):
|
|
321
|
-
self.rsyncUtil(
|
|
322
|
-
|
|
323
|
-
|
|
379
|
+
self.rsyncUtil(
|
|
380
|
+
os.path.join(
|
|
381
|
+
self._projectRootPath(), "src/toil/test/provisioners/restartScript.py"
|
|
382
|
+
),
|
|
383
|
+
":" + self.scriptName,
|
|
384
|
+
)
|
|
324
385
|
|
|
325
386
|
def _runScript(self, toilOptions):
|
|
326
387
|
# clean = onSuccess
|
|
327
|
-
disallowedOptions = [
|
|
328
|
-
newOptions = [
|
|
388
|
+
disallowedOptions = ["--clean=always", "--retryCount=2"]
|
|
389
|
+
newOptions = [
|
|
390
|
+
option for option in toilOptions if option not in disallowedOptions
|
|
391
|
+
]
|
|
329
392
|
try:
|
|
330
393
|
# include a default memory - on restart the minimum memory requirement is the default, usually 2 GB
|
|
331
|
-
command = [
|
|
394
|
+
command = [
|
|
395
|
+
"/home/venv/bin/python",
|
|
396
|
+
self.scriptName,
|
|
397
|
+
"-e",
|
|
398
|
+
"FAIL=true",
|
|
399
|
+
"--defaultMemory=50000000",
|
|
400
|
+
]
|
|
332
401
|
command.extend(newOptions)
|
|
333
402
|
self.sshUtil(command)
|
|
334
403
|
except subprocess.CalledProcessError:
|
|
335
404
|
pass
|
|
336
405
|
else:
|
|
337
|
-
self.fail(
|
|
406
|
+
self.fail("Command succeeded when we expected failure")
|
|
338
407
|
with timeLimit(1200):
|
|
339
|
-
command = [
|
|
408
|
+
command = [
|
|
409
|
+
"/home/venv/bin/python",
|
|
410
|
+
self.scriptName,
|
|
411
|
+
"--restart",
|
|
412
|
+
"--defaultMemory=50000000",
|
|
413
|
+
]
|
|
340
414
|
command.extend(toilOptions)
|
|
341
415
|
self.sshUtil(command)
|
|
342
416
|
|
|
@@ -25,21 +25,33 @@ class ProvisionerTest(ToilTest):
|
|
|
25
25
|
|
|
26
26
|
def test_node_type_parsing(self) -> None:
|
|
27
27
|
assert parse_node_types(None) == []
|
|
28
|
-
assert parse_node_types(
|
|
29
|
-
assert parse_node_types(
|
|
30
|
-
assert parse_node_types(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
assert parse_node_types("") == []
|
|
29
|
+
assert parse_node_types("red beans") == [({"red beans"}, None)]
|
|
30
|
+
assert parse_node_types("red beans,rice") == [
|
|
31
|
+
({"red beans"}, None),
|
|
32
|
+
({"rice"}, None),
|
|
33
|
+
]
|
|
34
|
+
assert parse_node_types("red beans/black beans,rice") == [
|
|
35
|
+
({"red beans", "black beans"}, None),
|
|
36
|
+
({"rice"}, None),
|
|
37
|
+
]
|
|
38
|
+
assert parse_node_types("frankfurters:0.05") == [({"frankfurters"}, 0.05)]
|
|
39
|
+
assert parse_node_types(
|
|
40
|
+
"red beans/black beans:999,rice,red beans/black beans"
|
|
41
|
+
) == [
|
|
42
|
+
({"red beans", "black beans"}, 999),
|
|
43
|
+
({"rice"}, None),
|
|
44
|
+
({"red beans", "black beans"}, None),
|
|
45
|
+
]
|
|
34
46
|
with pytest.raises(ValueError):
|
|
35
|
-
parse_node_types(
|
|
47
|
+
parse_node_types("your thoughts:penny")
|
|
36
48
|
with pytest.raises(ValueError) as err:
|
|
37
|
-
parse_node_types(
|
|
38
|
-
assert
|
|
49
|
+
parse_node_types(",,,")
|
|
50
|
+
assert "empty" in str(err.value)
|
|
39
51
|
with pytest.raises(ValueError):
|
|
40
|
-
parse_node_types(
|
|
52
|
+
parse_node_types("now hear this:")
|
|
41
53
|
with pytest.raises(ValueError) as err:
|
|
42
|
-
parse_node_types(
|
|
43
|
-
assert
|
|
54
|
+
parse_node_types("miles I will walk:500:500")
|
|
55
|
+
assert "multiple" in str(err.value)
|
|
44
56
|
with pytest.raises(ValueError):
|
|
45
|
-
parse_node_types(
|
|
57
|
+
parse_node_types("red beans:500/black beans:500,rice")
|
|
@@ -6,12 +6,13 @@ from toil.job import Job
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def f0(job):
|
|
9
|
-
if
|
|
10
|
-
raise RuntimeError(
|
|
9
|
+
if "FAIL" in os.environ:
|
|
10
|
+
raise RuntimeError("failed on purpose")
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
|
|
13
|
+
if __name__ == "__main__":
|
|
13
14
|
parser = ArgumentParser()
|
|
14
15
|
Job.Runner.addToilOptions(parser)
|
|
15
16
|
options = parser.parse_args()
|
|
16
|
-
rootJob = Job.wrapJobFn(f0, cores=0.5, memory=
|
|
17
|
+
rootJob = Job.wrapJobFn(f0, cores=0.5, memory="50 M", disk="50 M")
|
|
17
18
|
Job.Runner.startToil(rootJob, options)
|