toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/wdl/wdl_synthesis.py
DELETED
|
@@ -1,1011 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2015-2021 Regents of the University of California
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
import logging
|
|
15
|
-
import os
|
|
16
|
-
import tempfile
|
|
17
|
-
from typing import Optional
|
|
18
|
-
|
|
19
|
-
from toil.wdl.wdl_functions import heredoc_wdl
|
|
20
|
-
from toil.wdl.wdl_types import (WDLArrayType,
|
|
21
|
-
WDLCompoundType,
|
|
22
|
-
WDLFileType,
|
|
23
|
-
WDLMapType,
|
|
24
|
-
WDLPairType,
|
|
25
|
-
WDLType)
|
|
26
|
-
|
|
27
|
-
logger = logging.getLogger(__name__)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class SynthesizeWDL:
|
|
31
|
-
"""
|
|
32
|
-
SynthesizeWDL takes the "workflows_dictionary" and "tasks_dictionary" produced by
|
|
33
|
-
wdl_analysis.py and uses them to write a native python script for use with Toil.
|
|
34
|
-
|
|
35
|
-
A WDL "workflow" section roughly corresponds to the python "main()" function, where
|
|
36
|
-
functions are wrapped as Toil "jobs", output dependencies specified, and called.
|
|
37
|
-
|
|
38
|
-
A WDL "task" section corresponds to a unique python function, which will be wrapped
|
|
39
|
-
as a Toil "job" and defined outside of the "main()" function that calls it.
|
|
40
|
-
|
|
41
|
-
Generally this handles breaking sections into their corresponding Toil counterparts.
|
|
42
|
-
|
|
43
|
-
For example: write the imports, then write all functions defining jobs (which have subsections
|
|
44
|
-
like: write header, define variables, read "File" types into the jobstore, docker call, etc.),
|
|
45
|
-
then write the main and all of its subsections.
|
|
46
|
-
"""
|
|
47
|
-
|
|
48
|
-
def __init__(self,
|
|
49
|
-
version: str,
|
|
50
|
-
tasks_dictionary: dict,
|
|
51
|
-
workflows_dictionary: dict,
|
|
52
|
-
output_directory: str,
|
|
53
|
-
json_dict: dict,
|
|
54
|
-
docker_user: str,
|
|
55
|
-
jobstore: Optional[str] = None,
|
|
56
|
-
destBucket: Optional[str] = None):
|
|
57
|
-
|
|
58
|
-
self.version = version
|
|
59
|
-
self.output_directory = output_directory
|
|
60
|
-
if not os.path.exists(self.output_directory):
|
|
61
|
-
try:
|
|
62
|
-
os.makedirs(self.output_directory)
|
|
63
|
-
except:
|
|
64
|
-
raise OSError(
|
|
65
|
-
'Could not create directory. Insufficient permissions or disk space most likely.')
|
|
66
|
-
|
|
67
|
-
self.output_file = os.path.join(self.output_directory, 'toilwdl_compiled.py')
|
|
68
|
-
|
|
69
|
-
if jobstore:
|
|
70
|
-
self.jobstore = jobstore
|
|
71
|
-
else:
|
|
72
|
-
self.jobstore = tempfile.mkdtemp(prefix=f"{os.getcwd()}{os.sep}toilWorkflowRun")
|
|
73
|
-
os.rmdir(self.jobstore)
|
|
74
|
-
|
|
75
|
-
if docker_user != 'None':
|
|
76
|
-
self.docker_user = "'" + docker_user + "'"
|
|
77
|
-
else:
|
|
78
|
-
self.docker_user = docker_user
|
|
79
|
-
|
|
80
|
-
# only json is required; tsv/csv are optional
|
|
81
|
-
self.json_dict = json_dict
|
|
82
|
-
|
|
83
|
-
# holds task skeletons from WDL task objects
|
|
84
|
-
self.tasks_dictionary = tasks_dictionary
|
|
85
|
-
# holds workflow structure from WDL workflow objects
|
|
86
|
-
self.workflows_dictionary = workflows_dictionary
|
|
87
|
-
|
|
88
|
-
# keep track of which workflow is being written
|
|
89
|
-
self.current_workflow = None
|
|
90
|
-
|
|
91
|
-
# unique iterator to add to cmd names
|
|
92
|
-
self.cmd_num = 0
|
|
93
|
-
|
|
94
|
-
# deposit WDL outputs into a cloud bucket; optional
|
|
95
|
-
self.destBucket = destBucket
|
|
96
|
-
|
|
97
|
-
def write_modules(self):
|
|
98
|
-
# string used to write imports to the file
|
|
99
|
-
module_string = heredoc_wdl('''
|
|
100
|
-
from toil.job import Job
|
|
101
|
-
from toil.common import Toil
|
|
102
|
-
from toil.lib.docker import apiDockerCall
|
|
103
|
-
from toil.wdl.wdl_types import WDLType
|
|
104
|
-
from toil.wdl.wdl_types import WDLStringType
|
|
105
|
-
from toil.wdl.wdl_types import WDLIntType
|
|
106
|
-
from toil.wdl.wdl_types import WDLFloatType
|
|
107
|
-
from toil.wdl.wdl_types import WDLBooleanType
|
|
108
|
-
from toil.wdl.wdl_types import WDLFileType
|
|
109
|
-
from toil.wdl.wdl_types import WDLArrayType
|
|
110
|
-
from toil.wdl.wdl_types import WDLPairType
|
|
111
|
-
from toil.wdl.wdl_types import WDLMapType
|
|
112
|
-
from toil.wdl.wdl_types import WDLFile
|
|
113
|
-
from toil.wdl.wdl_types import WDLPair
|
|
114
|
-
from toil.wdl.wdl_functions import generate_docker_bashscript_file
|
|
115
|
-
from toil.wdl.wdl_functions import generate_stdout_file
|
|
116
|
-
from toil.wdl.wdl_functions import select_first
|
|
117
|
-
from toil.wdl.wdl_functions import sub
|
|
118
|
-
from toil.wdl.wdl_functions import size
|
|
119
|
-
from toil.wdl.wdl_functions import glob
|
|
120
|
-
from toil.wdl.wdl_functions import process_and_read_file
|
|
121
|
-
from toil.wdl.wdl_functions import process_infile
|
|
122
|
-
from toil.wdl.wdl_functions import process_outfile
|
|
123
|
-
from toil.wdl.wdl_functions import abspath_file
|
|
124
|
-
from toil.wdl.wdl_functions import combine_dicts
|
|
125
|
-
from toil.wdl.wdl_functions import parse_memory
|
|
126
|
-
from toil.wdl.wdl_functions import parse_cores
|
|
127
|
-
from toil.wdl.wdl_functions import parse_disk
|
|
128
|
-
from toil.wdl.wdl_functions import read_lines
|
|
129
|
-
from toil.wdl.wdl_functions import read_tsv
|
|
130
|
-
from toil.wdl.wdl_functions import read_csv
|
|
131
|
-
from toil.wdl.wdl_functions import read_json
|
|
132
|
-
from toil.wdl.wdl_functions import read_map
|
|
133
|
-
from toil.wdl.wdl_functions import read_int
|
|
134
|
-
from toil.wdl.wdl_functions import read_string
|
|
135
|
-
from toil.wdl.wdl_functions import read_float
|
|
136
|
-
from toil.wdl.wdl_functions import read_boolean
|
|
137
|
-
from toil.wdl.wdl_functions import write_lines
|
|
138
|
-
from toil.wdl.wdl_functions import write_tsv
|
|
139
|
-
from toil.wdl.wdl_functions import write_json
|
|
140
|
-
from toil.wdl.wdl_functions import write_map
|
|
141
|
-
from toil.wdl.wdl_functions import defined
|
|
142
|
-
from toil.wdl.wdl_functions import basename
|
|
143
|
-
from toil.wdl.wdl_functions import floor
|
|
144
|
-
from toil.wdl.wdl_functions import ceil
|
|
145
|
-
from toil.wdl.wdl_functions import wdl_range
|
|
146
|
-
from toil.wdl.wdl_functions import transpose
|
|
147
|
-
from toil.wdl.wdl_functions import length
|
|
148
|
-
from toil.wdl.wdl_functions import wdl_zip
|
|
149
|
-
from toil.wdl.wdl_functions import cross
|
|
150
|
-
from toil.wdl.wdl_functions import as_pairs
|
|
151
|
-
from toil.wdl.wdl_functions import as_map
|
|
152
|
-
from toil.wdl.wdl_functions import keys
|
|
153
|
-
from toil.wdl.wdl_functions import collect_by_key
|
|
154
|
-
from toil.wdl.wdl_functions import flatten
|
|
155
|
-
import fnmatch
|
|
156
|
-
import textwrap
|
|
157
|
-
import subprocess
|
|
158
|
-
import os
|
|
159
|
-
import errno
|
|
160
|
-
import time
|
|
161
|
-
import shutil
|
|
162
|
-
import shlex
|
|
163
|
-
import uuid
|
|
164
|
-
import logging
|
|
165
|
-
|
|
166
|
-
_toil_wdl_internal__current_working_dir = os.getcwd()
|
|
167
|
-
|
|
168
|
-
logger = logging.getLogger(__name__)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
''', {'jobstore': self.jobstore})[1:]
|
|
172
|
-
return module_string
|
|
173
|
-
|
|
174
|
-
def write_main(self):
|
|
175
|
-
"""
|
|
176
|
-
Writes out a huge string representing the main section of the python
|
|
177
|
-
compiled toil script.
|
|
178
|
-
|
|
179
|
-
Currently looks at and writes 5 sections:
|
|
180
|
-
1. JSON Variables (includes importing and preparing files as tuples)
|
|
181
|
-
2. TSV Variables (includes importing and preparing files as tuples)
|
|
182
|
-
3. CSV Variables (includes importing and preparing files as tuples)
|
|
183
|
-
4. Wrapping each WDL "task" function as a toil job
|
|
184
|
-
5. List out children and encapsulated jobs by priority, then start job0.
|
|
185
|
-
|
|
186
|
-
This should create variable declarations necessary for function calls.
|
|
187
|
-
Map file paths appropriately and store them in the toil fileStore so
|
|
188
|
-
that they are persistent from job to job. Create job wrappers for toil.
|
|
189
|
-
And finally write out, and run the jobs in order of priority using the
|
|
190
|
-
addChild and encapsulate commands provided by toil.
|
|
191
|
-
|
|
192
|
-
:return: giant string containing the main def for the toil script.
|
|
193
|
-
"""
|
|
194
|
-
|
|
195
|
-
main_section = ''
|
|
196
|
-
|
|
197
|
-
# write out the main header
|
|
198
|
-
main_header = self.write_main_header()
|
|
199
|
-
main_section = main_section + main_header
|
|
200
|
-
|
|
201
|
-
# write toil job wrappers with input vars
|
|
202
|
-
jobs_to_write = self.write_main_jobwrappers()
|
|
203
|
-
main_section = main_section + jobs_to_write
|
|
204
|
-
|
|
205
|
-
# loop to export all outputs to a cloud bucket
|
|
206
|
-
if self.destBucket:
|
|
207
|
-
main_destbucket = self.write_main_destbucket()
|
|
208
|
-
main_section = main_section + main_destbucket
|
|
209
|
-
|
|
210
|
-
return main_section
|
|
211
|
-
|
|
212
|
-
def write_main_header(self):
|
|
213
|
-
main_header = heredoc_wdl('''
|
|
214
|
-
if __name__=="__main__":
|
|
215
|
-
options = Job.Runner.getDefaultOptions("{jobstore}")
|
|
216
|
-
options.clean = 'always'
|
|
217
|
-
with Toil(options) as fileStore:
|
|
218
|
-
''', {'jobstore': self.jobstore})
|
|
219
|
-
return main_header
|
|
220
|
-
|
|
221
|
-
def write_main_jobwrappers(self):
|
|
222
|
-
"""
|
|
223
|
-
Writes out 'jobs' as wrapped toil objects in preparation for calling.
|
|
224
|
-
|
|
225
|
-
:return: A string representing this.
|
|
226
|
-
"""
|
|
227
|
-
main_section = ''
|
|
228
|
-
|
|
229
|
-
# toil cannot technically start with multiple jobs, so an empty
|
|
230
|
-
# 'initialize_jobs' function is always called first to get around this
|
|
231
|
-
main_section = main_section + ' job0 = Job.wrapJobFn(initialize_jobs)\n'
|
|
232
|
-
|
|
233
|
-
# declare each job in main as a wrapped toil function in order of priority
|
|
234
|
-
for wf in self.workflows_dictionary:
|
|
235
|
-
self.current_workflow = wf
|
|
236
|
-
for assignment in self.workflows_dictionary[wf]:
|
|
237
|
-
if assignment.startswith('declaration'):
|
|
238
|
-
main_section += self.write_main_jobwrappers_declaration(self.workflows_dictionary[wf][assignment])
|
|
239
|
-
if assignment.startswith('call'):
|
|
240
|
-
main_section += ' job0 = job0.encapsulate()\n'
|
|
241
|
-
main_section += self.write_main_jobwrappers_call(self.workflows_dictionary[wf][assignment])
|
|
242
|
-
if assignment.startswith('scatter'):
|
|
243
|
-
main_section += ' job0 = job0.encapsulate()\n'
|
|
244
|
-
main_section += self.write_main_jobwrappers_scatter(self.workflows_dictionary[wf][assignment],
|
|
245
|
-
assignment)
|
|
246
|
-
if assignment.startswith('if'):
|
|
247
|
-
main_section += ' if {}:\n'.format(self.workflows_dictionary[wf][assignment]['expression'])
|
|
248
|
-
main_section += self.write_main_jobwrappers_if(self.workflows_dictionary[wf][assignment]['body'])
|
|
249
|
-
|
|
250
|
-
main_section += '\n fileStore.start(job0)\n'
|
|
251
|
-
|
|
252
|
-
return main_section
|
|
253
|
-
|
|
254
|
-
def write_main_jobwrappers_declaration(self, declaration):
|
|
255
|
-
|
|
256
|
-
main_section = ''
|
|
257
|
-
var_name, var_type, var_expr = declaration
|
|
258
|
-
|
|
259
|
-
# check the json file for the expression's value
|
|
260
|
-
# this is a higher priority and overrides anything written in the .wdl
|
|
261
|
-
json_expressn = self.json_var(wf=self.current_workflow, var=var_name)
|
|
262
|
-
if json_expressn is not None:
|
|
263
|
-
var_expr = json_expressn
|
|
264
|
-
|
|
265
|
-
main_section += ' {} = {}.create(\n {})\n' \
|
|
266
|
-
.format(var_name, self.write_declaration_type(var_type), var_expr)
|
|
267
|
-
|
|
268
|
-
# import filepath into jobstore
|
|
269
|
-
if self.needs_file_import(var_type) and var_expr:
|
|
270
|
-
main_section += f' {var_name} = process_infile({var_name}, fileStore)\n'
|
|
271
|
-
|
|
272
|
-
return main_section
|
|
273
|
-
|
|
274
|
-
def write_main_destbucket(self):
|
|
275
|
-
"""
|
|
276
|
-
Writes out a loop for exporting outputs to a cloud bucket.
|
|
277
|
-
|
|
278
|
-
:return: A string representing this.
|
|
279
|
-
"""
|
|
280
|
-
main_section = heredoc_wdl('''
|
|
281
|
-
outdir = '{outdir}'
|
|
282
|
-
onlyfiles = [os.path.join(outdir, f) for f in os.listdir(outdir) if os.path.isfile(os.path.join(outdir, f))]
|
|
283
|
-
for output_f_path in onlyfiles:
|
|
284
|
-
output_file = fileStore.writeGlobalFile(output_f_path)
|
|
285
|
-
preserveThisFilename = os.path.basename(output_f_path)
|
|
286
|
-
destUrl = '/'.join(s.strip('/') for s in [destBucket, preserveThisFilename])
|
|
287
|
-
fileStore.exportFile(output_file, destUrl)
|
|
288
|
-
''', {'outdir': self.output_directory}, indent=' ')
|
|
289
|
-
return main_section
|
|
290
|
-
|
|
291
|
-
def fetch_ignoredifs(self, assignments, breaking_assignment):
|
|
292
|
-
ignore_ifs = []
|
|
293
|
-
for assignment in assignments:
|
|
294
|
-
if assignment.startswith('call'):
|
|
295
|
-
pass
|
|
296
|
-
elif assignment.startswith('scatter'):
|
|
297
|
-
pass
|
|
298
|
-
elif assignment.startswith('if'):
|
|
299
|
-
if not self.fetch_ignoredifs_chain(assignments[assignment]['body'], breaking_assignment):
|
|
300
|
-
ignore_ifs.append(assignment)
|
|
301
|
-
return ignore_ifs
|
|
302
|
-
|
|
303
|
-
def fetch_ignoredifs_chain(self, assignments, breaking_assignment):
|
|
304
|
-
for assignment in assignments:
|
|
305
|
-
if assignment.startswith('call'):
|
|
306
|
-
if assignment == breaking_assignment:
|
|
307
|
-
return True
|
|
308
|
-
if assignment.startswith('scatter'):
|
|
309
|
-
if assignment == breaking_assignment:
|
|
310
|
-
return True
|
|
311
|
-
if assignment.startswith('if'):
|
|
312
|
-
return self.fetch_ignoredifs_chain(assignments[assignment]['body'], breaking_assignment)
|
|
313
|
-
return False
|
|
314
|
-
|
|
315
|
-
def write_main_jobwrappers_if(self, if_statement):
|
|
316
|
-
# check for empty if statement
|
|
317
|
-
if not if_statement:
|
|
318
|
-
return self.indent(' pass')
|
|
319
|
-
|
|
320
|
-
main_section = ''
|
|
321
|
-
for assignment in if_statement:
|
|
322
|
-
if assignment.startswith('declaration'):
|
|
323
|
-
main_section += self.write_main_jobwrappers_declaration(if_statement[assignment])
|
|
324
|
-
if assignment.startswith('call'):
|
|
325
|
-
main_section += ' job0 = job0.encapsulate()\n'
|
|
326
|
-
main_section += self.write_main_jobwrappers_call(if_statement[assignment])
|
|
327
|
-
if assignment.startswith('scatter'):
|
|
328
|
-
main_section += ' job0 = job0.encapsulate()\n'
|
|
329
|
-
main_section += self.write_main_jobwrappers_scatter(if_statement[assignment], assignment)
|
|
330
|
-
if assignment.startswith('if'):
|
|
331
|
-
main_section += ' if {}:\n'.format(if_statement[assignment]['expression'])
|
|
332
|
-
main_section += self.write_main_jobwrappers_if(if_statement[assignment]['body'])
|
|
333
|
-
main_section = self.indent(main_section)
|
|
334
|
-
return main_section
|
|
335
|
-
|
|
336
|
-
def write_main_jobwrappers_scatter(self, task, assignment):
|
|
337
|
-
scatter_inputs = self.fetch_scatter_inputs(assignment)
|
|
338
|
-
|
|
339
|
-
main_section = ' {scatter} = job0.addChild({scatter}Cls('.format(scatter=assignment)
|
|
340
|
-
for var in scatter_inputs:
|
|
341
|
-
main_section += var + '=' + var + ', '
|
|
342
|
-
if main_section.endswith(', '):
|
|
343
|
-
main_section = main_section[:-2]
|
|
344
|
-
main_section += '))\n'
|
|
345
|
-
|
|
346
|
-
scatter_outputs = self.fetch_scatter_outputs(task)
|
|
347
|
-
for var in scatter_outputs:
|
|
348
|
-
main_section += ' {var} = {scatter}.rv("{var}")\n'.format(var=var['task'] + '_' + var['output'], scatter=assignment)
|
|
349
|
-
|
|
350
|
-
return main_section
|
|
351
|
-
|
|
352
|
-
def fetch_scatter_outputs(self, task):
|
|
353
|
-
scatteroutputs = []
|
|
354
|
-
|
|
355
|
-
for var in task['body']:
|
|
356
|
-
# TODO variable support
|
|
357
|
-
if var.startswith('call'):
|
|
358
|
-
if 'outputs' in self.tasks_dictionary[task['body'][var]['task']]:
|
|
359
|
-
for output in self.tasks_dictionary[task['body'][var]['task']]['outputs']:
|
|
360
|
-
scatteroutputs.append({'task': task['body'][var]['alias'], 'output': output[0]})
|
|
361
|
-
return scatteroutputs
|
|
362
|
-
|
|
363
|
-
def fetch_scatter_inputs(self, assigned):
|
|
364
|
-
|
|
365
|
-
for wf in self.workflows_dictionary:
|
|
366
|
-
ignored_ifs = self.fetch_ignoredifs(self.workflows_dictionary[wf], assigned)
|
|
367
|
-
# TODO support additional wfs
|
|
368
|
-
break
|
|
369
|
-
|
|
370
|
-
scatternamespace = []
|
|
371
|
-
|
|
372
|
-
for wf in self.workflows_dictionary:
|
|
373
|
-
for assignment in self.workflows_dictionary[wf]:
|
|
374
|
-
if assignment == assigned:
|
|
375
|
-
return scatternamespace
|
|
376
|
-
elif assignment.startswith('declaration'):
|
|
377
|
-
name, _, _ = self.workflows_dictionary[wf][assignment]
|
|
378
|
-
scatternamespace.append(name)
|
|
379
|
-
elif assignment.startswith('call'):
|
|
380
|
-
if 'outputs' in self.tasks_dictionary[self.workflows_dictionary[wf][assignment]['task']]:
|
|
381
|
-
for output in self.tasks_dictionary[self.workflows_dictionary[wf][assignment]['task']]['outputs']:
|
|
382
|
-
scatternamespace.append(self.workflows_dictionary[wf][assignment]['alias'] + '_' + output[0])
|
|
383
|
-
elif assignment.startswith('scatter'):
|
|
384
|
-
for var in self.fetch_scatter_outputs(self.workflows_dictionary[wf][assignment]):
|
|
385
|
-
scatternamespace.append(var['task'] + '_' + var['output'])
|
|
386
|
-
elif assignment.startswith('if') and assignment not in ignored_ifs:
|
|
387
|
-
new_list, cont_or_break = self.fetch_scatter_inputs_chain(self.workflows_dictionary[wf][assignment]['body'],
|
|
388
|
-
assigned,
|
|
389
|
-
ignored_ifs,
|
|
390
|
-
inputs_list=[])
|
|
391
|
-
scatternamespace += new_list
|
|
392
|
-
if not cont_or_break:
|
|
393
|
-
return scatternamespace
|
|
394
|
-
return scatternamespace
|
|
395
|
-
|
|
396
|
-
def fetch_scatter_inputs_chain(self, inputs, assigned, ignored_ifs, inputs_list):
|
|
397
|
-
for i in inputs:
|
|
398
|
-
if i == assigned:
|
|
399
|
-
return inputs_list, False
|
|
400
|
-
elif i.startswith('call'):
|
|
401
|
-
if 'outputs' in self.tasks_dictionary[inputs[i]['task']]:
|
|
402
|
-
for output in self.tasks_dictionary[inputs[i]['task']]['outputs']:
|
|
403
|
-
inputs_list.append(inputs[i]['alias'] + '_' + output[0])
|
|
404
|
-
elif i.startswith('scatter'):
|
|
405
|
-
for var in self.fetch_scatter_outputs(inputs[i]):
|
|
406
|
-
inputs_list.append(var['task'] + '_' + var['output'])
|
|
407
|
-
elif i.startswith('if') and i not in ignored_ifs:
|
|
408
|
-
inputs_list, cont_or_break = self.fetch_scatter_inputs_chain(inputs[i]['body'], assigned, ignored_ifs, inputs_list)
|
|
409
|
-
if not cont_or_break:
|
|
410
|
-
return inputs_list, False
|
|
411
|
-
return inputs_list, True
|
|
412
|
-
|
|
413
|
-
def write_main_jobwrappers_call(self, task):
|
|
414
|
-
main_section = ' {} = job0.addChild({}Cls('.format(task['alias'], task['task'])
|
|
415
|
-
for var in task['io']:
|
|
416
|
-
main_section += var + '=' + task['io'][var] + ', '
|
|
417
|
-
if main_section.endswith(', '):
|
|
418
|
-
main_section = main_section[:-2]
|
|
419
|
-
main_section += '))\n'
|
|
420
|
-
|
|
421
|
-
call_outputs = self.fetch_call_outputs(task)
|
|
422
|
-
for var in call_outputs:
|
|
423
|
-
main_section += ' {var} = {task}.rv("{output}")\n'.format(var=var['task'] + '_' + var['output'],
|
|
424
|
-
task=var['task'],
|
|
425
|
-
output=var['output'])
|
|
426
|
-
return main_section
|
|
427
|
-
|
|
428
|
-
def fetch_call_outputs(self, task):
|
|
429
|
-
calloutputs = []
|
|
430
|
-
if 'outputs' in self.tasks_dictionary[task['task']]:
|
|
431
|
-
for output in self.tasks_dictionary[task['task']]['outputs']:
|
|
432
|
-
calloutputs.append({'task': task['alias'], 'output': output[0]})
|
|
433
|
-
return calloutputs
|
|
434
|
-
|
|
435
|
-
def write_functions(self):
|
|
436
|
-
"""
|
|
437
|
-
Writes out a python function for each WDL "task" object.
|
|
438
|
-
|
|
439
|
-
:return: a giant string containing the meat of the job defs.
|
|
440
|
-
"""
|
|
441
|
-
|
|
442
|
-
# toil cannot technically start with multiple jobs, so an empty
|
|
443
|
-
# 'initialize_jobs' function is always called first to get around this
|
|
444
|
-
fn_section = 'def initialize_jobs(job):\n' + \
|
|
445
|
-
' job.fileStore.logToMaster("initialize_jobs")\n'
|
|
446
|
-
|
|
447
|
-
for job in self.tasks_dictionary:
|
|
448
|
-
fn_section += self.write_function(job)
|
|
449
|
-
|
|
450
|
-
for wf in self.workflows_dictionary:
|
|
451
|
-
for assignment in self.workflows_dictionary[wf]:
|
|
452
|
-
if assignment.startswith('scatter'):
|
|
453
|
-
fn_section += self.write_scatterfunction(self.workflows_dictionary[wf][assignment], assignment)
|
|
454
|
-
if assignment.startswith('if'):
|
|
455
|
-
fn_section += self.write_scatterfunctions_within_if(self.workflows_dictionary[wf][assignment]['body'])
|
|
456
|
-
|
|
457
|
-
return fn_section
|
|
458
|
-
|
|
459
|
-
def write_scatterfunctions_within_if(self, ifstatement):
|
|
460
|
-
fn_section = ''
|
|
461
|
-
for assignment in ifstatement:
|
|
462
|
-
if assignment.startswith('scatter'):
|
|
463
|
-
fn_section += self.write_scatterfunction(ifstatement[assignment], assignment)
|
|
464
|
-
if assignment.startswith('if'):
|
|
465
|
-
fn_section += self.write_scatterfunctions_within_if(ifstatement[assignment]['body'])
|
|
466
|
-
return fn_section
|
|
467
|
-
|
|
468
|
-
def write_scatterfunction(self, job, scattername):
|
|
469
|
-
"""
|
|
470
|
-
Writes out a python function for each WDL "scatter" object.
|
|
471
|
-
"""
|
|
472
|
-
|
|
473
|
-
scatter_outputs = self.fetch_scatter_outputs(job)
|
|
474
|
-
|
|
475
|
-
# write the function header
|
|
476
|
-
fn_section = self.write_scatterfunction_header(scattername)
|
|
477
|
-
|
|
478
|
-
# write the scatter definitions
|
|
479
|
-
fn_section += self.write_scatterfunction_lists(scatter_outputs)
|
|
480
|
-
|
|
481
|
-
# write
|
|
482
|
-
fn_section += self.write_scatterfunction_loop(job, scatter_outputs)
|
|
483
|
-
|
|
484
|
-
# write the outputs for the task to return
|
|
485
|
-
fn_section += self.write_scatterfunction_outputreturn(scatter_outputs)
|
|
486
|
-
|
|
487
|
-
return fn_section
|
|
488
|
-
|
|
489
|
-
def write_scatterfunction_header(self, scattername):
|
|
490
|
-
"""
|
|
491
|
-
|
|
492
|
-
:return:
|
|
493
|
-
"""
|
|
494
|
-
scatter_inputs = self.fetch_scatter_inputs(scattername)
|
|
495
|
-
|
|
496
|
-
fn_section = f'\n\nclass {scattername}Cls(Job):\n'
|
|
497
|
-
fn_section += ' def __init__(self, '
|
|
498
|
-
for input in scatter_inputs:
|
|
499
|
-
fn_section += f'{input}=None, '
|
|
500
|
-
fn_section += '*args, **kwargs):\n'
|
|
501
|
-
fn_section += ' Job.__init__(self)\n\n'
|
|
502
|
-
|
|
503
|
-
for input in scatter_inputs:
|
|
504
|
-
fn_section += ' self.id_{input} = {input}\n'.format(input=input)
|
|
505
|
-
|
|
506
|
-
fn_section += heredoc_wdl('''
|
|
507
|
-
|
|
508
|
-
def run(self, fileStore):
|
|
509
|
-
fileStore.logToMaster("{jobname}")
|
|
510
|
-
tempDir = fileStore.getLocalTempDir()
|
|
511
|
-
|
|
512
|
-
try:
|
|
513
|
-
os.makedirs(os.path.join(tempDir, 'execution'))
|
|
514
|
-
except OSError as e:
|
|
515
|
-
if e.errno != errno.EEXIST:
|
|
516
|
-
raise
|
|
517
|
-
''', {'jobname': scattername}, indent=' ')[1:]
|
|
518
|
-
for input in scatter_inputs:
|
|
519
|
-
fn_section += ' {input} = self.id_{input}\n'.format(input=input)
|
|
520
|
-
return fn_section
|
|
521
|
-
|
|
522
|
-
def write_scatterfunction_outputreturn(self, scatter_outputs):
|
|
523
|
-
"""
|
|
524
|
-
|
|
525
|
-
:return:
|
|
526
|
-
"""
|
|
527
|
-
fn_section = '\n rvDict = {'
|
|
528
|
-
for var in scatter_outputs:
|
|
529
|
-
fn_section += '"{var}": {var}, '.format(var=var['task'] + '_' + var['output'])
|
|
530
|
-
if fn_section.endswith(', '):
|
|
531
|
-
fn_section = fn_section[:-2]
|
|
532
|
-
fn_section += '}\n'
|
|
533
|
-
fn_section += ' return rvDict\n\n'
|
|
534
|
-
|
|
535
|
-
return fn_section[:-1]
|
|
536
|
-
|
|
537
|
-
def write_scatterfunction_lists(self, scatter_outputs):
|
|
538
|
-
"""
|
|
539
|
-
|
|
540
|
-
:return:
|
|
541
|
-
"""
|
|
542
|
-
fn_section = '\n'
|
|
543
|
-
for var in scatter_outputs:
|
|
544
|
-
fn_section += ' {var} = []\n'.format(var=var['task'] + '_' + var['output'])
|
|
545
|
-
|
|
546
|
-
return fn_section
|
|
547
|
-
|
|
548
|
-
def write_scatterfunction_loop(self, job, scatter_outputs):
|
|
549
|
-
"""
|
|
550
|
-
|
|
551
|
-
:return:
|
|
552
|
-
"""
|
|
553
|
-
collection = job['collection']
|
|
554
|
-
item = job['item']
|
|
555
|
-
|
|
556
|
-
fn_section = f' for {item} in {collection}:\n'
|
|
557
|
-
|
|
558
|
-
previous_dependency = 'self'
|
|
559
|
-
for statement in job['body']:
|
|
560
|
-
if statement.startswith('declaration'):
|
|
561
|
-
# reusing write_main_jobwrappers_declaration() here, but it needs to be indented one more level.
|
|
562
|
-
fn_section += self.indent(
|
|
563
|
-
self.write_main_jobwrappers_declaration(job['body'][statement]))
|
|
564
|
-
elif statement.startswith('call'):
|
|
565
|
-
fn_section += self.write_scatter_callwrapper(job['body'][statement], previous_dependency)
|
|
566
|
-
previous_dependency = 'job_' + job['body'][statement]['alias']
|
|
567
|
-
elif statement.startswith('scatter'):
|
|
568
|
-
raise NotImplementedError('nested scatter not implemented.')
|
|
569
|
-
elif statement.startswith('if'):
|
|
570
|
-
fn_section += ' if {}:\n'.format(job['body'][statement]['expression'])
|
|
571
|
-
# reusing write_main_jobwrappers_if() here, but it needs to be indented one more level.
|
|
572
|
-
fn_section += self.indent(self.write_main_jobwrappers_if(job['body'][statement]['body']))
|
|
573
|
-
|
|
574
|
-
# check for empty scatter section
|
|
575
|
-
if len(job['body']) == 0:
|
|
576
|
-
fn_section += ' pass'
|
|
577
|
-
|
|
578
|
-
for var in scatter_outputs:
|
|
579
|
-
fn_section += ' {var}.append({task}.rv("{output}"))\n'.format(var=var['task'] + '_' + var['output'],
|
|
580
|
-
task='job_' + var['task'],
|
|
581
|
-
output=var['output'])
|
|
582
|
-
return fn_section
|
|
583
|
-
|
|
584
|
-
def write_scatter_callwrapper(self, job, previous_dependency):
|
|
585
|
-
fn_section = ' job_{alias} = {pd}.addFollowOn({task}Cls('.format(alias=job['alias'],
|
|
586
|
-
pd=previous_dependency,
|
|
587
|
-
task=job['task'])
|
|
588
|
-
for var in job['io']:
|
|
589
|
-
fn_section += var + '=' + job['io'][var] + ', '
|
|
590
|
-
if fn_section.endswith(', '):
|
|
591
|
-
fn_section = fn_section[:-2]
|
|
592
|
-
fn_section += '))\n'
|
|
593
|
-
return fn_section
|
|
594
|
-
|
|
595
|
-
def write_function(self, job):
|
|
596
|
-
"""
|
|
597
|
-
Writes out a python function for each WDL "task" object.
|
|
598
|
-
|
|
599
|
-
Each python function is a unit of work written out as a string in
|
|
600
|
-
preparation to being written out to a file. In WDL, each "job" is
|
|
601
|
-
called a "task". Each WDL task is written out in multiple steps:
|
|
602
|
-
|
|
603
|
-
1: Header and inputs (e.g. 'def mapping(self, input1, input2)')
|
|
604
|
-
2: Log job name (e.g. 'job.fileStore.logToMaster('initialize_jobs')')
|
|
605
|
-
3: Create temp dir (e.g. 'tempDir = fileStore.getLocalTempDir()')
|
|
606
|
-
4: import filenames and use readGlobalFile() to get files from the
|
|
607
|
-
jobStore
|
|
608
|
-
5: Reformat commandline variables (like converting to ' '.join(files)).
|
|
609
|
-
6: Commandline call using subprocess.Popen().
|
|
610
|
-
7: Write the section returning the outputs. Also logs stats.
|
|
611
|
-
|
|
612
|
-
:return: a giant string containing the meat of the job defs for the toil script.
|
|
613
|
-
"""
|
|
614
|
-
|
|
615
|
-
# write the function header
|
|
616
|
-
fn_section = self.write_function_header(job)
|
|
617
|
-
|
|
618
|
-
# write out commandline keywords
|
|
619
|
-
fn_section += self.write_function_cmdline(job)
|
|
620
|
-
|
|
621
|
-
if self.needsdocker(job):
|
|
622
|
-
# write a bash script to inject into the docker
|
|
623
|
-
fn_section += self.write_function_bashscriptline(job)
|
|
624
|
-
# write a call to the docker API
|
|
625
|
-
fn_section += self.write_function_dockercall(job)
|
|
626
|
-
else:
|
|
627
|
-
# write a subprocess call
|
|
628
|
-
fn_section += self.write_function_subprocesspopen()
|
|
629
|
-
|
|
630
|
-
# write the outputs for the definition to return
|
|
631
|
-
fn_section += self.write_function_outputreturn(job, docker=self.needsdocker(job))
|
|
632
|
-
|
|
633
|
-
return fn_section
|
|
634
|
-
|
|
635
|
-
def write_function_header(self, job):
|
|
636
|
-
"""
|
|
637
|
-
Writes the header that starts each function, for example, this function
|
|
638
|
-
can write and return:
|
|
639
|
-
|
|
640
|
-
'def write_function_header(self, job, job_declaration_array):'
|
|
641
|
-
|
|
642
|
-
:param job: A list such that:
|
|
643
|
-
(job priority #, job ID #, Job Skeleton Name, Job Alias)
|
|
644
|
-
:param job_declaration_array: A list of all inputs that job requires.
|
|
645
|
-
:return: A string representing this.
|
|
646
|
-
"""
|
|
647
|
-
fn_section = f'\n\nclass {job}Cls(Job):\n'
|
|
648
|
-
fn_section += ' def __init__(self, '
|
|
649
|
-
if 'inputs' in self.tasks_dictionary[job]:
|
|
650
|
-
for i in self.tasks_dictionary[job]['inputs']:
|
|
651
|
-
var = i[0]
|
|
652
|
-
vartype = i[1]
|
|
653
|
-
if vartype == 'String':
|
|
654
|
-
fn_section += f'{var}="", '
|
|
655
|
-
else:
|
|
656
|
-
fn_section += f'{var}=None, '
|
|
657
|
-
fn_section += '*args, **kwargs):\n'
|
|
658
|
-
fn_section += f' super({job}Cls, self).__init__(*args, **kwargs)\n'
|
|
659
|
-
|
|
660
|
-
# TODO: Resolve inherent problems resolving resource requirements
|
|
661
|
-
# In WDL, "local-disk " + 500 + " HDD" cannot be directly converted to python.
|
|
662
|
-
# This needs a special handler.
|
|
663
|
-
if 'runtime' in self.tasks_dictionary[job]:
|
|
664
|
-
runtime_resources = []
|
|
665
|
-
if 'memory' in self.tasks_dictionary[job]['runtime']:
|
|
666
|
-
runtime_resources.append('memory=memory')
|
|
667
|
-
memory = self.tasks_dictionary[job]['runtime']['memory']
|
|
668
|
-
fn_section += f' memory=parse_memory({memory})\n'
|
|
669
|
-
if 'cpu' in self.tasks_dictionary[job]['runtime']:
|
|
670
|
-
runtime_resources.append('cores=cores')
|
|
671
|
-
cores = self.tasks_dictionary[job]['runtime']['cpu']
|
|
672
|
-
fn_section += f' cores=parse_cores({cores})\n'
|
|
673
|
-
if 'disks' in self.tasks_dictionary[job]['runtime']:
|
|
674
|
-
runtime_resources.append('disk=disk')
|
|
675
|
-
disk = self.tasks_dictionary[job]['runtime']['disks']
|
|
676
|
-
fn_section += f' disk=parse_disk({disk})\n'
|
|
677
|
-
runtime_resources = ['self'] + runtime_resources
|
|
678
|
-
fn_section += ' Job.__init__({})\n\n'.format(', '.join(runtime_resources))
|
|
679
|
-
|
|
680
|
-
if 'inputs' in self.tasks_dictionary[job]:
|
|
681
|
-
for i in self.tasks_dictionary[job]['inputs']:
|
|
682
|
-
var = i[0]
|
|
683
|
-
var_type = i[1]
|
|
684
|
-
var_expressn = i[2]
|
|
685
|
-
json_expressn = self.json_var(task=job, var=var)
|
|
686
|
-
|
|
687
|
-
# json declarations have priority and can overwrite
|
|
688
|
-
# whatever is in the wdl file
|
|
689
|
-
if json_expressn is not None:
|
|
690
|
-
var_expressn = json_expressn
|
|
691
|
-
|
|
692
|
-
if var_expressn is None:
|
|
693
|
-
# declarations from workflow
|
|
694
|
-
fn_section += f' self.id_{var} = {var}\n'
|
|
695
|
-
else:
|
|
696
|
-
# declarations from a WDL or JSON file
|
|
697
|
-
fn_section += ' self.id_{} = {}.create(\n {})\n'\
|
|
698
|
-
.format(var, self.write_declaration_type(var_type), var_expressn)
|
|
699
|
-
|
|
700
|
-
fn_section += heredoc_wdl('''
|
|
701
|
-
|
|
702
|
-
def run(self, fileStore):
|
|
703
|
-
fileStore.logToMaster("{jobname}")
|
|
704
|
-
tempDir = fileStore.getLocalTempDir()
|
|
705
|
-
|
|
706
|
-
_toil_wdl_internal__stdout_file = os.path.join(tempDir, 'stdout')
|
|
707
|
-
_toil_wdl_internal__stderr_file = os.path.join(tempDir, 'stderr')
|
|
708
|
-
|
|
709
|
-
try:
|
|
710
|
-
os.makedirs(os.path.join(tempDir, 'execution'))
|
|
711
|
-
except OSError as e:
|
|
712
|
-
if e.errno != errno.EEXIST:
|
|
713
|
-
raise
|
|
714
|
-
''', {'jobname': job}, indent=' ')[1:]
|
|
715
|
-
if 'inputs' in self.tasks_dictionary[job]:
|
|
716
|
-
for i in self.tasks_dictionary[job]['inputs']:
|
|
717
|
-
var = i[0]
|
|
718
|
-
var_type = i[1]
|
|
719
|
-
|
|
720
|
-
docker_bool = str(self.needsdocker(job))
|
|
721
|
-
|
|
722
|
-
if self.needs_file_import(var_type):
|
|
723
|
-
args = ', '.join(
|
|
724
|
-
[
|
|
725
|
-
f'abspath_file(self.id_{var}, _toil_wdl_internal__current_working_dir)',
|
|
726
|
-
'tempDir',
|
|
727
|
-
'fileStore',
|
|
728
|
-
f'docker={docker_bool}'
|
|
729
|
-
])
|
|
730
|
-
fn_section += f' {var} = process_and_read_file({args})\n'
|
|
731
|
-
else:
|
|
732
|
-
fn_section += f' {var} = self.id_{var}\n'
|
|
733
|
-
|
|
734
|
-
return fn_section
|
|
735
|
-
|
|
736
|
-
def json_var(self, var, task=None, wf=None):
|
|
737
|
-
"""
|
|
738
|
-
|
|
739
|
-
:param var:
|
|
740
|
-
:param task:
|
|
741
|
-
:param wf:
|
|
742
|
-
:return:
|
|
743
|
-
"""
|
|
744
|
-
# default to the last workflow in the list
|
|
745
|
-
if wf is None:
|
|
746
|
-
for workflow in self.workflows_dictionary:
|
|
747
|
-
wf = workflow
|
|
748
|
-
|
|
749
|
-
for identifier in self.json_dict:
|
|
750
|
-
# check task declarations
|
|
751
|
-
if task:
|
|
752
|
-
if identifier == f'{wf}.{task}.{var}':
|
|
753
|
-
return self.json_dict[identifier]
|
|
754
|
-
# else check workflow declarations
|
|
755
|
-
else:
|
|
756
|
-
if identifier == f'{wf}.{var}':
|
|
757
|
-
return self.json_dict[identifier]
|
|
758
|
-
|
|
759
|
-
return None
|
|
760
|
-
|
|
761
|
-
def needs_file_import(self, var_type: WDLType) -> bool:
|
|
762
|
-
"""
|
|
763
|
-
Check if the given type contains a File type. A return value of True
|
|
764
|
-
means that the value with this type has files to import.
|
|
765
|
-
"""
|
|
766
|
-
if isinstance(var_type, WDLFileType):
|
|
767
|
-
return True
|
|
768
|
-
|
|
769
|
-
if isinstance(var_type, WDLCompoundType):
|
|
770
|
-
if isinstance(var_type, WDLArrayType):
|
|
771
|
-
return self.needs_file_import(var_type.element)
|
|
772
|
-
elif isinstance(var_type, WDLPairType):
|
|
773
|
-
return self.needs_file_import(var_type.left) or self.needs_file_import(var_type.right)
|
|
774
|
-
elif isinstance(var_type, WDLMapType):
|
|
775
|
-
return self.needs_file_import(var_type.key) or self.needs_file_import(var_type.value)
|
|
776
|
-
else:
|
|
777
|
-
raise NotImplementedError
|
|
778
|
-
return False
|
|
779
|
-
|
|
780
|
-
def write_declaration_type(self, var_type: WDLType):
|
|
781
|
-
"""
|
|
782
|
-
Return a string that preserves the construction of the given WDL type
|
|
783
|
-
so it can be passed into the compiled script.
|
|
784
|
-
"""
|
|
785
|
-
section = var_type.__class__.__name__ + '(' # e.g.: 'WDLIntType('
|
|
786
|
-
|
|
787
|
-
if isinstance(var_type, WDLCompoundType):
|
|
788
|
-
if isinstance(var_type, WDLArrayType):
|
|
789
|
-
section += self.write_declaration_type(var_type.element)
|
|
790
|
-
elif isinstance(var_type, WDLPairType):
|
|
791
|
-
section += self.write_declaration_type(var_type.left) + ', '
|
|
792
|
-
section += self.write_declaration_type(var_type.right)
|
|
793
|
-
elif isinstance(var_type, WDLMapType):
|
|
794
|
-
section += self.write_declaration_type(var_type.key) + ', '
|
|
795
|
-
section += self.write_declaration_type(var_type.value)
|
|
796
|
-
else:
|
|
797
|
-
raise ValueError(var_type)
|
|
798
|
-
|
|
799
|
-
if var_type.optional:
|
|
800
|
-
if isinstance(var_type, WDLCompoundType):
|
|
801
|
-
section += ', '
|
|
802
|
-
section += 'optional=True'
|
|
803
|
-
return section + ')'
|
|
804
|
-
|
|
805
|
-
def write_function_bashscriptline(self, job):
|
|
806
|
-
"""
|
|
807
|
-
Writes a function to create a bashscript for injection into the docker
|
|
808
|
-
container.
|
|
809
|
-
|
|
810
|
-
:param job_task_reference: The job referenced in WDL's Task section.
|
|
811
|
-
:param job_alias: The actual job name to be written.
|
|
812
|
-
:return: A string writing all of this.
|
|
813
|
-
"""
|
|
814
|
-
fn_section = " generate_docker_bashscript_file(temp_dir=tempDir, docker_dir=tempDir, globs=["
|
|
815
|
-
# TODO: Add glob
|
|
816
|
-
# if 'outputs' in self.tasks_dictionary[job]:
|
|
817
|
-
# for output in self.tasks_dictionary[job]['outputs']:
|
|
818
|
-
# fn_section += '({}), '.format(output[2])
|
|
819
|
-
if fn_section.endswith(', '):
|
|
820
|
-
fn_section = fn_section[:-2]
|
|
821
|
-
fn_section += f"], cmd=cmd, job_name='{str(job)}')\n\n"
|
|
822
|
-
|
|
823
|
-
return fn_section
|
|
824
|
-
|
|
825
|
-
def write_function_dockercall(self, job):
|
|
826
|
-
"""
|
|
827
|
-
Writes a string containing the apiDockerCall() that will run the job.
|
|
828
|
-
|
|
829
|
-
:param job_task_reference: The name of the job calling docker.
|
|
830
|
-
:param docker_image: The corresponding name of the docker image.
|
|
831
|
-
e.g. "ubuntu:latest"
|
|
832
|
-
:return: A string containing the apiDockerCall() that will run the job.
|
|
833
|
-
"""
|
|
834
|
-
docker_dict = {"docker_image": self.tasks_dictionary[job]['runtime']['docker'],
|
|
835
|
-
"job_task_reference": job,
|
|
836
|
-
"docker_user": str(self.docker_user)}
|
|
837
|
-
docker_template = heredoc_wdl('''
|
|
838
|
-
# apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
|
|
839
|
-
_toil_wdl_internal__stdout, _toil_wdl_internal__stderr = \\
|
|
840
|
-
apiDockerCall(self,
|
|
841
|
-
image={docker_image},
|
|
842
|
-
working_dir=tempDir,
|
|
843
|
-
parameters=[os.path.join(tempDir, "{job_task_reference}_script.sh")],
|
|
844
|
-
entrypoint="/bin/bash",
|
|
845
|
-
user={docker_user},
|
|
846
|
-
stderr=True,
|
|
847
|
-
demux=True,
|
|
848
|
-
volumes={{tempDir: {{"bind": tempDir}}}})
|
|
849
|
-
with open(os.path.join(_toil_wdl_internal__current_working_dir, '{job_task_reference}.log'), 'wb') as f:
|
|
850
|
-
if _toil_wdl_internal__stdout:
|
|
851
|
-
f.write(_toil_wdl_internal__stdout)
|
|
852
|
-
if _toil_wdl_internal__stderr:
|
|
853
|
-
f.write(_toil_wdl_internal__stderr)
|
|
854
|
-
''', docker_dict, indent=' ')[1:]
|
|
855
|
-
|
|
856
|
-
return docker_template
|
|
857
|
-
|
|
858
|
-
def write_function_cmdline(self, job):
|
|
859
|
-
"""
|
|
860
|
-
Write a series of commandline variables to be concatenated together
|
|
861
|
-
eventually and either called with subprocess.Popen() or with
|
|
862
|
-
apiDockerCall() if a docker image is called for.
|
|
863
|
-
|
|
864
|
-
:param job: A list such that:
|
|
865
|
-
(job priority #, job ID #, Job Skeleton Name, Job Alias)
|
|
866
|
-
:return: A string representing this.
|
|
867
|
-
"""
|
|
868
|
-
|
|
869
|
-
fn_section = '\n'
|
|
870
|
-
cmd_array = []
|
|
871
|
-
if 'raw_commandline' in self.tasks_dictionary[job]:
|
|
872
|
-
for cmd in self.tasks_dictionary[job]['raw_commandline']:
|
|
873
|
-
if not cmd.startswith("r'''"):
|
|
874
|
-
cmd = 'str({i} if not isinstance({i}, WDLFile) else process_and_read_file({i}, tempDir, fileStore)).strip("{nl}")'.format(i=cmd, nl=r"\n")
|
|
875
|
-
fn_section = fn_section + heredoc_wdl('''
|
|
876
|
-
try:
|
|
877
|
-
# Intended to deal with "optional" inputs that may not exist
|
|
878
|
-
# TODO: handle this better
|
|
879
|
-
command{num} = {cmd}
|
|
880
|
-
except:
|
|
881
|
-
command{num} = ''\n''', {'cmd': cmd, 'num': self.cmd_num}, indent=' ')
|
|
882
|
-
cmd_array.append('command' + str(self.cmd_num))
|
|
883
|
-
self.cmd_num = self.cmd_num + 1
|
|
884
|
-
|
|
885
|
-
if cmd_array:
|
|
886
|
-
fn_section += '\n cmd = '
|
|
887
|
-
for command in cmd_array:
|
|
888
|
-
fn_section += f'{command} + '
|
|
889
|
-
if fn_section.endswith(' + '):
|
|
890
|
-
fn_section = fn_section[:-3]
|
|
891
|
-
fn_section += '\n cmd = textwrap.dedent(cmd.strip("{nl}"))\n'.format(nl=r"\n")
|
|
892
|
-
else:
|
|
893
|
-
# empty command section
|
|
894
|
-
fn_section += ' cmd = ""'
|
|
895
|
-
|
|
896
|
-
return fn_section
|
|
897
|
-
|
|
898
|
-
def write_function_subprocesspopen(self):
|
|
899
|
-
"""
|
|
900
|
-
Write a subprocess.Popen() call for this function and write it out as a
|
|
901
|
-
string.
|
|
902
|
-
|
|
903
|
-
:param job: A list such that:
|
|
904
|
-
(job priority #, job ID #, Job Skeleton Name, Job Alias)
|
|
905
|
-
:return: A string representing this.
|
|
906
|
-
"""
|
|
907
|
-
fn_section = heredoc_wdl('''
|
|
908
|
-
this_process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
909
|
-
_toil_wdl_internal__stdout, _toil_wdl_internal__stderr = this_process.communicate()\n''', indent=' ')
|
|
910
|
-
|
|
911
|
-
return fn_section
|
|
912
|
-
|
|
913
|
-
def write_function_outputreturn(self, job, docker=False):
|
|
914
|
-
"""
|
|
915
|
-
Find the output values that this function needs and write them out as a
|
|
916
|
-
string.
|
|
917
|
-
|
|
918
|
-
:param job: A list such that:
|
|
919
|
-
(job priority #, job ID #, Job Skeleton Name, Job Alias)
|
|
920
|
-
:param job_task_reference: The name of the job to look up values for.
|
|
921
|
-
:return: A string representing this.
|
|
922
|
-
"""
|
|
923
|
-
|
|
924
|
-
fn_section = ''
|
|
925
|
-
|
|
926
|
-
fn_section += heredoc_wdl('''
|
|
927
|
-
_toil_wdl_internal__stdout_file = generate_stdout_file(_toil_wdl_internal__stdout,
|
|
928
|
-
tempDir,
|
|
929
|
-
fileStore=fileStore)
|
|
930
|
-
_toil_wdl_internal__stderr_file = generate_stdout_file(_toil_wdl_internal__stderr,
|
|
931
|
-
tempDir,
|
|
932
|
-
fileStore=fileStore,
|
|
933
|
-
stderr=True)
|
|
934
|
-
''', indent=' ')[1:]
|
|
935
|
-
|
|
936
|
-
if 'outputs' in self.tasks_dictionary[job]:
|
|
937
|
-
return_values = []
|
|
938
|
-
for output in self.tasks_dictionary[job]['outputs']:
|
|
939
|
-
output_name = output[0]
|
|
940
|
-
output_type = output[1]
|
|
941
|
-
output_value = output[2]
|
|
942
|
-
|
|
943
|
-
if self.needs_file_import(output_type):
|
|
944
|
-
nonglob_dict = {
|
|
945
|
-
"output_name": output_name,
|
|
946
|
-
"output_type": self.write_declaration_type(output_type),
|
|
947
|
-
"expression": output_value,
|
|
948
|
-
"out_dir": self.output_directory}
|
|
949
|
-
|
|
950
|
-
nonglob_template = heredoc_wdl('''
|
|
951
|
-
{output_name} = {output_type}.create(
|
|
952
|
-
{expression}, output=True)
|
|
953
|
-
{output_name} = process_outfile({output_name}, fileStore, tempDir, '{out_dir}')
|
|
954
|
-
''', nonglob_dict, indent=' ')[1:]
|
|
955
|
-
fn_section += nonglob_template
|
|
956
|
-
return_values.append(output_name)
|
|
957
|
-
else:
|
|
958
|
-
fn_section += f' {output_name} = {output_value}\n'
|
|
959
|
-
return_values.append(output_name)
|
|
960
|
-
|
|
961
|
-
if return_values:
|
|
962
|
-
fn_section += ' rvDict = {'
|
|
963
|
-
for return_value in return_values:
|
|
964
|
-
fn_section += '"{rv}": {rv}, '.format(rv=return_value)
|
|
965
|
-
if fn_section.endswith(', '):
|
|
966
|
-
fn_section = fn_section[:-2]
|
|
967
|
-
if return_values:
|
|
968
|
-
fn_section = fn_section + '}\n'
|
|
969
|
-
|
|
970
|
-
if return_values:
|
|
971
|
-
fn_section += ' return rvDict\n\n'
|
|
972
|
-
|
|
973
|
-
return fn_section
|
|
974
|
-
|
|
975
|
-
def indent(self, string2indent: str) -> str:
|
|
976
|
-
"""
|
|
977
|
-
Indent the input string by 4 spaces.
|
|
978
|
-
"""
|
|
979
|
-
split_string = string2indent.split('\n')
|
|
980
|
-
return '\n'.join(f' {line}' for line in split_string)
|
|
981
|
-
|
|
982
|
-
def needsdocker(self, job):
|
|
983
|
-
"""
|
|
984
|
-
|
|
985
|
-
:param job:
|
|
986
|
-
:return:
|
|
987
|
-
"""
|
|
988
|
-
if 'runtime' in self.tasks_dictionary[job]:
|
|
989
|
-
if 'docker' in self.tasks_dictionary[job]['runtime']:
|
|
990
|
-
return True
|
|
991
|
-
|
|
992
|
-
return False
|
|
993
|
-
|
|
994
|
-
def write_python_file(self,
|
|
995
|
-
module_section,
|
|
996
|
-
fn_section,
|
|
997
|
-
main_section,
|
|
998
|
-
output_file):
|
|
999
|
-
"""
|
|
1000
|
-
Just takes three strings and writes them to output_file.
|
|
1001
|
-
|
|
1002
|
-
:param module_section: A string of 'import modules'.
|
|
1003
|
-
:param fn_section: A string of python 'def functions()'.
|
|
1004
|
-
:param main_section: A string declaring toil options and main's header.
|
|
1005
|
-
:param job_section: A string import files into toil and declaring jobs.
|
|
1006
|
-
:param output_file: The file to write the compiled toil script to.
|
|
1007
|
-
"""
|
|
1008
|
-
with open(output_file, 'w') as file:
|
|
1009
|
-
file.write(module_section)
|
|
1010
|
-
file.write(fn_section)
|
|
1011
|
-
file.write(main_section)
|