toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
@@ -1,980 +0,0 @@
1
- # Copyright (C) 2018-2020 UCSC Computational Genomics Lab
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- import logging
15
- import os
16
- from collections import OrderedDict
17
-
18
- from wdlparse.draft2 import wdl_parser
19
-
20
- from toil.wdl.wdl_analysis import AnalyzeWDL
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
- class AnalyzeDraft2WDL(AnalyzeWDL):
26
- """
27
- AnalyzeWDL implementation for the draft-2 version.
28
- """
29
-
30
- @property
31
- def version(self) -> str:
32
- return 'draft-2'
33
-
34
- def analyze(self):
35
- """
36
- Analyzes the WDL file passed into the constructor and generates the two
37
- intermediate data structures: `self.workflows_dictionary` and
38
- `self.tasks_dictionary`.
39
-
40
- :return: Returns nothing.
41
- """
42
- # parse the wdl AST into 2 dictionaries
43
- with open(self.wdl_file) as wdl:
44
- wdl_string = wdl.read()
45
- ast = wdl_parser.parse(wdl_string).ast()
46
- self.create_tasks_dict(ast)
47
- self.create_workflows_dict(ast)
48
-
49
- def write_AST(self, out_dir=None):
50
- """
51
- Writes a file with the AST for a wdl file in the out_dir.
52
- """
53
- if out_dir is None:
54
- out_dir = os.getcwd()
55
- with open(os.path.join(out_dir, 'AST.out'), 'w') as f:
56
- with open(self.wdl_file) as wdl:
57
- wdl_string = wdl.read()
58
- ast = wdl_parser.parse(wdl_string).ast()
59
- f.write(ast.dumps(indent=2))
60
-
61
- def find_asts(self, ast_root, name):
62
- """
63
- Finds an AST node with the given name and the entire subtree under it.
64
- A function borrowed from scottfrazer. Thank you Scott Frazer!
65
-
66
- :param ast_root: The WDL AST. The whole thing generally, but really
67
- any portion that you wish to search.
68
- :param name: The name of the subtree you're looking for, like "Task".
69
- :return: nodes representing the AST subtrees matching the "name" given.
70
- """
71
- nodes = []
72
- if isinstance(ast_root, wdl_parser.AstList):
73
- for node in ast_root:
74
- nodes.extend(self.find_asts(node, name))
75
- elif isinstance(ast_root, wdl_parser.Ast):
76
- if ast_root.name == name:
77
- nodes.append(ast_root)
78
- for attr_name, attr in ast_root.attributes.items():
79
- nodes.extend(self.find_asts(attr, name))
80
- return nodes
81
-
82
- def create_tasks_dict(self, ast):
83
- """
84
- Parse each "Task" in the AST. This will create self.tasks_dictionary,
85
- where each task name is a key.
86
-
87
- :return: Creates the self.tasks_dictionary necessary for much of the
88
- parser. Returning it is only necessary for unittests.
89
- """
90
- tasks = self.find_asts(ast, 'Task')
91
- for task in tasks:
92
- self.parse_task(task)
93
- return self.tasks_dictionary
94
-
95
- def parse_task(self, task):
96
- """
97
- Parses a WDL task AST subtree.
98
-
99
- Currently looks at and parses 4 sections:
100
- 1. Declarations (e.g. string x = 'helloworld')
101
- 2. Commandline (a bash command with dynamic variables inserted)
102
- 3. Runtime (docker image; disk; CPU; RAM; etc.)
103
- 4. Outputs (expected return values/files)
104
-
105
- :param task: An AST subtree of a WDL "Task".
106
- :return: Returns nothing but adds a task to the self.tasks_dictionary
107
- necessary for much of the parser.
108
- """
109
-
110
- task_name = task.attributes["name"].source_string
111
-
112
- # task declarations
113
- declaration_array = []
114
- for declaration_subAST in task.attr("declarations"):
115
- declaration_array.append(self.parse_declaration(declaration_subAST))
116
- self.tasks_dictionary.setdefault(task_name, OrderedDict())['inputs'] = declaration_array
117
-
118
- for section in task.attr("sections"):
119
-
120
- # task commandline entries section [command(s) to run]
121
- if section.name == "RawCommand":
122
- command_array = self.parse_task_rawcommand(section)
123
- self.tasks_dictionary.setdefault(task_name, OrderedDict())['raw_commandline'] = command_array
124
-
125
- # task runtime section (docker image; disk; CPU; RAM; etc.)
126
- if section.name == "Runtime":
127
- runtime_dict = self.parse_task_runtime(section.attr("map"))
128
- self.tasks_dictionary.setdefault(task_name, OrderedDict())['runtime'] = runtime_dict
129
-
130
- # task output filenames section (expected return values/files)
131
- if section.name == "Outputs":
132
- output_array = self.parse_task_outputs(section)
133
- self.tasks_dictionary.setdefault(task_name, OrderedDict())['outputs'] = output_array
134
-
135
- def parse_task_rawcommand_attributes(self, code_snippet):
136
- """
137
-
138
- :param code_snippet:
139
- :return:
140
- """
141
- attr_dict = OrderedDict()
142
- if isinstance(code_snippet, wdl_parser.Terminal):
143
- raise NotImplementedError
144
- if isinstance(code_snippet, wdl_parser.Ast):
145
- raise NotImplementedError
146
- if isinstance(code_snippet, wdl_parser.AstList):
147
- for ast in code_snippet:
148
- if ast.name == 'CommandParameterAttr':
149
- # TODO rewrite
150
- if ast.attributes['value'].str == 'string':
151
- attr_dict[ast.attributes['key'].source_string] = "'" + ast.attributes['value'].source_string + "'"
152
- else:
153
- attr_dict[ast.attributes['key'].source_string] = ast.attributes['value'].source_string
154
- return attr_dict
155
-
156
- def parse_task_rawcommand(self, rawcommand_subAST):
157
- """
158
- Parses the rawcommand section of the WDL task AST subtree.
159
-
160
- Task "rawcommands" are divided into many parts. There are 2 types of
161
- parts: normal strings, & variables that can serve as changeable inputs.
162
-
163
- The following example command:
164
- 'echo ${variable1} ${variable2} > output_file.txt'
165
-
166
- Has 5 parts:
167
- Normal String: 'echo '
168
- Variable Input: variable1
169
- Normal String: ' '
170
- Variable Input: variable2
171
- Normal String: ' > output_file.txt'
172
-
173
- Variables can also have additional conditions, like 'sep', which is like
174
- the python ''.join() function and in WDL looks like: ${sep=" -V " GVCFs}
175
- and would be translated as: ' -V '.join(GVCFs).
176
-
177
- :param rawcommand_subAST: A subAST representing some bash command.
178
- :return: A list=[] of tuples=() representing the parts of the command:
179
- e.g. [(command_var, command_type, additional_conditions_list), ...]
180
- Where: command_var = 'GVCFs'
181
- command_type = 'variable'
182
- command_actions = {'sep': ' -V '}
183
- """
184
- command_array = []
185
- for code_snippet in rawcommand_subAST.attributes["parts"]:
186
-
187
- # normal string
188
- if isinstance(code_snippet, wdl_parser.Terminal):
189
- command_var = "r'''" + code_snippet.source_string + "'''"
190
-
191
- # a variable like ${dinosaurDNA}
192
- if isinstance(code_snippet, wdl_parser.Ast):
193
- if code_snippet.name == 'CommandParameter':
194
- # change in the future? seems to be a different parameter but works for all cases it seems?
195
- code_expr = self.parse_declaration_expressn(code_snippet.attr('expr'), es='')
196
- code_attributes = self.parse_task_rawcommand_attributes(code_snippet.attr('attributes'))
197
- command_var = self.modify_cmd_expr_w_attributes(code_expr, code_attributes)
198
-
199
- if isinstance(code_snippet, wdl_parser.AstList):
200
- raise NotImplementedError
201
- command_array.append(command_var)
202
-
203
- return command_array
204
-
205
- def modify_cmd_expr_w_attributes(self, code_expr, code_attr):
206
- """
207
-
208
- :param code_expr:
209
- :param code_attr:
210
- :return:
211
- """
212
- for param in code_attr:
213
- if param == 'sep':
214
- code_expr = f"{code_attr[param]}.join(str(x) for x in {code_expr})"
215
- elif param == 'default':
216
- code_expr = "{expr} if {expr} else {default}".format(default=code_attr[param], expr=code_expr)
217
- else:
218
- raise NotImplementedError
219
- return code_expr
220
-
221
- def parse_task_runtime_key(self, i):
222
- """
223
-
224
- :param runtime_subAST:
225
- :return:
226
- """
227
- if isinstance(i, wdl_parser.Terminal):
228
- return i.source_string
229
- if isinstance(i, wdl_parser.Ast):
230
- raise NotImplementedError
231
- if isinstance(i, wdl_parser.AstList):
232
- raise NotImplementedError
233
-
234
- def parse_task_runtime(self, runtime_subAST):
235
- """
236
- Parses the runtime section of the WDL task AST subtree.
237
-
238
- The task "runtime" section currently supports context fields for a
239
- docker container, CPU resources, RAM resources, and disk resources.
240
-
241
- :param runtime_subAST: A subAST representing runtime parameters.
242
- :return: A list=[] of runtime attributes, for example:
243
- runtime_attributes = [('docker','quay.io/encode-dcc/map:v1.0'),
244
- ('cpu','2'),
245
- ('memory','17.1 GB'),
246
- ('disks','local-disk 420 HDD')]
247
- """
248
- runtime_attributes = OrderedDict()
249
- if isinstance(runtime_subAST, wdl_parser.Terminal):
250
- raise NotImplementedError
251
- elif isinstance(runtime_subAST, wdl_parser.Ast):
252
- raise NotImplementedError
253
- elif isinstance(runtime_subAST, wdl_parser.AstList):
254
- for ast in runtime_subAST:
255
- key = self.parse_task_runtime_key(ast.attr('key'))
256
- value = self.parse_declaration_expressn(ast.attr('value'), es='')
257
- if value.startswith('"'):
258
- value = self.translate_wdl_string_to_python_string(value[1:-1])
259
- runtime_attributes[key] = value
260
- return runtime_attributes
261
-
262
- def parse_task_outputs(self, i):
263
- """
264
- Parse the WDL output section.
265
-
266
- Outputs are like declarations, with a type, name, and value. Examples:
267
-
268
- ------------
269
- Simple Cases
270
- ------------
271
-
272
- 'Int num = 7'
273
- var_name: 'num'
274
- var_type: 'Int'
275
- var_value: 7
276
-
277
- String idea = 'Lab grown golden eagle burgers.'
278
- var_name: 'idea'
279
- var_type: 'String'
280
- var_value: 'Lab grown golden eagle burgers.'
281
-
282
- File ideaFile = 'goldenEagleStemCellStartUpDisrupt.txt'
283
- var_name: 'ideaFile'
284
- var_type: 'File'
285
- var_value: 'goldenEagleStemCellStartUpDisrupt.txt'
286
-
287
- -------------------
288
- More Abstract Cases
289
- -------------------
290
-
291
- Array[File] allOfMyTerribleIdeas = glob(*.txt)[0]
292
- var_name: 'allOfMyTerribleIdeas'
293
- var_type**: 'File'
294
- var_value: [*.txt]
295
- var_actions: {'index_lookup': '0', 'glob': 'None'}
296
-
297
- **toilwdl.py converts 'Array[File]' to 'ArrayFile'
298
-
299
- :return: output_array representing outputs generated by the job/task:
300
- e.g. x = [(var_name, var_type, var_value, var_actions), ...]
301
- """
302
- output_array = []
303
- for j in i.attributes['attributes']:
304
- if j.name == 'Output':
305
- output_array.append(self.parse_declaration(j))
306
- else:
307
- raise NotImplementedError
308
- return output_array
309
-
310
- def translate_wdl_string_to_python_string(self, some_string):
311
- """
312
- Parses a string representing a given job's output filename into something
313
- python can read. Replaces ${string}'s with normal variables and the rest
314
- with normal strings all concatenated with ' + '.
315
-
316
- Will not work with additional parameters, such as:
317
- ${default="foo" bar}
318
- or
319
- ${true="foo" false="bar" Boolean baz}
320
-
321
- This method expects to be passed only strings with some combination of
322
- "${abc}" and "abc" blocks.
323
-
324
- :param job: A list such that:
325
- (job priority #, job ID #, Job Skeleton Name, Job Alias)
326
- :param some_string: e.g. '${sampleName}.vcf'
327
- :return: output_string, e.g. 'sampleName + ".vcf"'
328
- """
329
-
330
- try:
331
- # add support for 'sep'
332
- output_string = ''
333
- edited_string = some_string.strip()
334
-
335
- if edited_string.find('${') != -1:
336
- continue_loop = True
337
- while continue_loop:
338
- index_start = edited_string.find('${')
339
- index_end = edited_string.find('}', index_start)
340
-
341
- stringword = edited_string[:index_start]
342
-
343
- if index_start != 0:
344
- output_string = output_string + "'" + stringword + "' + "
345
-
346
- keyword = edited_string[index_start + 2:index_end]
347
- output_string = output_string + "str(" + keyword + ") + "
348
-
349
- edited_string = edited_string[index_end + 1:]
350
- if edited_string.find('${') == -1:
351
- continue_loop = False
352
- if edited_string:
353
- output_string = output_string + "'" + edited_string + "' + "
354
- else:
355
- output_string = "'" + edited_string + "'"
356
-
357
- if output_string.endswith(' + '):
358
- output_string = output_string[:-3]
359
-
360
- return output_string
361
- except:
362
- return ''
363
-
364
- def create_workflows_dict(self, ast):
365
- """
366
- Parse each "Workflow" in the AST. This will create self.workflows_dictionary,
367
- where each called job is a tuple key of the form: (priority#, job#, name, alias).
368
-
369
- :return: Creates the self.workflows_dictionary necessary for much of the
370
- parser. Returning it is only necessary for unittests.
371
- """
372
- workflows = self.find_asts(ast, 'Workflow')
373
- for workflow in workflows:
374
- self.parse_workflow(workflow)
375
- return self.workflows_dictionary
376
-
377
- def parse_workflow(self, workflow):
378
- """
379
- Parses a WDL workflow AST subtree.
380
-
381
- Returns nothing but creates the self.workflows_dictionary necessary for much
382
- of the parser.
383
-
384
- :param workflow: An AST subtree of a WDL "Workflow".
385
- :return: Returns nothing but adds a workflow to the
386
- self.workflows_dictionary necessary for much of the parser.
387
- """
388
- workflow_name = workflow.attr('name').source_string
389
- self.workflows_dictionary[workflow_name] = self.parse_workflow_body(workflow.attr("body"))
390
-
391
- def parse_workflow_body(self, i):
392
- """
393
- Currently looks at and parses 3 sections:
394
- 1. Declarations (e.g. String x = 'helloworld')
395
- 2. Calls (similar to a python def)
396
- 3. Scatter (which expects to map to a Call or multiple Calls)
397
- 4. Conditionals
398
- """
399
- subworkflow_dict = OrderedDict()
400
- if isinstance(i, wdl_parser.Terminal):
401
- raise NotImplementedError
402
- elif isinstance(i, wdl_parser.Ast):
403
- raise NotImplementedError
404
- elif isinstance(i, wdl_parser.AstList):
405
- for ast in i:
406
- if ast.name == "Declaration":
407
- declaration = self.parse_declaration(ast)
408
- subworkflow_dict['declaration' + str(self.declaration_number)] = declaration
409
- self.declaration_number += 1
410
-
411
- elif ast.name == "Scatter":
412
- scattertask = self.parse_workflow_scatter(ast)
413
- subworkflow_dict['scatter' + str(self.scatter_number)] = scattertask
414
- self.scatter_number += 1
415
-
416
- elif ast.name == "Call":
417
- task = self.parse_workflow_call(ast)
418
- subworkflow_dict['call' + str(self.call_number)] = task
419
- self.call_number += 1
420
-
421
- elif ast.name == "If":
422
- task = self.parse_workflow_if(ast)
423
- subworkflow_dict['if' + str(self.if_number)] = task
424
- self.if_number += 1
425
- return subworkflow_dict
426
-
427
- def parse_workflow_if(self, ifAST):
428
- expression = self.parse_workflow_if_expression(ifAST.attr('expression'))
429
- body = self.parse_workflow_body(ifAST.attr('body'))
430
- return {'expression': expression, 'body': body}
431
-
432
- def parse_workflow_if_expression(self, i):
433
- return self.parse_declaration_expressn(i, es='')
434
-
435
- def parse_workflow_scatter(self, scatterAST):
436
- item = self.parse_workflow_scatter_item(scatterAST.attr('item'))
437
- collection = self.parse_workflow_scatter_collection(scatterAST.attr('collection'))
438
- body = self.parse_workflow_body(scatterAST.attr('body'))
439
- return {'item': item, 'collection': collection, 'body': body}
440
-
441
- def parse_workflow_scatter_item(self, i):
442
- if isinstance(i, wdl_parser.Terminal):
443
- return i.source_string
444
- elif isinstance(i, wdl_parser.Ast):
445
- raise NotImplementedError
446
- elif isinstance(i, wdl_parser.AstList):
447
- raise NotImplementedError
448
-
449
- def parse_workflow_scatter_collection(self, i):
450
- if isinstance(i, wdl_parser.Terminal):
451
- return i.source_string
452
- elif isinstance(i, wdl_parser.Ast):
453
- return self.parse_declaration_expressn(i, es='')
454
- elif isinstance(i, wdl_parser.AstList):
455
- raise NotImplementedError
456
-
457
- def parse_declaration(self, ast):
458
- """
459
- Parses a WDL declaration AST subtree into a Python tuple.
460
-
461
- Examples:
462
-
463
- String my_name
464
- String your_name
465
- Int two_chains_i_mean_names = 0
466
-
467
- :param ast: Some subAST representing a task declaration like:
468
- 'String file_name'
469
- :return: var_name, var_type, var_value
470
- Example:
471
- Input subAST representing: 'String file_name'
472
- Output: var_name='file_name', var_type='String', var_value=None
473
- """
474
- var_name = self.parse_declaration_name(ast.attr("name"))
475
- var_type = self.parse_declaration_type(ast.attr("type"))
476
- var_expressn = self.parse_declaration_expressn(ast.attr("expression"), es='')
477
-
478
- return var_name, var_type, var_expressn
479
-
480
- def parse_declaration_name(self, nameAST):
481
- """
482
- Required.
483
-
484
- Nothing fancy here. Just the name of the workflow
485
- function. For example: "rnaseqexample" would be the following
486
- wdl workflow's name:
487
-
488
- workflow rnaseqexample {File y; call a {inputs: y}; call b;}
489
- task a {File y}
490
- task b {command{"echo 'ATCG'"}}
491
-
492
- :param nameAST:
493
- :return:
494
- """
495
- if isinstance(nameAST, wdl_parser.Terminal):
496
- return nameAST.source_string
497
- elif isinstance(nameAST, wdl_parser.Ast):
498
- return nameAST.source_string
499
- elif isinstance(nameAST, wdl_parser.AstList):
500
- raise NotImplementedError
501
-
502
- def parse_declaration_type(self, typeAST):
503
- """
504
- Required.
505
-
506
- Currently supported:
507
- Types are: Boolean, Float, Int, File, String, Array[subtype],
508
- Pair[subtype, subtype], and Map[subtype, subtype].
509
- OptionalTypes are: Boolean?, Float?, Int?, File?, String?, Array[subtype]?,
510
- Pair[subtype, subtype]?, and Map[subtype, subtype]?.
511
-
512
- Python is not typed, so we don't need typing except to identify type: "File",
513
- which Toil needs to import, so we recursively travel down to the innermost
514
- type which will tell us if the variables are files that need importing.
515
-
516
- For Pair and Map compound types, we recursively travel down the subtypes and
517
- store them as attributes of a `WDLType` string. This way, the type structure is
518
- preserved, which will allow us to import files appropriately.
519
-
520
- :param typeAST:
521
- :return: a WDLType instance
522
- """
523
- if isinstance(typeAST, wdl_parser.Terminal):
524
- return self.create_wdl_primitive_type(typeAST.source_string)
525
- elif isinstance(typeAST, wdl_parser.Ast):
526
- if typeAST.name == 'Type':
527
- subtype = typeAST.attr('subtype')
528
- optional = False
529
- elif typeAST.name == 'OptionalType':
530
- subtype = typeAST.attr('innerType')
531
- optional = True
532
- else:
533
- raise NotImplementedError
534
-
535
- if isinstance(subtype, wdl_parser.AstList):
536
- # we're looking at a compound type
537
- name = typeAST.attr('name').source_string
538
- elements = [self.parse_declaration_type(element) for element in subtype]
539
- return self.create_wdl_compound_type(name, elements, optional=optional)
540
- else:
541
- # either a primitive optional type OR deeply recursive types
542
- # TODO: add tests #3331
543
- wdl_type = self.parse_declaration_type(subtype)
544
- wdl_type.optional = optional
545
- return wdl_type
546
- else:
547
- raise NotImplementedError
548
-
549
- def parse_declaration_expressn(self, expressionAST, es):
550
- """
551
- Expressions are optional. Workflow declaration valid examples:
552
-
553
- File x
554
-
555
- or
556
-
557
- File x = '/x/x.tmp'
558
-
559
- :param expressionAST:
560
- :return:
561
- """
562
- if not expressionAST:
563
- return None
564
- else:
565
- if isinstance(expressionAST, wdl_parser.Terminal):
566
- if expressionAST.str == 'boolean':
567
- if expressionAST.source_string == 'false':
568
- return 'False'
569
- elif expressionAST.source_string == 'true':
570
- return 'True'
571
- else:
572
- raise TypeError('Parsed boolean ({}) must be expressed as "true" or "false".'
573
- ''.format(expressionAST.source_string))
574
- elif expressionAST.str == 'string':
575
- parsed_string = self.translate_wdl_string_to_python_string(expressionAST.source_string)
576
- return f'{parsed_string}'
577
- else:
578
- # integers, floats, and variables
579
- return f'{expressionAST.source_string}'
580
- elif isinstance(expressionAST, wdl_parser.Ast):
581
- if expressionAST.name == 'Add':
582
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
583
- expressionAST.attr('rhs'),
584
- es,
585
- operator=' + ')
586
- elif expressionAST.name == 'Subtract':
587
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
588
- expressionAST.attr('rhs'),
589
- es,
590
- operator=' - ')
591
- elif expressionAST.name == 'Multiply':
592
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
593
- expressionAST.attr('rhs'),
594
- es,
595
- operator=' * ')
596
- elif expressionAST.name == 'Divide':
597
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
598
- expressionAST.attr('rhs'),
599
- es,
600
- operator=' / ')
601
- elif expressionAST.name == 'GreaterThan':
602
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
603
- expressionAST.attr('rhs'),
604
- es,
605
- operator=' > ')
606
- elif expressionAST.name == 'LessThan':
607
- es = es + self.parse_declaration_expressn_operator(expressionAST.attr('lhs'),
608
- expressionAST.attr('rhs'),
609
- es,
610
- operator=' < ')
611
- elif expressionAST.name == 'FunctionCall':
612
- es = es + self.parse_declaration_expressn_fncall(expressionAST.attr('name'),
613
- expressionAST.attr('params'),
614
- es)
615
- elif expressionAST.name == 'TernaryIf':
616
- es = es + self.parse_declaration_expressn_ternaryif(expressionAST.attr('cond'),
617
- expressionAST.attr('iftrue'),
618
- expressionAST.attr('iffalse'),
619
- es)
620
- elif expressionAST.name == 'MemberAccess':
621
- es = es + self.parse_declaration_expressn_memberaccess(expressionAST.attr('lhs'),
622
- expressionAST.attr('rhs'),
623
- es)
624
- elif expressionAST.name == 'ArrayLiteral':
625
- es = es + self.parse_declaration_expressn_arrayliteral(expressionAST.attr('values'),
626
- es)
627
- elif expressionAST.name == 'TupleLiteral':
628
- es = es + self.parse_declaration_expressn_tupleliteral(expressionAST.attr('values'),
629
- es)
630
- elif expressionAST.name == 'ArrayOrMapLookup':
631
- es = es + self.parse_declaration_expressn_arraymaplookup(expressionAST.attr('lhs'),
632
- expressionAST.attr('rhs'),
633
- es)
634
- elif expressionAST.name == 'LogicalNot':
635
- es = es + self.parse_declaration_expressn_logicalnot(expressionAST.attr('expression'),
636
- es)
637
- else:
638
- raise NotImplementedError
639
- elif isinstance(expressionAST, wdl_parser.AstList):
640
- raise NotImplementedError
641
- return '(' + es + ')'
642
-
643
- def parse_declaration_expressn_logicalnot(self, exprssn, es):
644
- if isinstance(exprssn, wdl_parser.Terminal):
645
- es = es + exprssn.source_string
646
- elif isinstance(exprssn, wdl_parser.Ast):
647
- es = es + self.parse_declaration_expressn(exprssn, es='')
648
- elif isinstance(exprssn, wdl_parser.AstList):
649
- raise NotImplementedError
650
- return ' not ' + es
651
-
652
def parse_declaration_expressn_arraymaplookup(self, lhsAST, rhsAST, es):
    """
    Render a WDL array or map lookup as Python source text: ``lhs[rhs]``.

    :param lhsAST: The container being indexed; a wdl_parser.Terminal or
        wdl_parser.Ast node.
    :param rhsAST: The index or key; only wdl_parser.Terminal is supported.
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by the container text and ``[<index>]``.
    :raises NotImplementedError: If lhsAST is a wdl_parser.AstList, or if
        rhsAST is anything other than a wdl_parser.Terminal.
    """
    if isinstance(lhsAST, wdl_parser.Terminal):
        es = es + lhsAST.source_string
    elif isinstance(lhsAST, wdl_parser.Ast):
        # parse_declaration_expressn() wraps its result in parentheses.
        # parenthesis must be removed because 'i[0]' works, but '(i)[0]' does not
        es = es + self.parse_declaration_expressn(lhsAST, es='')[1:-1]
    elif isinstance(lhsAST, wdl_parser.AstList):
        raise NotImplementedError

    if not isinstance(rhsAST, wdl_parser.Terminal):
        # Ast and AstList indices were already unsupported. Any other value
        # (e.g. None) used to fall through and fail with UnboundLocalError
        # on the return line, so reject it explicitly in the same way.
        raise NotImplementedError

    return es + f'[{rhsAST.source_string}]'
676
-
677
def parse_declaration_expressn_memberaccess(self, lhsAST, rhsAST, es):
    """
    Render a WDL member access (``lhs.rhs``) as Python source text.

    The two sides are joined with '.' when the right-hand side is the
    Terminal ``left`` or ``right`` (so pair.left and pair.right are kept as
    attribute accesses), and with '_' in every other case.

    :param lhsAST: Left-hand side; a wdl_parser.Terminal or wdl_parser.Ast.
    :param rhsAST: Right-hand side; a wdl_parser.Terminal or wdl_parser.Ast.
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by the rendered left side, the separator and
        the rendered right side.
    :raises NotImplementedError: If either side is a wdl_parser.AstList.
    """
    if isinstance(lhsAST, wdl_parser.Terminal):
        es = es + lhsAST.source_string
    elif isinstance(lhsAST, wdl_parser.Ast):
        # Render the sub-expression from an empty string. Passing the
        # accumulated ``es`` down and then appending the result to ``es``
        # would emit the existing prefix twice.
        es = es + self.parse_declaration_expressn(lhsAST, es='')
    elif isinstance(lhsAST, wdl_parser.AstList):
        raise NotImplementedError

    rhs_is_terminal = isinstance(rhsAST, wdl_parser.Terminal)

    # hack-y way to make sure pair.left and pair.right are parsed correctly.
    if rhs_is_terminal and rhsAST.source_string in ('left', 'right'):
        es = es + '.'
    else:
        es = es + '_'

    if rhs_is_terminal:
        es = es + rhsAST.source_string
    elif isinstance(rhsAST, wdl_parser.Ast):
        # Same reasoning as for the left side: ``es`` already holds the left
        # side and the separator, so start the sub-expression from ''.
        es = es + self.parse_declaration_expressn(rhsAST, es='')
    elif isinstance(rhsAST, wdl_parser.AstList):
        raise NotImplementedError

    return es
708
-
709
def parse_declaration_expressn_ternaryif(self, cond, iftrue, iffalse, es):
    """
    Render a WDL ternary-if as a Python conditional expression.

    WDL writes the condition first::

        if <condition> then <iftrue> else <iffalse>

    whereas Python puts the true branch first::

        <iftrue> if <condition> else <iffalse>

    :param cond: AST of the condition.
    :param iftrue: AST of the value used when the condition holds.
    :param iffalse: AST of the value used otherwise.
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by the reordered conditional expression.
    """
    # Each part is rendered independently, in output order.
    true_text = self.parse_declaration_expressn(iftrue, es='')
    cond_text = self.parse_declaration_expressn(cond, es='')
    false_text = self.parse_declaration_expressn(iffalse, es='')
    return es + true_text + ' if ' + cond_text + ' else ' + false_text
729
-
730
def parse_declaration_expressn_tupleliteral(self, values, es):
    """
    Render a WDL tuple literal as a parenthesised, comma-separated list.

    :param values: Iterable of ASTs, one per tuple element.
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by '(', the rendered elements joined by ', ',
        and ')'.
    """
    elements = ', '.join(
        self.parse_declaration_expressn(element, es='') for element in values
    )
    return es + '(' + elements + ')'
744
-
745
def parse_declaration_expressn_arrayliteral(self, values, es):
    """
    Render a WDL array literal as a bracketed, comma-separated list.

    :param values: Iterable of ASTs, one per array element.
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by '[', the rendered elements joined by ', ',
        and ']'.
    """
    elements = ', '.join(
        self.parse_declaration_expressn(element, es='') for element in values
    )
    return es + '[' + elements + ']'
759
-
760
def parse_declaration_expressn_operator(self, lhsAST, rhsAST, es, operator):
    """
    Join the rendered left and right operands with an operator string.

    :param lhsAST: Left operand; a wdl_parser.Terminal or wdl_parser.Ast.
    :param rhsAST: Right operand; a wdl_parser.Terminal or wdl_parser.Ast.
    :param es: Expression string accumulated so far.
    :param operator: Text placed between the two operands.
    :return: ``es`` followed by the left operand, the operator and the right
        operand.
    :raises NotImplementedError: If either operand is a wdl_parser.AstList.
    """
    def render(operand):
        # Terminals whose ``str`` is 'string' are emitted double-quoted;
        # other terminals are emitted as-is.
        if isinstance(operand, wdl_parser.Terminal):
            if operand.str == 'string':
                return f'"{operand.source_string}"'
            return f'{operand.source_string}'
        if isinstance(operand, wdl_parser.Ast):
            return self.parse_declaration_expressn(operand, es='')
        if isinstance(operand, wdl_parser.AstList):
            raise NotImplementedError
        # Any other kind of node contributes nothing.
        return ''

    es = es + render(lhsAST) + operator
    return es + render(rhsAST)
792
-
793
def parse_declaration_expressn_fncall(self, name, params, es):
    """
    Render a call to one of the WDL built-in functions as Python source text.

    Several functions get special treatment:

    - ``stdout`` and ``stderr`` become references to the variables
      ``_toil_wdl_internal__stdout_file`` / ``_toil_wdl_internal__stderr_file``
      rather than calls.
    - ``range`` and ``zip`` are emitted as ``wdl_range`` / ``wdl_zip`` so they
      do not resolve to the Python built-ins.
    - ``glob`` is given an extra ``tempDir`` argument.
    - ``size`` is given ``fileStore=fileStore``.
    - ``write_lines``, ``write_tsv``, ``write_json`` and ``write_map`` are
      given ``temp_dir=tempDir, file_store=fileStore``.

    :param name: Function name; must be a wdl_parser.Terminal.
    :param params: Call arguments, handed to
        parse_declaration_expressn_fncall_normalparams().
    :param es: Expression string accumulated so far.
    :return: ``es`` followed by the rendered call or variable reference.
    :raises NotImplementedError: If ``name`` is a Terminal with a falsy
        ``str``, or is a wdl_parser.Ast or wdl_parser.AstList.
    """
    if isinstance(name, wdl_parser.Terminal):
        if not name.str:
            raise NotImplementedError
        function = name.source_string
        if function == 'stdout':
            # let the stdout() function reference the generated stdout file path.
            return es + '_toil_wdl_internal__stdout_file'
        if function == 'stderr':
            return es + '_toil_wdl_internal__stderr_file'
        if function in ('range', 'zip'):
            # replace python built-in functions
            es += f'wdl_{function}('
        else:
            es = es + function + '('
    elif isinstance(name, wdl_parser.Ast):
        raise NotImplementedError
    elif isinstance(name, wdl_parser.AstList):
        raise NotImplementedError

    arguments = self.parse_declaration_expressn_fncall_normalparams(params)
    called = name.source_string

    if called == 'glob':
        tail = arguments + ', tempDir)'
    elif called == 'size':
        # Only emit the separating comma when there are positional arguments.
        tail = (arguments + ', ' if arguments else '') + 'fileStore=fileStore)'
    elif called in ('write_lines', 'write_tsv', 'write_json', 'write_map'):
        tail = arguments + ', temp_dir=tempDir, file_store=fileStore)'
    else:
        tail = arguments + ')'
    return es + tail
835
-
836
def parse_declaration_expressn_fncall_normalparams(self, params):
    """
    Render the arguments passed to a function call.

    :param params: A wdl_parser.AstList holding one AST per argument.
    :return: The rendered arguments joined by ', ' ('' for an empty list),
        or None when ``params`` is not one of the wdl_parser node types.
    :raises NotImplementedError: If ``params`` is a wdl_parser.Terminal or a
        wdl_parser.Ast.
    """
    if isinstance(params, (wdl_parser.Terminal, wdl_parser.Ast)):
        raise NotImplementedError
    if isinstance(params, wdl_parser.AstList):
        return ', '.join(
            self.parse_declaration_expressn(argument, es='')
            for argument in params
        )
    return None
850
-
851
def parse_workflow_call_taskname(self, i):
    """
    Return the source text of a call's task-name node.

    :param i: The task-name node; expected to be a wdl_parser.Terminal.
    :return: ``i.source_string`` for a Terminal; None when ``i`` is not one
        of the wdl_parser node types.
    :raises NotImplementedError: If ``i`` is a wdl_parser.Ast or a
        wdl_parser.AstList.
    """
    if isinstance(i, wdl_parser.Terminal):
        return i.source_string
    if isinstance(i, (wdl_parser.Ast, wdl_parser.AstList)):
        raise NotImplementedError
    return None
864
-
865
def parse_workflow_call_taskalias(self, i):
    """
    Return the source text of a call's alias node.

    :param i: The alias node; expected to be a wdl_parser.Terminal.
    :return: ``i.source_string`` for a Terminal; None when ``i`` is not one
        of the wdl_parser node types.
    :raises NotImplementedError: If ``i`` is a wdl_parser.Ast or a
        wdl_parser.AstList.
    """
    if isinstance(i, wdl_parser.Terminal):
        return i.source_string
    if isinstance(i, (wdl_parser.Ast, wdl_parser.AstList)):
        raise NotImplementedError
    return None
878
-
879
def parse_workflow_call_body_declarations(self, i):
    """
    Collect the declarations found in a call body.

    Declarations inside a call body have not been seen in use, so any
    non-empty result is rejected; a successful call always returns ``[]``.

    :param i: The call body's declarations node.
    :return: An empty list.
    :raises NotImplementedError: If ``i`` is a wdl_parser.Ast, or if any
        declaration was collected.
    """
    if isinstance(i, wdl_parser.Terminal):
        found = [i.source_string]
    elif isinstance(i, wdl_parser.Ast):
        raise NotImplementedError
    elif isinstance(i, wdl_parser.AstList):
        found = [self.parse_declaration(ast) for ast in i]
    else:
        found = []

    # have not seen this used so raise to check
    if found:
        raise NotImplementedError

    return found
900
-
901
def parse_workflow_call_body_io(self, i):
    """
    Parse the io section of a call body into its input mapping.

    :param i: The io node; expected to be a wdl_parser.AstList holding a
        single 'Inputs' node.
    :return: The result of parse_workflow_call_body_io_map() for the
        'Inputs' node's ``map`` attribute; None when the list is empty or
        ``i`` is not one of the wdl_parser node types.
    :raises NotImplementedError: If ``i`` is a wdl_parser.Terminal or a
        wdl_parser.Ast, or if the first element is not named 'Inputs'.
    :raises AssertionError: If the list does not hold exactly one element.
    """
    if isinstance(i, (wdl_parser.Terminal, wdl_parser.Ast)):
        raise NotImplementedError
    if isinstance(i, wdl_parser.AstList):
        # Only the first element is ever examined: the loop body always
        # returns or raises.
        for ast in i:
            assert len(i) == 1
            if ast.name != 'Inputs':
                raise NotImplementedError
            return self.parse_workflow_call_body_io_map(ast.attr('map'))
    return None
919
-
920
def parse_workflow_call_body_io_map(self, i):
    """
    Build an ordered mapping from a list of 'IOMapping' nodes.

    :param i: A wdl_parser.AstList of 'IOMapping' nodes.
    :return: An OrderedDict mapping each rendered ``key`` expression to its
        rendered ``value`` expression, in list order. It is empty when
        ``i`` is not one of the wdl_parser node types.
    :raises NotImplementedError: If ``i`` is a wdl_parser.Terminal or a
        wdl_parser.Ast, or if any element is not named 'IOMapping'.
    """
    mappings = OrderedDict()
    if isinstance(i, (wdl_parser.Terminal, wdl_parser.Ast)):
        raise NotImplementedError
    if isinstance(i, wdl_parser.AstList):
        for ast in i:
            if ast.name != 'IOMapping':
                raise NotImplementedError
            rendered_key = self.parse_declaration_expressn(ast.attr("key"), es='')
            rendered_value = self.parse_declaration_expressn(ast.attr("value"), es='')
            mappings[rendered_key] = rendered_value
    return mappings
941
-
942
def parse_workflow_call_body(self, i):
    """
    Parse the body of a workflow call into its io mappings.

    :param i: The call's body node.
    :return: For a 'CallBody' Ast, the result of
        parse_workflow_call_body_io() on its ``io`` attribute. For a
        Terminal, its ``source_string`` (no io mappings; a blank call). An
        empty OrderedDict when ``i`` is not one of the wdl_parser node types.
    :raises NotImplementedError: If ``i`` is an Ast not named 'CallBody', or
        a wdl_parser.AstList.
    """
    if isinstance(i, wdl_parser.Terminal):
        return i.source_string  # no io mappings; represents just a blank call
    if isinstance(i, wdl_parser.Ast):
        if i.name != 'CallBody':
            raise NotImplementedError
        # Called for its check only: it raises if the body holds any
        # declarations (have not seen this used).
        self.parse_workflow_call_body_declarations(i.attr("declarations"))
        return self.parse_workflow_call_body_io(i.attr('io'))
    if isinstance(i, wdl_parser.AstList):
        raise NotImplementedError
    return OrderedDict()
963
-
964
def parse_workflow_call(self, i):
    """
    Parse a WDL workflow call AST subtree into the variable mappings for
    that particular job/task "call".

    :param i: WDL workflow job object; its ``task``, ``alias`` and ``body``
        attributes are read.
    :return: python dictionary with keys 'task', 'alias' and 'io'. 'alias'
        falls back to the task name when no alias is given.
    """
    task_name = self.parse_workflow_call_taskname(i.attr("task"))
    alias = self.parse_workflow_call_taskalias(i.attr("alias"))
    mappings = self.parse_workflow_call_body(i.attr("body"))

    return {'task': task_name, 'alias': alias or task_name, 'io': mappings}
980
-