toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/wdl/wdl_functions.py DELETED
@@ -1,997 +0,0 @@
1
- # Copyright (C) 2015-2021 Regents of the University of California
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- import csv
15
- import json
16
- import logging
17
- import math
18
- import os
19
- import re
20
- import subprocess
21
- import textwrap
22
- import uuid
23
- from typing import Any, Dict, List, Optional, Tuple, Union
24
-
25
- from toil.fileStores.abstractFileStore import AbstractFileStore
26
- from toil.lib.conversions import bytes_in_unit
27
- from toil.lib.resources import glob # type: ignore
28
- from toil.wdl.wdl_types import WDLFile, WDLPair
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
-
33
- class WDLRuntimeError(Exception):
34
- """ WDL-related run-time error."""
35
-
36
- def __init__(self, message):
37
- super().__init__(message)
38
-
39
-
40
- class WDLJSONEncoder(json.JSONEncoder):
41
- """
42
- Extended JSONEncoder to support WDL-specific JSON encoding.
43
- """
44
-
45
- def default(self, obj):
46
- if isinstance(obj, WDLPair):
47
- return obj.to_dict()
48
- return json.JSONEncoder.default(self, obj)
49
-
50
-
51
- def generate_docker_bashscript_file(temp_dir, docker_dir, globs, cmd, job_name):
52
- '''
53
- Creates a bashscript to inject into a docker container for the job.
54
-
55
- This script wraps the job command(s) given in a bash script, hard links the
56
- outputs and returns an "rc" file containing the exit code. All of this is
57
- done in an effort to parallel the Broad's cromwell engine, which is the
58
- native WDL runner. As they've chosen to write and then run a bashscript for
59
- every command, so shall we.
60
-
61
- :param temp_dir: The current directory outside of docker to deposit the
62
- bashscript into, which will be the bind mount that docker
63
- loads files from into its own containerized filesystem.
64
- This is usually the tempDir created by this individual job
65
- using 'tempDir = job.fileStore.getLocalTempDir()'.
66
- :param docker_dir: The working directory inside of the docker container
67
- which is bind mounted to 'temp_dir'. By default this is
68
- 'data'.
69
- :param globs: A list of expected output files to retrieve as glob patterns
70
- that will be returned as hard links to the current working
71
- directory.
72
- :param cmd: A bash command to be written into the bash script and run.
73
- :param job_name: The job's name, only used to write in a file name
74
- identifying the script as written for that job.
75
- Will be used to call the script later.
76
- :return: Nothing, but it writes and deposits a bash script in temp_dir
77
- intended to be run inside of a docker container for this job.
78
- '''
79
- wdl_copyright = heredoc_wdl(''' \n
80
- # Borrowed/rewritten from the Broad's Cromwell implementation. As
81
- # that is under a BSD-ish license, I include here the license off
82
- # of their GitHub repo. Thank you Broadies!
83
-
84
- # Copyright (c) 2015, Broad Institute, Inc.
85
- # All rights reserved.
86
-
87
- # Redistribution and use in source and binary forms, with or without
88
- # modification, are permitted provided that the following conditions are met:
89
-
90
- # * Redistributions of source code must retain the above copyright notice, this
91
- # list of conditions and the following disclaimer.
92
-
93
- # * Redistributions in binary form must reproduce the above copyright notice,
94
- # this list of conditions and the following disclaimer in the documentation
95
- # and/or other materials provided with the distribution.
96
-
97
- # * Neither the name Broad Institute, Inc. nor the names of its
98
- # contributors may be used to endorse or promote products derived from
99
- # this software without specific prior written permission.
100
-
101
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
102
- # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
103
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
104
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
105
- # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
106
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
107
- # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
108
- # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
109
- # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
110
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
111
-
112
- # make a temp directory w/identifier
113
- ''')
114
- prefix_dict = {"docker_dir": docker_dir,
115
- "cmd": cmd}
116
- bashfile_prefix = heredoc_wdl('''
117
- tmpDir=$(mktemp -d /{docker_dir}/execution/tmp.XXXXXX)
118
- chmod 777 $tmpDir
119
- # set destination for java to deposit all of its files
120
- export _JAVA_OPTIONS=-Djava.io.tmpdir=$tmpDir
121
- export TMPDIR=$tmpDir
122
-
123
- (
124
- cd /{docker_dir}/execution
125
- {cmd}
126
- )
127
-
128
- # gather the input command return code
129
- echo $? > "$tmpDir/rc.tmp"
130
-
131
- ''', prefix_dict)
132
-
133
- bashfile_string = '#!/bin/bash' + wdl_copyright + bashfile_prefix
134
-
135
- begin_globbing_string = heredoc_wdl('''
136
- (
137
- mkdir "$tmpDir/globs"
138
- ''')
139
-
140
- bashfile_string = bashfile_string + begin_globbing_string
141
-
142
- for glob_input in globs:
143
- add_this_glob = \
144
- '( ln -L ' + glob_input + \
145
- ' "$tmpDir/globs" 2> /dev/null ) || ( ln ' + glob_input + \
146
- ' "$tmpDir/globs" )\n'
147
- bashfile_string = bashfile_string + add_this_glob
148
-
149
- bashfile_suffix = heredoc_wdl('''
150
- )
151
-
152
- # flush RAM to disk
153
- sync
154
-
155
- mv "$tmpDir/rc.tmp" "$tmpDir/rc"
156
- chmod -R 777 $tmpDir
157
- ''')
158
-
159
- bashfile_string = bashfile_string + bashfile_suffix
160
-
161
- with open(os.path.join(temp_dir, job_name + '_script.sh'), 'w') as bashfile:
162
- bashfile.write(bashfile_string)
163
-
164
-
165
- def process_single_infile(wdl_file: WDLFile, fileStore: AbstractFileStore) -> WDLFile:
166
- f = wdl_file.file_path
167
- logger.info(f'Importing {f} into the jobstore.')
168
- if f.startswith('http://') or f.startswith('https://') or \
169
- f.startswith('file://') or f.startswith('wasb://'):
170
- filepath = fileStore.importFile(f)
171
- preserveThisFilename = os.path.basename(f)
172
- elif f.startswith('s3://'):
173
- try:
174
- filepath = fileStore.importFile(f)
175
- preserveThisFilename = os.path.basename(f)
176
- except:
177
- from toil.lib.ec2nodes import EC2Regions
178
- success = False
179
- for region in EC2Regions:
180
- try:
181
- html_path = f'http://s3.{region}.amazonaws.com/' + f[5:]
182
- filepath = fileStore.importFile(html_path)
183
- preserveThisFilename = os.path.basename(f)
184
- success = True
185
- except:
186
- pass
187
- if not success:
188
- raise RuntimeError('Unable to import: ' + f)
189
- elif f.startswith('gs://'):
190
- f = 'https://storage.googleapis.com/' + f[5:]
191
- filepath = fileStore.importFile(f)
192
- preserveThisFilename = os.path.basename(f)
193
- else:
194
- filepath = fileStore.importFile("file://" + os.path.abspath(f))
195
- preserveThisFilename = os.path.basename(f)
196
- return WDLFile(file_path=filepath, file_name=preserveThisFilename, imported=True)
197
-
198
-
199
- def process_infile(f: Any, fileStore: AbstractFileStore):
200
- """
201
- Takes any input and imports the WDLFile into the fileStore.
202
-
203
- This returns the input importing all WDLFile instances to the fileStore. Toil
204
- does not preserve a file's original name upon import and so the WDLFile also keeps
205
- track of this.
206
-
207
- :param f: A primitive, WDLFile, or a container. A file needs to be a WDLFile instance
208
- to be imported.
209
- :param fileStore: The fileStore object that is called to load files into the fileStore.
210
- """
211
- if isinstance(f, WDLFile):
212
- # check if this has already been imported into the fileStore
213
- if f.imported:
214
- return f
215
- else:
216
- return process_single_infile(f, fileStore)
217
- elif isinstance(f, list):
218
- # recursively call process_infile() to handle cases like Array[Map[String, File]]
219
- return [process_infile(sf, fileStore) for sf in f]
220
- elif isinstance(f, WDLPair):
221
- f.left = process_infile(f.left, fileStore)
222
- f.right = process_infile(f.right, fileStore)
223
- return f
224
- elif isinstance(f, dict):
225
- return {process_infile(k, fileStore): process_infile(v, fileStore) for k, v in f.items()}
226
- elif isinstance(f, (int, str, bool, float)):
227
- return f
228
- else:
229
- raise WDLRuntimeError(f'Error processing file: {str(f)}')
230
-
231
-
232
- def sub(input_str: str, pattern: str, replace: str) -> str:
233
- """
234
- Given 3 String parameters `input`, `pattern`, `replace`, this function will
235
- replace any occurrence matching `pattern` in `input` by `replace`.
236
- `pattern` is expected to be a regular expression. Details of regex evaluation
237
- will depend on the execution engine running the WDL.
238
-
239
- WDL syntax: String sub(String, String, String)
240
- """
241
-
242
- if isinstance(input_str, WDLFile):
243
- input_str = input_str.file_name
244
- if isinstance(pattern, WDLFile):
245
- pattern = pattern.file_name
246
- if isinstance(replace, WDLFile):
247
- replace = replace.file_name
248
-
249
- return re.sub(pattern=str(pattern), repl=str(replace), string=str(input_str))
250
-
251
-
252
- def defined(i):
253
- if i:
254
- return True
255
- return False
256
-
257
-
258
- def process_single_outfile(wdl_file: WDLFile, fileStore, workDir, outDir) -> WDLFile:
259
- f = wdl_file.file_path
260
- if os.path.exists(f):
261
- output_f_path = f
262
- elif os.path.exists(os.path.abspath(f)):
263
- output_f_path = os.path.abspath(f)
264
- elif os.path.exists(os.path.join(workDir, 'execution', f)):
265
- output_f_path = os.path.join(workDir, 'execution', f)
266
- elif os.path.exists(os.path.join('execution', f)):
267
- output_f_path = os.path.join('execution', f)
268
- elif os.path.exists(os.path.join(workDir, f)):
269
- output_f_path = os.path.join(workDir, f)
270
- elif os.path.exists(os.path.join(outDir, f)):
271
- output_f_path = os.path.join(outDir, f)
272
- else:
273
- tmp = subprocess.check_output(['ls', '-lha', workDir]).decode('utf-8')
274
- exe = subprocess.check_output(['ls', '-lha', os.path.join(workDir, 'execution')]).decode('utf-8')
275
- for std_file in ('stdout', 'stderr'):
276
- std_file = os.path.join(workDir, 'execution', std_file)
277
- if os.path.exists(std_file):
278
- with open(std_file, 'rb') as f:
279
- logger.info(f.read())
280
-
281
- raise RuntimeError('OUTPUT FILE: {} was not found in {}!\n'
282
- '{}\n\n'
283
- '{}\n'.format(f, os.getcwd(), tmp, exe))
284
- output_file = fileStore.writeGlobalFile(output_f_path)
285
- preserveThisFilename = os.path.basename(output_f_path)
286
- fileStore.export_file(output_file, "file://" + os.path.join(os.path.abspath(outDir), preserveThisFilename))
287
- return WDLFile(file_path=output_file, file_name=preserveThisFilename, imported=True)
288
-
289
-
290
- def process_outfile(f, fileStore, workDir, outDir):
291
- if isinstance(f, WDLFile):
292
- return process_single_outfile(f, fileStore, workDir, outDir)
293
- elif isinstance(f, list):
294
- # recursively call process_outfile() to handle cases like Array[Map[String, File]]
295
- return [process_outfile(sf, fileStore, workDir, outDir) for sf in f]
296
- elif isinstance(f, WDLPair):
297
- f.left = process_outfile(f.left, fileStore, workDir, outDir)
298
- f.right = process_outfile(f.right, fileStore, workDir, outDir)
299
- return f
300
- elif isinstance(f, dict):
301
- return {process_outfile(k, fileStore, workDir, outDir):
302
- process_outfile(v, fileStore, workDir, outDir) for k, v in f.items()}
303
- elif isinstance(f, (int, str, bool, float)):
304
- return f
305
- else:
306
- raise WDLRuntimeError(f'Error processing file: {str(f)}')
307
-
308
-
309
- def abspath_single_file(f: WDLFile, cwd: str) -> WDLFile:
310
- path = f.file_path
311
- if path != os.path.abspath(path):
312
- f.file_path = os.path.join(cwd, path)
313
- return f
314
-
315
-
316
- def abspath_file(f: Any, cwd: str):
317
- if not f:
318
- # in the case of "optional" files (same treatment in 'process_and_read_file()')
319
- # TODO: handle this at compile time, not here
320
- return ''
321
- if isinstance(f, WDLFile):
322
- # check if this has already been imported into the fileStore
323
- if f.imported:
324
- return f
325
- path = f.file_path
326
- if path.startswith('s3://') or path.startswith('http://') or path.startswith('https://') or \
327
- path.startswith('file://') or path.startswith('wasb://') or path.startswith('gs://'):
328
- return f
329
- return abspath_single_file(f, cwd)
330
- elif isinstance(f, list):
331
- # recursively call abspath_file() to handle cases like Array[Map[String, File]]
332
- return [abspath_file(sf, cwd) for sf in f]
333
- elif isinstance(f, WDLPair):
334
- f.left = abspath_file(f.left, cwd)
335
- f.right = abspath_file(f.right, cwd)
336
- return f
337
- elif isinstance(f, dict):
338
- return {abspath_file(k, cwd): abspath_file(v, cwd) for k, v in f.items()}
339
- elif isinstance(f, (int, str, bool, float)):
340
- return f
341
- else:
342
- raise WDLRuntimeError(f'Error processing file: ({str(f)}) of type: ({str(type(f))}).')
343
-
344
-
345
- def read_single_file(f: WDLFile, tempDir, fileStore, docker=False) -> str:
346
- import os
347
- try:
348
- fpath = fileStore.readGlobalFile(f.file_path, userPath=os.path.join(tempDir, f.file_name))
349
- except:
350
- fpath = os.path.join(tempDir, f.file_name)
351
- return fpath
352
-
353
-
354
- def read_file(f: Any, tempDir: str, fileStore: AbstractFileStore, docker: bool = False):
355
- if isinstance(f, WDLFile):
356
- return read_single_file(f, tempDir, fileStore, docker=docker)
357
- elif isinstance(f, list):
358
- # recursively call read_file() to handle cases like Array[Map[String, File]]
359
- return [read_file(sf, tempDir, fileStore, docker=docker) for sf in f]
360
- elif isinstance(f, WDLPair):
361
- f.left = read_file(f.left, tempDir, fileStore, docker=docker)
362
- f.right = read_file(f.right, tempDir, fileStore, docker=docker)
363
- return f
364
- elif isinstance(f, dict):
365
- return {read_file(k, tempDir, fileStore, docker=docker):
366
- read_file(v, tempDir, fileStore, docker=docker) for k, v in f.items()}
367
- elif isinstance(f, (int, str, bool, float)):
368
- return f
369
- else:
370
- raise WDLRuntimeError(f'Error processing file: {str(f)}')
371
-
372
-
373
- def process_and_read_file(f, tempDir, fileStore, docker=False):
374
- if not f:
375
- # in the case of "optional" files (same treatment in 'abspath_file()')
376
- # TODO: handle this at compile time, not here and change to the empty string
377
- return None
378
- processed_file = process_infile(f, fileStore)
379
- return read_file(processed_file, tempDir, fileStore, docker=docker)
380
-
381
-
382
- def generate_stdout_file(output, tempDir, fileStore, stderr=False):
383
- """
384
- Create a stdout (or stderr) file from a string or bytes object.
385
-
386
- :param str|bytes output: A str or bytes object that holds the stdout/stderr text.
387
- :param str tempDir: The directory to write the stdout file.
388
- :param fileStore: A fileStore object.
389
- :param bool stderr: If True, a stderr instead of a stdout file is generated.
390
- :return: The file path to the generated file.
391
- """
392
- if output is None:
393
- # write an empty file if there's no stdout/stderr.
394
- output = b''
395
- elif isinstance(output, str):
396
- output = bytes(output, encoding='utf-8')
397
-
398
- # TODO: we need a way to differentiate the stdout/stderr files in the workflow after execution.
399
- # Cromwell generates a folder for each task so the file is simply named stdout and lives in
400
- # the task execution folder. This is not the case with Toil. Though, this would not be a
401
- # problem with intermediate stdout files as each task has its own temp folder.
402
- name = 'stderr' if stderr else 'stdout'
403
- local_path = os.path.join(tempDir, 'execution', name)
404
-
405
- # import to fileStore then read to local temp file
406
- with fileStore.writeGlobalFileStream(cleanup=True, basename=name) as (stream, file_id):
407
- stream.write(output)
408
-
409
- assert file_id is not None
410
- return fileStore.readGlobalFile(fileStoreID=file_id, userPath=local_path)
411
-
412
-
413
- def parse_memory(memory):
414
- """
415
- Parses a string representing memory and returns
416
- an integer # of bytes.
417
-
418
- :param memory:
419
- :return:
420
- """
421
- memory = str(memory)
422
- if 'None' in memory:
423
- return 2147483648 # toil's default
424
- try:
425
- import re
426
- raw_mem_split = re.split('([a-zA-Z]+)', memory)
427
- mem_split = []
428
-
429
- for r in raw_mem_split:
430
- if r:
431
- mem_split.append(r.replace(' ', ''))
432
-
433
- if len(mem_split) == 1:
434
- return int(memory)
435
-
436
- if len(mem_split) == 2:
437
- num = mem_split[0]
438
- unit = mem_split[1]
439
- return int(float(num) * bytes_in_unit(unit))
440
- else:
441
- raise RuntimeError(f'Memory parsing failed: {memory}')
442
- except:
443
- return 2147483648 # toil's default
444
-
445
-
446
- def parse_cores(cores):
447
- cores = str(cores)
448
- if 'None' in cores:
449
- return 1 # toil's default
450
- if cores:
451
- return float(cores)
452
- else:
453
- return 1
454
-
455
-
456
- def parse_disk(disk):
457
- disk = str(disk)
458
- if 'None' in disk:
459
- return 2147483648 # toil's default
460
- try:
461
- total_disk = 0
462
- disks = disk.split(',')
463
- for d in disks:
464
- d = d.strip().split(' ')
465
- if len(d) > 1:
466
- for part in d:
467
- if is_number(part):
468
- total_disk += parse_memory(f'{part} GB')
469
- else:
470
- return parse_memory(d[0]) if parse_memory(d[0]) > 2147483648 else 2147483648
471
- return total_disk if total_disk > 2147483648 else 2147483648
472
- except:
473
- return 2147483648 # toil's default
474
-
475
-
476
- def is_number(s):
477
- try:
478
- float(s)
479
- return True
480
- except ValueError:
481
- return False
482
-
483
-
484
- def size(f: Optional[Union[str, WDLFile, List[Union[str, WDLFile]]]] = None,
485
- unit: Optional[str] = 'B',
486
- fileStore: Optional[AbstractFileStore] = None) -> float:
487
- """
488
- Given a `File` and a `String` (optional), returns the size of the file in Bytes
489
- or in the unit specified by the second argument.
490
-
491
- Supported units are KiloByte ("K", "KB"), MegaByte ("M", "MB"), GigaByte
492
- ("G", "GB"), TeraByte ("T", "TB") (powers of 1000) as well as their binary version
493
- (https://en.wikipedia.org/wiki/Binary_prefix) "Ki" ("KiB"), "Mi" ("MiB"),
494
- "Gi" ("GiB"), "Ti" ("TiB") (powers of 1024). Default unit is Bytes ("B").
495
-
496
- WDL syntax: Float size(File, [String])
497
- Varieties: Float size(File?, [String])
498
- Float size(Array[File], [String])
499
- Float size(Array[File?], [String])
500
- """
501
-
502
- if f is None:
503
- return 0
504
-
505
- # it is possible that size() is called directly (e.g.: size('file')) and so it is not treated as a file.
506
- if isinstance(f, str):
507
- f = WDLFile(file_path=f)
508
- elif isinstance(f, list):
509
- f = [WDLFile(file_path=sf) if isinstance(sf, str) else sf for sf in f]
510
-
511
- assert isinstance(f, (WDLFile, list)), f'size() excepts a "File" or "File?" argument! Not: {type(f)}'
512
-
513
- # validate the input. fileStore is only required if the input is not processed.
514
- f = process_infile(f, fileStore)
515
-
516
- divisor = bytes_in_unit(unit)
517
-
518
- if isinstance(f, list):
519
- total_size = sum(file.file_path.size for file in f)
520
- return total_size / divisor
521
-
522
- fileID = f.file_path
523
- return fileID.size / divisor
524
-
525
-
526
- def select_first(values):
527
- for var in values:
528
- if var:
529
- return var
530
- raise ValueError(f'No defined variables found for select_first array: {str(values)}')
531
-
532
-
533
- def combine_dicts(dict1, dict2):
534
- combineddict= {}
535
- for k, v in dict1.items():
536
- counter1 = 0
537
- while isinstance(v, list):
538
- counter1 += 1
539
- v = v[0]
540
- break
541
-
542
- for k, v in dict2.items():
543
- counter2 = 0
544
- while isinstance(v, list):
545
- counter2 += 1
546
- v = v[0]
547
- break
548
-
549
- for k in dict1:
550
- if counter1 > counter2:
551
- combineddict[k] = dict1[k]
552
- combineddict[k].append(dict2[k])
553
- elif counter1 < counter2:
554
- combineddict[k] = dict2[k]
555
- combineddict[k].append(dict1[k])
556
- else:
557
- combineddict[k] = [dict1[k], dict2[k]]
558
- return combineddict
559
-
560
-
561
- def basename(path, suffix=None):
562
- """https://software.broadinstitute.org/wdl/documentation/article?id=10554"""
563
- path = path.strip()
564
- if suffix:
565
- suffix = suffix.strip()
566
- if path.endswith(suffix):
567
- path = path[:-len(suffix)]
568
- return os.path.basename(path)
569
-
570
-
571
- def heredoc_wdl(template, dictionary={}, indent=''):
572
- template = textwrap.dedent(template).format(**dictionary)
573
- return template.replace('\n', '\n' + indent) + '\n'
574
-
575
-
576
- def floor(i: Union[int, float]) -> int:
577
- """
578
- Converts a Float value into an Int by rounding down to the next lower integer.
579
- """
580
- return math.floor(i)
581
-
582
-
583
- def ceil(i: Union[int, float]) -> int:
584
- """
585
- Converts a Float value into an Int by rounding up to the next higher integer.
586
- """
587
- return math.ceil(i)
588
-
589
-
590
- def read_lines(path: str) -> List[str]:
591
- """
592
- Given a file-like object (`String`, `File`) as a parameter, this will read each
593
- line as a string and return an `Array[String]` representation of the lines in
594
- the file.
595
-
596
- WDL syntax: Array[String] read_lines(String|File)
597
- """
598
- # file should already be imported locally via `process_and_read_file`
599
- with open(path) as f:
600
- return f.read().rstrip('\n').split('\n')
601
-
602
-
603
- def read_tsv(path: str, delimiter: str = '\t') -> List[List[str]]:
604
- """
605
- Take a tsv filepath and return an array; e.g. [[],[],[]].
606
-
607
- For example, a file containing:
608
-
609
- 1 2 3
610
- 4 5 6
611
- 7 8 9
612
-
613
- would return the array: [['1','2','3'], ['4','5','6'], ['7','8','9']]
614
-
615
- WDL syntax: Array[Array[String]] read_tsv(String|File)
616
- """
617
- tsv_array = []
618
- with open(path) as f:
619
- data_file = csv.reader(f, delimiter=delimiter)
620
- for line in data_file:
621
- tsv_array.append(line)
622
- return tsv_array
623
-
624
-
625
- def read_csv(path: str) -> List[List[str]]:
626
- """
627
- Take a csv filepath and return an array; e.g. [[],[],[]].
628
-
629
- For example, a file containing:
630
-
631
- 1,2,3
632
- 4,5,6
633
- 7,8,9
634
-
635
- would return the array: [['1','2','3'], ['4','5','6'], ['7','8','9']]
636
- """
637
- return read_tsv(path, delimiter=",")
638
-
639
-
640
- def read_json(path: str) -> Any:
641
- """
642
- The `read_json()` function takes one parameter, which is a file-like object
643
- (`String`, `File`) and returns a data type which matches the data
644
- structure in the JSON file. See
645
- https://github.com/openwdl/wdl/blob/main/versions/development/SPEC.md#mixed-read_jsonstringfile
646
-
647
- WDL syntax: mixed read_json(String|File)
648
- """
649
- with open(path) as f:
650
- return json.load(f)
651
-
652
-
653
- def read_map(path: str) -> Dict[str, str]:
654
- """
655
- Given a file-like object (`String`, `File`) as a parameter, this will read each
656
- line from a file and expect the line to have the format `col1\tcol2`. In other
657
- words, the file-like object must be a two-column TSV file.
658
-
659
- WDL syntax: Map[String, String] read_map(String|File)
660
- """
661
- d = dict()
662
- with open(path) as f:
663
- for line in f:
664
- line = line.rstrip()
665
- if not line:
666
- # remove extra lines
667
- continue
668
- key, value = line.split('\t', 1)
669
- d[key] = value.strip()
670
- return d
671
-
672
-
673
def read_int(path: Union[str, WDLFile]) -> int:
    """
    Read a file expected to contain a single line holding one integer, and
    return that integer.

    WDL syntax: Int read_int(String|File)
    """
    if isinstance(path, WDLFile):
        # Unwrap a WDL File object down to its filesystem path.
        path = path.file_path

    with open(path) as int_file:
        contents = int_file.read()
    return int(contents.strip())
685
-
686
-
687
def read_string(path: Union[str, WDLFile]) -> str:
    """
    Read a file expected to contain a single line holding one string, and
    return that string with surrounding whitespace stripped.

    WDL syntax: String read_string(String|File)
    """
    if isinstance(path, WDLFile):
        # Unwrap a WDL File object down to its filesystem path.
        path = path.file_path

    with open(path) as string_file:
        # .read() already yields a str, so no explicit cast is needed.
        return string_file.read().strip()
699
-
700
-
701
def read_float(path: Union[str, WDLFile]) -> float:
    """
    Read a file expected to contain a single line holding one floating point
    number, and return that float.

    WDL syntax: Float read_float(String|File)
    """
    if isinstance(path, WDLFile):
        # Unwrap a WDL File object down to its filesystem path.
        path = path.file_path

    with open(path) as float_file:
        contents = float_file.read()
    return float(contents.strip())
713
-
714
-
715
def read_boolean(path: Union[str, WDLFile]) -> bool:
    """
    Read a file expected to contain a single line holding one Boolean value
    (either "true" or "false"), and return that value.

    WDL syntax: Boolean read_boolean(String|File)
    """
    if isinstance(path, WDLFile):
        # Unwrap a WDL File object down to its filesystem path.
        path = path.file_path

    with open(path) as bool_file:
        contents = bool_file.read()
    # Case-insensitive match; any content other than "true" yields False.
    return contents.strip().lower() == 'true'
728
-
729
-
730
- def _get_temp_file_path(function_name: str, temp_dir: Optional[str] = None) -> str:
731
- """
732
- Get a unique path with basename in the format of "{function_name}_{UUID}.tmp".
733
- """
734
-
735
- if not temp_dir:
736
- temp_dir = os.getcwd()
737
-
738
- # Cromwell uses the MD5 checksum of the content as part of the file name. We use a UUID instead
739
- # for now, since we're writing line by line via a context manager.
740
- # md5sum = hashlib.md5(content).hexdigest()
741
- # name = f'{function_name}_{md5sum}.tmp'
742
-
743
- name = f'{function_name}_{uuid.uuid4()}.tmp'
744
- return os.path.join(temp_dir, 'execution', name)
745
-
746
-
747
def write_lines(in_lines: List[str],
                temp_dir: Optional[str] = None,
                file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Write each element of an `Array[String]`-compatible list to its own line
    of a new file, using `\n` as the line separator, and return the path.

    WDL syntax: File write_lines(Array[String])
    """
    assert isinstance(in_lines, list), f'write_lines() requires "{in_lines}" to be a list! Not: {type(in_lines)}'

    path = _get_temp_file_path('write_lines', temp_dir)

    with open(path, 'w') as out:
        out.writelines(f'{line}\n' for line in in_lines)

    if file_store:
        # Register the output with the workflow's file store so it persists.
        file_store.writeGlobalFile(path, cleanup=True)

    return path
768
-
769
-
770
def write_tsv(in_tsv: List[List[str]],
              delimiter: str = '\t',
              temp_dir: Optional[str] = None,
              file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Write an `Array[Array[String]]`-compatible structure out as a TSV file
    and return the path.

    WDL syntax: File write_tsv(Array[Array[String]])
    """
    assert isinstance(in_tsv, list), f'write_tsv() requires "{in_tsv}" to be a list! Not: {type(in_tsv)}'

    path = _get_temp_file_path('write_tsv', temp_dir)

    with open(path, 'w') as out:
        csv.writer(out, delimiter=delimiter).writerows(in_tsv)

    if file_store:
        # Register the output with the workflow's file store so it persists.
        file_store.writeGlobalFile(path, cleanup=True)

    return path
793
-
794
-
795
def write_json(in_json: Any,
               indent: Union[None, int, str] = None,
               separators: Optional[Tuple[str, str]] = (',', ':'),
               temp_dir: Optional[str] = None,
               file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Serialize a value of any supported type to a JSON file and return the
    path. See the table in the definition of
    https://github.com/openwdl/wdl/blob/main/versions/development/SPEC.md#mixed-read_jsonstringfile

    WDL syntax: File write_json(mixed)
    """

    path = _get_temp_file_path('write_json', temp_dir)

    with open(path, 'w') as out:
        # WDLJSONEncoder handles the WDL-specific types during serialization.
        json.dump(in_json, out, indent=indent, separators=separators, cls=WDLJSONEncoder)

    if file_store:
        # Register the output with the workflow's file store so it persists.
        file_store.writeGlobalFile(path, cleanup=True)

    return path
817
-
818
-
819
def write_map(in_map: Dict[str, str],
              temp_dir: Optional[str] = None,
              file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Write a `Map[String, String]`-compatible dict out as a two-column TSV
    file and return the path.

    WDL syntax: File write_map(Map[String, String])
    """
    assert isinstance(in_map, dict), f'write_map() requires "{in_map}" to be a dict! Not: {type(in_map)}'

    path = _get_temp_file_path('write_map', temp_dir)

    with open(path, 'w') as out:
        out.writelines(f'{key}\t{val}\n' for key, val in in_map.items())

    if file_store:
        # Register the output with the workflow's file store so it persists.
        file_store.writeGlobalFile(path, cleanup=True)

    return path
840
-
841
-
842
def wdl_range(num: int) -> List[int]:
    """
    Create an array of integers of length equal to the given argument,
    counting up from zero.

    WDL syntax: Array[Int] range(Int)
    """
    if isinstance(num, int) and num >= 0:
        return list(range(num))

    raise WDLRuntimeError(f'range() requires an integer greater than or equal to 0 (but got {num})')
853
-
854
-
855
def transpose(in_array: List[List[Any]]) -> List[List[Any]]:
    """
    Transpose a two dimensional array according to the standard matrix
    transpose rules.

    WDL syntax: Array[Array[X]] transpose(Array[Array[X]])
    """
    assert isinstance(in_array, list), f'transpose() requires "{in_array}" to be a list! Not: {type(in_array)}'

    expected_size = len(in_array[0]) if in_array else 0
    for row in in_array:
        assert isinstance(row, list), f'transpose() requires all collections to be a list! Not: {type(row)}'
        # zip() would silently truncate ragged rows, but Cromwell rejects them.
        assert len(row) == expected_size, 'transpose() requires all collections have the same size!'

    return [list(column) for column in zip(*in_array)]
870
-
871
-
872
def length(in_array: List[Any]) -> int:
    """
    Given an Array, the `length` function returns the number of elements in the Array
    as an Int.

    WDL syntax: Int length(Array[X])

    :raises WDLRuntimeError: if the input is anything other than a list.
    """
    if not isinstance(in_array, list):
        # Cromwell throws an exception for anything other than a WDL Array.
        # Fixed: the message used WDL-style ${} interpolation inside a Python
        # f-string, which left a literal '$' in the rendered message; now it
        # matches the quoting style of the sibling functions.
        raise WDLRuntimeError(f'length() requires "{in_array}" to be a list! Not: {type(in_array)}')

    return len(in_array)
882
-
883
-
884
def wdl_zip(left: List[Any], right: List[Any]) -> List[WDLPair]:
    """
    Return the dot product of the two arrays as an array of Pairs. Arrays of
    different lengths are an error.

    WDL syntax: Array[Pair[X,Y]] zip(Array[X], Array[Y])
    """
    if not (isinstance(left, list) and isinstance(right, list)):
        raise WDLRuntimeError(f'zip() requires both inputs to be lists! Not: {type(left)} and {type(right)}')

    if len(left) != len(right):
        raise WDLRuntimeError('zip() requires that input values have the same size!')

    return [WDLPair(left=left_val, right=right_val) for left_val, right_val in zip(left, right)]
898
-
899
-
900
def cross(left: List[Any], right: List[Any]) -> List[WDLPair]:
    """
    Return the cross product of the two arrays. Array[Y][1] appears before
    Array[X][1] in the output.

    WDL syntax: Array[Pair[X,Y]] cross(Array[X], Array[Y])
    """
    if not (isinstance(left, list) and isinstance(right, list)):
        raise WDLRuntimeError(f'cross() requires both inputs to be Array[]! Not: {type(left)} and {type(right)}')

    # The right array is the inner loop, so it varies fastest in the output.
    return [WDLPair(left=left_val, right=right_val) for left_val in left for right_val in right]
911
-
912
-
913
def as_pairs(in_map: dict) -> List[WDLPair]:
    """
    Convert a Map into an Array of Pairs: each key becomes the left element
    of a Pair and its value the right element. The order of the Pairs in the
    resulting Array matches the order of the key/value pairs in the Map.

    WDL syntax: Array[Pair[X,Y]] as_pairs(Map[X,Y])
    """
    if not isinstance(in_map, dict):
        raise WDLRuntimeError(f'as_pairs() requires "{in_map}" to be Map[]! Not: {type(in_map)}')

    return [WDLPair(left=key, right=value) for key, value in in_map.items()]
926
-
927
-
928
def as_map(in_array: List[WDLPair]) -> dict:
    """
    Given an Array consisting of Pairs, the `as_map` function returns a Map in
    which the left elements of the Pairs are the keys and the right elements the
    values.

    WDL syntax: Map[X,Y] as_map(Array[Pair[X,Y]])

    :raises WDLRuntimeError: if the input is not a list, or if two Pairs share
        the same left element (duplicate key).
    """
    if not isinstance(in_array, list):
        raise WDLRuntimeError(f'as_map() requires "{in_array}" to be a list! Not: {type(in_array)}')

    # Named 'result' rather than 'map' to avoid shadowing the builtin.
    result: dict = {}

    for pair in in_array:
        # Fixed: test key *presence*, not value truthiness. The old
        # `map.get(pair.left)` check let a duplicate key slip through (and
        # silently overwrite) whenever its first value was falsy, e.g.
        # 0, '', None, or False.
        if pair.left in result:
            raise WDLRuntimeError('Cannot evaluate "as_map()" with duplicated keys.')

        result[pair.left] = pair.right

    return result
948
-
949
-
950
def keys(in_map: dict) -> list:
    """
    Return an Array consisting of the keys in the given Map. The order of the
    keys in the resulting Array is the same as the order of the Pairs in the
    Map.

    WDL syntax: Array[X] keys(Map[X,Y])
    """

    return [key for key in in_map]
960
-
961
-
962
def collect_by_key(in_array: List[WDLPair]) -> dict:
    """
    Given an Array consisting of Pairs, the `collect_by_key` function returns a
    Map from each distinct left element to the Array of right elements that
    shared it, preserving first-seen key order and per-key value order.

    WDL syntax: Map[X,Array[Y]] collect_by_key(Array[Pair[X,Y]])

    :raises WDLRuntimeError: if the input is not a list.
    """
    if not isinstance(in_array, list):
        # Fixed: the message previously said 'as_map()' — a copy-paste from
        # that sibling function — which misreported which call failed.
        raise WDLRuntimeError(f'collect_by_key() requires "{in_array}" to be a list! Not: {type(in_array)}')

    # Named 'grouped' rather than 'map' to avoid shadowing the builtin.
    grouped: dict = {}

    for pair in in_array:
        grouped.setdefault(pair.left, []).append(pair.right)

    return grouped
979
-
980
-
981
def flatten(in_array: List[list]) -> list:
    """
    Concatenate all member arrays of an array of arrays, in order of
    appearance, into a single array. Elements are not deduplicated.

    WDL syntax: Array[X] flatten(Array[Array[X]])
    """
    assert isinstance(in_array, list), f'flatten() requires "{in_array}" to be a list! Not: {type(in_array)}'

    flattened: list = []

    for sub_array in in_array:
        assert isinstance(sub_array, list), f'flatten() requires all collections to be a list! Not: {type(sub_array)}'
        flattened += sub_array

    return flattened