toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/wdl/wdl_functions.py
DELETED
|
@@ -1,997 +0,0 @@
|
|
|
1
|
-
# Copyright (C) 2015-2021 Regents of the University of California
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
import csv
|
|
15
|
-
import json
|
|
16
|
-
import logging
|
|
17
|
-
import math
|
|
18
|
-
import os
|
|
19
|
-
import re
|
|
20
|
-
import subprocess
|
|
21
|
-
import textwrap
|
|
22
|
-
import uuid
|
|
23
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
24
|
-
|
|
25
|
-
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
26
|
-
from toil.lib.conversions import bytes_in_unit
|
|
27
|
-
from toil.lib.resources import glob # type: ignore
|
|
28
|
-
from toil.wdl.wdl_types import WDLFile, WDLPair
|
|
29
|
-
|
|
30
|
-
logger = logging.getLogger(__name__)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class WDLRuntimeError(Exception):
|
|
34
|
-
""" WDL-related run-time error."""
|
|
35
|
-
|
|
36
|
-
def __init__(self, message):
|
|
37
|
-
super().__init__(message)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class WDLJSONEncoder(json.JSONEncoder):
|
|
41
|
-
"""
|
|
42
|
-
Extended JSONEncoder to support WDL-specific JSON encoding.
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
def default(self, obj):
|
|
46
|
-
if isinstance(obj, WDLPair):
|
|
47
|
-
return obj.to_dict()
|
|
48
|
-
return json.JSONEncoder.default(self, obj)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def generate_docker_bashscript_file(temp_dir, docker_dir, globs, cmd, job_name):
|
|
52
|
-
'''
|
|
53
|
-
Creates a bashscript to inject into a docker container for the job.
|
|
54
|
-
|
|
55
|
-
This script wraps the job command(s) given in a bash script, hard links the
|
|
56
|
-
outputs and returns an "rc" file containing the exit code. All of this is
|
|
57
|
-
done in an effort to parallel the Broad's cromwell engine, which is the
|
|
58
|
-
native WDL runner. As they've chosen to write and then run a bashscript for
|
|
59
|
-
every command, so shall we.
|
|
60
|
-
|
|
61
|
-
:param temp_dir: The current directory outside of docker to deposit the
|
|
62
|
-
bashscript into, which will be the bind mount that docker
|
|
63
|
-
loads files from into its own containerized filesystem.
|
|
64
|
-
This is usually the tempDir created by this individual job
|
|
65
|
-
using 'tempDir = job.fileStore.getLocalTempDir()'.
|
|
66
|
-
:param docker_dir: The working directory inside of the docker container
|
|
67
|
-
which is bind mounted to 'temp_dir'. By default this is
|
|
68
|
-
'data'.
|
|
69
|
-
:param globs: A list of expected output files to retrieve as glob patterns
|
|
70
|
-
that will be returned as hard links to the current working
|
|
71
|
-
directory.
|
|
72
|
-
:param cmd: A bash command to be written into the bash script and run.
|
|
73
|
-
:param job_name: The job's name, only used to write in a file name
|
|
74
|
-
identifying the script as written for that job.
|
|
75
|
-
Will be used to call the script later.
|
|
76
|
-
:return: Nothing, but it writes and deposits a bash script in temp_dir
|
|
77
|
-
intended to be run inside of a docker container for this job.
|
|
78
|
-
'''
|
|
79
|
-
wdl_copyright = heredoc_wdl(''' \n
|
|
80
|
-
# Borrowed/rewritten from the Broad's Cromwell implementation. As
|
|
81
|
-
# that is under a BSD-ish license, I include here the license off
|
|
82
|
-
# of their GitHub repo. Thank you Broadies!
|
|
83
|
-
|
|
84
|
-
# Copyright (c) 2015, Broad Institute, Inc.
|
|
85
|
-
# All rights reserved.
|
|
86
|
-
|
|
87
|
-
# Redistribution and use in source and binary forms, with or without
|
|
88
|
-
# modification, are permitted provided that the following conditions are met:
|
|
89
|
-
|
|
90
|
-
# * Redistributions of source code must retain the above copyright notice, this
|
|
91
|
-
# list of conditions and the following disclaimer.
|
|
92
|
-
|
|
93
|
-
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
94
|
-
# this list of conditions and the following disclaimer in the documentation
|
|
95
|
-
# and/or other materials provided with the distribution.
|
|
96
|
-
|
|
97
|
-
# * Neither the name Broad Institute, Inc. nor the names of its
|
|
98
|
-
# contributors may be used to endorse or promote products derived from
|
|
99
|
-
# this software without specific prior written permission.
|
|
100
|
-
|
|
101
|
-
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
102
|
-
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
103
|
-
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
104
|
-
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
105
|
-
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
106
|
-
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
107
|
-
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
108
|
-
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
109
|
-
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
110
|
-
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
|
|
111
|
-
|
|
112
|
-
# make a temp directory w/identifier
|
|
113
|
-
''')
|
|
114
|
-
prefix_dict = {"docker_dir": docker_dir,
|
|
115
|
-
"cmd": cmd}
|
|
116
|
-
bashfile_prefix = heredoc_wdl('''
|
|
117
|
-
tmpDir=$(mktemp -d /{docker_dir}/execution/tmp.XXXXXX)
|
|
118
|
-
chmod 777 $tmpDir
|
|
119
|
-
# set destination for java to deposit all of its files
|
|
120
|
-
export _JAVA_OPTIONS=-Djava.io.tmpdir=$tmpDir
|
|
121
|
-
export TMPDIR=$tmpDir
|
|
122
|
-
|
|
123
|
-
(
|
|
124
|
-
cd /{docker_dir}/execution
|
|
125
|
-
{cmd}
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
# gather the input command return code
|
|
129
|
-
echo $? > "$tmpDir/rc.tmp"
|
|
130
|
-
|
|
131
|
-
''', prefix_dict)
|
|
132
|
-
|
|
133
|
-
bashfile_string = '#!/bin/bash' + wdl_copyright + bashfile_prefix
|
|
134
|
-
|
|
135
|
-
begin_globbing_string = heredoc_wdl('''
|
|
136
|
-
(
|
|
137
|
-
mkdir "$tmpDir/globs"
|
|
138
|
-
''')
|
|
139
|
-
|
|
140
|
-
bashfile_string = bashfile_string + begin_globbing_string
|
|
141
|
-
|
|
142
|
-
for glob_input in globs:
|
|
143
|
-
add_this_glob = \
|
|
144
|
-
'( ln -L ' + glob_input + \
|
|
145
|
-
' "$tmpDir/globs" 2> /dev/null ) || ( ln ' + glob_input + \
|
|
146
|
-
' "$tmpDir/globs" )\n'
|
|
147
|
-
bashfile_string = bashfile_string + add_this_glob
|
|
148
|
-
|
|
149
|
-
bashfile_suffix = heredoc_wdl('''
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
# flush RAM to disk
|
|
153
|
-
sync
|
|
154
|
-
|
|
155
|
-
mv "$tmpDir/rc.tmp" "$tmpDir/rc"
|
|
156
|
-
chmod -R 777 $tmpDir
|
|
157
|
-
''')
|
|
158
|
-
|
|
159
|
-
bashfile_string = bashfile_string + bashfile_suffix
|
|
160
|
-
|
|
161
|
-
with open(os.path.join(temp_dir, job_name + '_script.sh'), 'w') as bashfile:
|
|
162
|
-
bashfile.write(bashfile_string)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def process_single_infile(wdl_file: WDLFile, fileStore: AbstractFileStore) -> WDLFile:
|
|
166
|
-
f = wdl_file.file_path
|
|
167
|
-
logger.info(f'Importing {f} into the jobstore.')
|
|
168
|
-
if f.startswith('http://') or f.startswith('https://') or \
|
|
169
|
-
f.startswith('file://') or f.startswith('wasb://'):
|
|
170
|
-
filepath = fileStore.importFile(f)
|
|
171
|
-
preserveThisFilename = os.path.basename(f)
|
|
172
|
-
elif f.startswith('s3://'):
|
|
173
|
-
try:
|
|
174
|
-
filepath = fileStore.importFile(f)
|
|
175
|
-
preserveThisFilename = os.path.basename(f)
|
|
176
|
-
except:
|
|
177
|
-
from toil.lib.ec2nodes import EC2Regions
|
|
178
|
-
success = False
|
|
179
|
-
for region in EC2Regions:
|
|
180
|
-
try:
|
|
181
|
-
html_path = f'http://s3.{region}.amazonaws.com/' + f[5:]
|
|
182
|
-
filepath = fileStore.importFile(html_path)
|
|
183
|
-
preserveThisFilename = os.path.basename(f)
|
|
184
|
-
success = True
|
|
185
|
-
except:
|
|
186
|
-
pass
|
|
187
|
-
if not success:
|
|
188
|
-
raise RuntimeError('Unable to import: ' + f)
|
|
189
|
-
elif f.startswith('gs://'):
|
|
190
|
-
f = 'https://storage.googleapis.com/' + f[5:]
|
|
191
|
-
filepath = fileStore.importFile(f)
|
|
192
|
-
preserveThisFilename = os.path.basename(f)
|
|
193
|
-
else:
|
|
194
|
-
filepath = fileStore.importFile("file://" + os.path.abspath(f))
|
|
195
|
-
preserveThisFilename = os.path.basename(f)
|
|
196
|
-
return WDLFile(file_path=filepath, file_name=preserveThisFilename, imported=True)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def process_infile(f: Any, fileStore: AbstractFileStore):
|
|
200
|
-
"""
|
|
201
|
-
Takes any input and imports the WDLFile into the fileStore.
|
|
202
|
-
|
|
203
|
-
This returns the input importing all WDLFile instances to the fileStore. Toil
|
|
204
|
-
does not preserve a file's original name upon import and so the WDLFile also keeps
|
|
205
|
-
track of this.
|
|
206
|
-
|
|
207
|
-
:param f: A primitive, WDLFile, or a container. A file needs to be a WDLFile instance
|
|
208
|
-
to be imported.
|
|
209
|
-
:param fileStore: The fileStore object that is called to load files into the fileStore.
|
|
210
|
-
"""
|
|
211
|
-
if isinstance(f, WDLFile):
|
|
212
|
-
# check if this has already been imported into the fileStore
|
|
213
|
-
if f.imported:
|
|
214
|
-
return f
|
|
215
|
-
else:
|
|
216
|
-
return process_single_infile(f, fileStore)
|
|
217
|
-
elif isinstance(f, list):
|
|
218
|
-
# recursively call process_infile() to handle cases like Array[Map[String, File]]
|
|
219
|
-
return [process_infile(sf, fileStore) for sf in f]
|
|
220
|
-
elif isinstance(f, WDLPair):
|
|
221
|
-
f.left = process_infile(f.left, fileStore)
|
|
222
|
-
f.right = process_infile(f.right, fileStore)
|
|
223
|
-
return f
|
|
224
|
-
elif isinstance(f, dict):
|
|
225
|
-
return {process_infile(k, fileStore): process_infile(v, fileStore) for k, v in f.items()}
|
|
226
|
-
elif isinstance(f, (int, str, bool, float)):
|
|
227
|
-
return f
|
|
228
|
-
else:
|
|
229
|
-
raise WDLRuntimeError(f'Error processing file: {str(f)}')
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def sub(input_str: str, pattern: str, replace: str) -> str:
|
|
233
|
-
"""
|
|
234
|
-
Given 3 String parameters `input`, `pattern`, `replace`, this function will
|
|
235
|
-
replace any occurrence matching `pattern` in `input` by `replace`.
|
|
236
|
-
`pattern` is expected to be a regular expression. Details of regex evaluation
|
|
237
|
-
will depend on the execution engine running the WDL.
|
|
238
|
-
|
|
239
|
-
WDL syntax: String sub(String, String, String)
|
|
240
|
-
"""
|
|
241
|
-
|
|
242
|
-
if isinstance(input_str, WDLFile):
|
|
243
|
-
input_str = input_str.file_name
|
|
244
|
-
if isinstance(pattern, WDLFile):
|
|
245
|
-
pattern = pattern.file_name
|
|
246
|
-
if isinstance(replace, WDLFile):
|
|
247
|
-
replace = replace.file_name
|
|
248
|
-
|
|
249
|
-
return re.sub(pattern=str(pattern), repl=str(replace), string=str(input_str))
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
def defined(i):
|
|
253
|
-
if i:
|
|
254
|
-
return True
|
|
255
|
-
return False
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
def process_single_outfile(wdl_file: WDLFile, fileStore, workDir, outDir) -> WDLFile:
|
|
259
|
-
f = wdl_file.file_path
|
|
260
|
-
if os.path.exists(f):
|
|
261
|
-
output_f_path = f
|
|
262
|
-
elif os.path.exists(os.path.abspath(f)):
|
|
263
|
-
output_f_path = os.path.abspath(f)
|
|
264
|
-
elif os.path.exists(os.path.join(workDir, 'execution', f)):
|
|
265
|
-
output_f_path = os.path.join(workDir, 'execution', f)
|
|
266
|
-
elif os.path.exists(os.path.join('execution', f)):
|
|
267
|
-
output_f_path = os.path.join('execution', f)
|
|
268
|
-
elif os.path.exists(os.path.join(workDir, f)):
|
|
269
|
-
output_f_path = os.path.join(workDir, f)
|
|
270
|
-
elif os.path.exists(os.path.join(outDir, f)):
|
|
271
|
-
output_f_path = os.path.join(outDir, f)
|
|
272
|
-
else:
|
|
273
|
-
tmp = subprocess.check_output(['ls', '-lha', workDir]).decode('utf-8')
|
|
274
|
-
exe = subprocess.check_output(['ls', '-lha', os.path.join(workDir, 'execution')]).decode('utf-8')
|
|
275
|
-
for std_file in ('stdout', 'stderr'):
|
|
276
|
-
std_file = os.path.join(workDir, 'execution', std_file)
|
|
277
|
-
if os.path.exists(std_file):
|
|
278
|
-
with open(std_file, 'rb') as f:
|
|
279
|
-
logger.info(f.read())
|
|
280
|
-
|
|
281
|
-
raise RuntimeError('OUTPUT FILE: {} was not found in {}!\n'
|
|
282
|
-
'{}\n\n'
|
|
283
|
-
'{}\n'.format(f, os.getcwd(), tmp, exe))
|
|
284
|
-
output_file = fileStore.writeGlobalFile(output_f_path)
|
|
285
|
-
preserveThisFilename = os.path.basename(output_f_path)
|
|
286
|
-
fileStore.export_file(output_file, "file://" + os.path.join(os.path.abspath(outDir), preserveThisFilename))
|
|
287
|
-
return WDLFile(file_path=output_file, file_name=preserveThisFilename, imported=True)
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
def process_outfile(f, fileStore, workDir, outDir):
|
|
291
|
-
if isinstance(f, WDLFile):
|
|
292
|
-
return process_single_outfile(f, fileStore, workDir, outDir)
|
|
293
|
-
elif isinstance(f, list):
|
|
294
|
-
# recursively call process_outfile() to handle cases like Array[Map[String, File]]
|
|
295
|
-
return [process_outfile(sf, fileStore, workDir, outDir) for sf in f]
|
|
296
|
-
elif isinstance(f, WDLPair):
|
|
297
|
-
f.left = process_outfile(f.left, fileStore, workDir, outDir)
|
|
298
|
-
f.right = process_outfile(f.right, fileStore, workDir, outDir)
|
|
299
|
-
return f
|
|
300
|
-
elif isinstance(f, dict):
|
|
301
|
-
return {process_outfile(k, fileStore, workDir, outDir):
|
|
302
|
-
process_outfile(v, fileStore, workDir, outDir) for k, v in f.items()}
|
|
303
|
-
elif isinstance(f, (int, str, bool, float)):
|
|
304
|
-
return f
|
|
305
|
-
else:
|
|
306
|
-
raise WDLRuntimeError(f'Error processing file: {str(f)}')
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def abspath_single_file(f: WDLFile, cwd: str) -> WDLFile:
|
|
310
|
-
path = f.file_path
|
|
311
|
-
if path != os.path.abspath(path):
|
|
312
|
-
f.file_path = os.path.join(cwd, path)
|
|
313
|
-
return f
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
def abspath_file(f: Any, cwd: str):
|
|
317
|
-
if not f:
|
|
318
|
-
# in the case of "optional" files (same treatment in 'process_and_read_file()')
|
|
319
|
-
# TODO: handle this at compile time, not here
|
|
320
|
-
return ''
|
|
321
|
-
if isinstance(f, WDLFile):
|
|
322
|
-
# check if this has already been imported into the fileStore
|
|
323
|
-
if f.imported:
|
|
324
|
-
return f
|
|
325
|
-
path = f.file_path
|
|
326
|
-
if path.startswith('s3://') or path.startswith('http://') or path.startswith('https://') or \
|
|
327
|
-
path.startswith('file://') or path.startswith('wasb://') or path.startswith('gs://'):
|
|
328
|
-
return f
|
|
329
|
-
return abspath_single_file(f, cwd)
|
|
330
|
-
elif isinstance(f, list):
|
|
331
|
-
# recursively call abspath_file() to handle cases like Array[Map[String, File]]
|
|
332
|
-
return [abspath_file(sf, cwd) for sf in f]
|
|
333
|
-
elif isinstance(f, WDLPair):
|
|
334
|
-
f.left = abspath_file(f.left, cwd)
|
|
335
|
-
f.right = abspath_file(f.right, cwd)
|
|
336
|
-
return f
|
|
337
|
-
elif isinstance(f, dict):
|
|
338
|
-
return {abspath_file(k, cwd): abspath_file(v, cwd) for k, v in f.items()}
|
|
339
|
-
elif isinstance(f, (int, str, bool, float)):
|
|
340
|
-
return f
|
|
341
|
-
else:
|
|
342
|
-
raise WDLRuntimeError(f'Error processing file: ({str(f)}) of type: ({str(type(f))}).')
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
def read_single_file(f: WDLFile, tempDir, fileStore, docker=False) -> str:
|
|
346
|
-
import os
|
|
347
|
-
try:
|
|
348
|
-
fpath = fileStore.readGlobalFile(f.file_path, userPath=os.path.join(tempDir, f.file_name))
|
|
349
|
-
except:
|
|
350
|
-
fpath = os.path.join(tempDir, f.file_name)
|
|
351
|
-
return fpath
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
def read_file(f: Any, tempDir: str, fileStore: AbstractFileStore, docker: bool = False):
|
|
355
|
-
if isinstance(f, WDLFile):
|
|
356
|
-
return read_single_file(f, tempDir, fileStore, docker=docker)
|
|
357
|
-
elif isinstance(f, list):
|
|
358
|
-
# recursively call read_file() to handle cases like Array[Map[String, File]]
|
|
359
|
-
return [read_file(sf, tempDir, fileStore, docker=docker) for sf in f]
|
|
360
|
-
elif isinstance(f, WDLPair):
|
|
361
|
-
f.left = read_file(f.left, tempDir, fileStore, docker=docker)
|
|
362
|
-
f.right = read_file(f.right, tempDir, fileStore, docker=docker)
|
|
363
|
-
return f
|
|
364
|
-
elif isinstance(f, dict):
|
|
365
|
-
return {read_file(k, tempDir, fileStore, docker=docker):
|
|
366
|
-
read_file(v, tempDir, fileStore, docker=docker) for k, v in f.items()}
|
|
367
|
-
elif isinstance(f, (int, str, bool, float)):
|
|
368
|
-
return f
|
|
369
|
-
else:
|
|
370
|
-
raise WDLRuntimeError(f'Error processing file: {str(f)}')
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
def process_and_read_file(f, tempDir, fileStore, docker=False):
|
|
374
|
-
if not f:
|
|
375
|
-
# in the case of "optional" files (same treatment in 'abspath_file()')
|
|
376
|
-
# TODO: handle this at compile time, not here and change to the empty string
|
|
377
|
-
return None
|
|
378
|
-
processed_file = process_infile(f, fileStore)
|
|
379
|
-
return read_file(processed_file, tempDir, fileStore, docker=docker)
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
def generate_stdout_file(output, tempDir, fileStore, stderr=False):
|
|
383
|
-
"""
|
|
384
|
-
Create a stdout (or stderr) file from a string or bytes object.
|
|
385
|
-
|
|
386
|
-
:param str|bytes output: A str or bytes object that holds the stdout/stderr text.
|
|
387
|
-
:param str tempDir: The directory to write the stdout file.
|
|
388
|
-
:param fileStore: A fileStore object.
|
|
389
|
-
:param bool stderr: If True, a stderr instead of a stdout file is generated.
|
|
390
|
-
:return: The file path to the generated file.
|
|
391
|
-
"""
|
|
392
|
-
if output is None:
|
|
393
|
-
# write an empty file if there's no stdout/stderr.
|
|
394
|
-
output = b''
|
|
395
|
-
elif isinstance(output, str):
|
|
396
|
-
output = bytes(output, encoding='utf-8')
|
|
397
|
-
|
|
398
|
-
# TODO: we need a way to differentiate the stdout/stderr files in the workflow after execution.
|
|
399
|
-
# Cromwell generates a folder for each task so the file is simply named stdout and lives in
|
|
400
|
-
# the task execution folder. This is not the case with Toil. Though, this would not be a
|
|
401
|
-
# problem with intermediate stdout files as each task has its own temp folder.
|
|
402
|
-
name = 'stderr' if stderr else 'stdout'
|
|
403
|
-
local_path = os.path.join(tempDir, 'execution', name)
|
|
404
|
-
|
|
405
|
-
# import to fileStore then read to local temp file
|
|
406
|
-
with fileStore.writeGlobalFileStream(cleanup=True, basename=name) as (stream, file_id):
|
|
407
|
-
stream.write(output)
|
|
408
|
-
|
|
409
|
-
assert file_id is not None
|
|
410
|
-
return fileStore.readGlobalFile(fileStoreID=file_id, userPath=local_path)
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
def parse_memory(memory):
|
|
414
|
-
"""
|
|
415
|
-
Parses a string representing memory and returns
|
|
416
|
-
an integer # of bytes.
|
|
417
|
-
|
|
418
|
-
:param memory:
|
|
419
|
-
:return:
|
|
420
|
-
"""
|
|
421
|
-
memory = str(memory)
|
|
422
|
-
if 'None' in memory:
|
|
423
|
-
return 2147483648 # toil's default
|
|
424
|
-
try:
|
|
425
|
-
import re
|
|
426
|
-
raw_mem_split = re.split('([a-zA-Z]+)', memory)
|
|
427
|
-
mem_split = []
|
|
428
|
-
|
|
429
|
-
for r in raw_mem_split:
|
|
430
|
-
if r:
|
|
431
|
-
mem_split.append(r.replace(' ', ''))
|
|
432
|
-
|
|
433
|
-
if len(mem_split) == 1:
|
|
434
|
-
return int(memory)
|
|
435
|
-
|
|
436
|
-
if len(mem_split) == 2:
|
|
437
|
-
num = mem_split[0]
|
|
438
|
-
unit = mem_split[1]
|
|
439
|
-
return int(float(num) * bytes_in_unit(unit))
|
|
440
|
-
else:
|
|
441
|
-
raise RuntimeError(f'Memory parsing failed: {memory}')
|
|
442
|
-
except:
|
|
443
|
-
return 2147483648 # toil's default
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
def parse_cores(cores):
|
|
447
|
-
cores = str(cores)
|
|
448
|
-
if 'None' in cores:
|
|
449
|
-
return 1 # toil's default
|
|
450
|
-
if cores:
|
|
451
|
-
return float(cores)
|
|
452
|
-
else:
|
|
453
|
-
return 1
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
def parse_disk(disk):
|
|
457
|
-
disk = str(disk)
|
|
458
|
-
if 'None' in disk:
|
|
459
|
-
return 2147483648 # toil's default
|
|
460
|
-
try:
|
|
461
|
-
total_disk = 0
|
|
462
|
-
disks = disk.split(',')
|
|
463
|
-
for d in disks:
|
|
464
|
-
d = d.strip().split(' ')
|
|
465
|
-
if len(d) > 1:
|
|
466
|
-
for part in d:
|
|
467
|
-
if is_number(part):
|
|
468
|
-
total_disk += parse_memory(f'{part} GB')
|
|
469
|
-
else:
|
|
470
|
-
return parse_memory(d[0]) if parse_memory(d[0]) > 2147483648 else 2147483648
|
|
471
|
-
return total_disk if total_disk > 2147483648 else 2147483648
|
|
472
|
-
except:
|
|
473
|
-
return 2147483648 # toil's default
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
def is_number(s):
|
|
477
|
-
try:
|
|
478
|
-
float(s)
|
|
479
|
-
return True
|
|
480
|
-
except ValueError:
|
|
481
|
-
return False
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
def size(f: Optional[Union[str, WDLFile, List[Union[str, WDLFile]]]] = None,
|
|
485
|
-
unit: Optional[str] = 'B',
|
|
486
|
-
fileStore: Optional[AbstractFileStore] = None) -> float:
|
|
487
|
-
"""
|
|
488
|
-
Given a `File` and a `String` (optional), returns the size of the file in Bytes
|
|
489
|
-
or in the unit specified by the second argument.
|
|
490
|
-
|
|
491
|
-
Supported units are KiloByte ("K", "KB"), MegaByte ("M", "MB"), GigaByte
|
|
492
|
-
("G", "GB"), TeraByte ("T", "TB") (powers of 1000) as well as their binary version
|
|
493
|
-
(https://en.wikipedia.org/wiki/Binary_prefix) "Ki" ("KiB"), "Mi" ("MiB"),
|
|
494
|
-
"Gi" ("GiB"), "Ti" ("TiB") (powers of 1024). Default unit is Bytes ("B").
|
|
495
|
-
|
|
496
|
-
WDL syntax: Float size(File, [String])
|
|
497
|
-
Varieties: Float size(File?, [String])
|
|
498
|
-
Float size(Array[File], [String])
|
|
499
|
-
Float size(Array[File?], [String])
|
|
500
|
-
"""
|
|
501
|
-
|
|
502
|
-
if f is None:
|
|
503
|
-
return 0
|
|
504
|
-
|
|
505
|
-
# it is possible that size() is called directly (e.g.: size('file')) and so it is not treated as a file.
|
|
506
|
-
if isinstance(f, str):
|
|
507
|
-
f = WDLFile(file_path=f)
|
|
508
|
-
elif isinstance(f, list):
|
|
509
|
-
f = [WDLFile(file_path=sf) if isinstance(sf, str) else sf for sf in f]
|
|
510
|
-
|
|
511
|
-
assert isinstance(f, (WDLFile, list)), f'size() excepts a "File" or "File?" argument! Not: {type(f)}'
|
|
512
|
-
|
|
513
|
-
# validate the input. fileStore is only required if the input is not processed.
|
|
514
|
-
f = process_infile(f, fileStore)
|
|
515
|
-
|
|
516
|
-
divisor = bytes_in_unit(unit)
|
|
517
|
-
|
|
518
|
-
if isinstance(f, list):
|
|
519
|
-
total_size = sum(file.file_path.size for file in f)
|
|
520
|
-
return total_size / divisor
|
|
521
|
-
|
|
522
|
-
fileID = f.file_path
|
|
523
|
-
return fileID.size / divisor
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
def select_first(values):
|
|
527
|
-
for var in values:
|
|
528
|
-
if var:
|
|
529
|
-
return var
|
|
530
|
-
raise ValueError(f'No defined variables found for select_first array: {str(values)}')
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
def combine_dicts(dict1, dict2):
|
|
534
|
-
combineddict= {}
|
|
535
|
-
for k, v in dict1.items():
|
|
536
|
-
counter1 = 0
|
|
537
|
-
while isinstance(v, list):
|
|
538
|
-
counter1 += 1
|
|
539
|
-
v = v[0]
|
|
540
|
-
break
|
|
541
|
-
|
|
542
|
-
for k, v in dict2.items():
|
|
543
|
-
counter2 = 0
|
|
544
|
-
while isinstance(v, list):
|
|
545
|
-
counter2 += 1
|
|
546
|
-
v = v[0]
|
|
547
|
-
break
|
|
548
|
-
|
|
549
|
-
for k in dict1:
|
|
550
|
-
if counter1 > counter2:
|
|
551
|
-
combineddict[k] = dict1[k]
|
|
552
|
-
combineddict[k].append(dict2[k])
|
|
553
|
-
elif counter1 < counter2:
|
|
554
|
-
combineddict[k] = dict2[k]
|
|
555
|
-
combineddict[k].append(dict1[k])
|
|
556
|
-
else:
|
|
557
|
-
combineddict[k] = [dict1[k], dict2[k]]
|
|
558
|
-
return combineddict
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
def basename(path, suffix=None):
|
|
562
|
-
"""https://software.broadinstitute.org/wdl/documentation/article?id=10554"""
|
|
563
|
-
path = path.strip()
|
|
564
|
-
if suffix:
|
|
565
|
-
suffix = suffix.strip()
|
|
566
|
-
if path.endswith(suffix):
|
|
567
|
-
path = path[:-len(suffix)]
|
|
568
|
-
return os.path.basename(path)
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
def heredoc_wdl(template, dictionary={}, indent=''):
|
|
572
|
-
template = textwrap.dedent(template).format(**dictionary)
|
|
573
|
-
return template.replace('\n', '\n' + indent) + '\n'
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
def floor(i: Union[int, float]) -> int:
|
|
577
|
-
"""
|
|
578
|
-
Converts a Float value into an Int by rounding down to the next lower integer.
|
|
579
|
-
"""
|
|
580
|
-
return math.floor(i)
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
def ceil(i: Union[int, float]) -> int:
    """
    Convert a Float value into an Int by rounding up to the next higher
    integer.
    """
    rounded_up: int = math.ceil(i)
    return rounded_up
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
def read_lines(path: str) -> List[str]:
    """
    Read a file-like object (`String`, `File`) line by line and return an
    `Array[String]` of the lines in the file. Trailing newlines at the end
    of the file are dropped before splitting.

    WDL syntax: Array[String] read_lines(String|File)
    """
    # The file is expected to already be local (see `process_and_read_file`).
    with open(path) as handle:
        content = handle.read()
    return content.rstrip('\n').split('\n')
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
def read_tsv(path: str, delimiter: str = '\t') -> List[List[str]]:
    """
    Parse a delimited file into a two-dimensional array; e.g. [[],[],[]].

    For example, a tab-separated file containing:

        1   2   3
        4   5   6
        7   8   9

    returns [['1','2','3'], ['4','5','6'], ['7','8','9']].

    WDL syntax: Array[Array[String]] read_tsv(String|File)
    """
    with open(path) as handle:
        return [row for row in csv.reader(handle, delimiter=delimiter)]
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
def read_csv(path: str) -> List[List[str]]:
    """
    Parse a comma-separated file into a two-dimensional array; e.g.
    [[],[],[]].

    For example, a file containing:

        1,2,3
        4,5,6
        7,8,9

    returns [['1','2','3'], ['4','5','6'], ['7','8','9']].
    """
    # Identical to read_tsv, just with a comma separator.
    return read_tsv(path, delimiter=",")
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
def read_json(path: str) -> Any:
    """
    Take a file-like object (`String`, `File`) and return a data type
    matching the data structure in the JSON file. See
    https://github.com/openwdl/wdl/blob/main/versions/development/SPEC.md#mixed-read_jsonstringfile

    WDL syntax: mixed read_json(String|File)
    """
    with open(path) as handle:
        parsed = json.load(handle)
    return parsed
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
def read_map(path: str) -> Dict[str, str]:
    """
    Read a two-column TSV file (each line in the format `col1\\tcol2`)
    into a Map. Blank lines are skipped; values are stripped of
    surrounding whitespace.

    WDL syntax: Map[String, String] read_map(String|File)
    """
    with open(path) as handle:
        # Skip blank lines, then split each remaining line on its first tab.
        pairs = (line.rstrip().split('\t', 1) for line in handle if line.rstrip())
        return {key: value.strip() for key, value in pairs}
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
def read_int(path: Union[str, WDLFile]) -> int:
    """
    Read a file expected to contain a single line holding a single integer,
    and return that integer.

    WDL syntax: Int read_int(String|File)
    """
    actual_path = path.file_path if isinstance(path, WDLFile) else path

    with open(actual_path) as handle:
        return int(handle.read().strip())
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
def read_string(path: Union[str, WDLFile]) -> str:
    """
    Read a file expected to contain a single line holding a single string,
    and return that string (stripped of surrounding whitespace).

    WDL syntax: String read_string(String|File)
    """
    actual_path = path.file_path if isinstance(path, WDLFile) else path

    with open(actual_path) as handle:
        return str(handle.read().strip())
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
def read_float(path: Union[str, WDLFile]) -> float:
    """
    Read a file expected to contain a single line holding a single floating
    point number, and return that float.

    WDL syntax: Float read_float(String|File)
    """
    actual_path = path.file_path if isinstance(path, WDLFile) else path

    with open(actual_path) as handle:
        return float(handle.read().strip())
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
def read_boolean(path: Union[str, WDLFile]) -> bool:
    """
    Read a file expected to contain a single line holding one Boolean value
    (either "true" or "false"), and return that Boolean.

    WDL syntax: Boolean read_boolean(String|File)
    """
    actual_path = path.file_path if isinstance(path, WDLFile) else path

    with open(actual_path) as handle:
        content = handle.read().strip().lower()
    # Anything other than (case-insensitive) "true" is False.
    return content == 'true'
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
def _get_temp_file_path(function_name: str, temp_dir: Optional[str] = None) -> str:
|
|
731
|
-
"""
|
|
732
|
-
Get a unique path with basename in the format of "{function_name}_{UUID}.tmp".
|
|
733
|
-
"""
|
|
734
|
-
|
|
735
|
-
if not temp_dir:
|
|
736
|
-
temp_dir = os.getcwd()
|
|
737
|
-
|
|
738
|
-
# Cromwell uses the MD5 checksum of the content as part of the file name. We use a UUID instead
|
|
739
|
-
# for now, since we're writing line by line via a context manager.
|
|
740
|
-
# md5sum = hashlib.md5(content).hexdigest()
|
|
741
|
-
# name = f'{function_name}_{md5sum}.tmp'
|
|
742
|
-
|
|
743
|
-
name = f'{function_name}_{uuid.uuid4()}.tmp'
|
|
744
|
-
return os.path.join(temp_dir, 'execution', name)
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
def write_lines(in_lines: List[str],
                temp_dir: Optional[str] = None,
                file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Write each element of an `Array[String]`-compatible value to its own
    line of a new temp file, using newline `\\n` characters as line
    separators, and return the file's path. Optionally registers the file
    with the given file store.

    WDL syntax: File write_lines(Array[String])
    """
    assert isinstance(in_lines, list), f'write_lines() requires "{in_lines}" to be a list! Not: {type(in_lines)}'

    path = _get_temp_file_path('write_lines', temp_dir)

    with open(path, 'w') as out:
        out.writelines(f'{line}\n' for line in in_lines)

    if file_store:
        file_store.writeGlobalFile(path, cleanup=True)

    return path
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
def write_tsv(in_tsv: List[List[str]],
              delimiter: str = '\t',
              temp_dir: Optional[str] = None,
              file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Serialize an `Array[Array[String]]`-compatible value as a delimited
    (TSV by default) file and return the new file's path. Optionally
    registers the file with the given file store.

    WDL syntax: File write_tsv(Array[Array[String]])
    """
    assert isinstance(in_tsv, list), f'write_tsv() requires "{in_tsv}" to be a list! Not: {type(in_tsv)}'

    path = _get_temp_file_path('write_tsv', temp_dir)

    with open(path, 'w') as out:
        # writerows() emits one row per inner list, same as looping writerow().
        csv.writer(out, delimiter=delimiter).writerows(in_tsv)

    if file_store:
        file_store.writeGlobalFile(path, cleanup=True)

    return path
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
def write_json(in_json: Any,
               indent: Union[None, int, str] = None,
               separators: Optional[Tuple[str, str]] = (',', ':'),
               temp_dir: Optional[str] = None,
               file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Serialize a value of any type as JSON into a new temp file and return
    the file's path; for the type mapping, see the table in
    https://github.com/openwdl/wdl/blob/main/versions/development/SPEC.md#mixed-read_jsonstringfile
    Optionally registers the file with the given file store.

    WDL syntax: File write_json(mixed)
    """
    path = _get_temp_file_path('write_json', temp_dir)

    # Serialize first, then write the result in one call.
    serialized = json.dumps(in_json, indent=indent, separators=separators, cls=WDLJSONEncoder)
    with open(path, 'w') as out:
        out.write(serialized)

    if file_store:
        file_store.writeGlobalFile(path, cleanup=True)

    return path
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
def write_map(in_map: Dict[str, str],
              temp_dir: Optional[str] = None,
              file_store: Optional[AbstractFileStore] = None) -> str:
    """
    Serialize a `Map[String, String]`-compatible value as a two-column TSV
    file and return the new file's path. Optionally registers the file with
    the given file store.

    WDL syntax: File write_map(Map[String, String])
    """
    assert isinstance(in_map, dict), f'write_map() requires "{in_map}" to be a dict! Not: {type(in_map)}'

    path = _get_temp_file_path('write_map', temp_dir)

    with open(path, 'w') as out:
        out.writelines(f'{key}\t{val}\n' for key, val in in_map.items())

    if file_store:
        file_store.writeGlobalFile(path, cleanup=True)

    return path
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
def wdl_range(num: int) -> List[int]:
    """
    Given an integer argument, create an array of integers of length equal
    to the given argument (i.e. [0, num)).

    WDL syntax: Array[Int] range(Int)
    """
    if isinstance(num, int) and num >= 0:
        return list(range(num))
    raise WDLRuntimeError(f'range() requires an integer greater than or equal to 0 (but got {num})')
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
def transpose(in_array: List[List[Any]]) -> List[List[Any]]:
    """
    Transpose a two-dimensional array according to the standard matrix
    transpose rules.

    WDL syntax: Array[Array[X]] transpose(Array[Array[X]])
    """
    assert isinstance(in_array, list), f'transpose() requires "{in_array}" to be a list! Not: {type(in_array)}'

    for arr in in_array:
        assert isinstance(arr, list), f'transpose() requires all collections to be a list! Not: {type(arr)}'
        # zip() would silently truncate ragged input, but Cromwell rejects it.
        assert len(arr) == len(in_array[0]), 'transpose() requires all collections have the same size!'

    columns = zip(*in_array)
    return [list(column) for column in columns]
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
def length(in_array: List[Any]) -> int:
    """
    Given an Array, return the number of elements in the Array as an Int.
    """
    if isinstance(in_array, list):
        return len(in_array)
    # Cromwell throws an exception for anything other than a WDL Array.
    raise WDLRuntimeError(f'length() requires ${in_array} to be a list! Not: {type(in_array)}')
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
def wdl_zip(left: List[Any], right: List[Any]) -> List[WDLPair]:
    """
    Return the dot product of the two arrays as an array of Pairs. Arrays
    of different lengths are an error.

    WDL syntax: Array[Pair[X,Y]] zip(Array[X], Array[Y])
    """
    if not (isinstance(left, list) and isinstance(right, list)):
        raise WDLRuntimeError(f'zip() requires both inputs to be lists! Not: {type(left)} and {type(right)}')

    if len(left) != len(right):
        raise WDLRuntimeError('zip() requires that input values have the same size!')

    return [WDLPair(left=lhs, right=rhs) for lhs, rhs in zip(left, right)]
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
def cross(left: List[Any], right: List[Any]) -> List[WDLPair]:
    """
    Return the cross product of the two arrays. Array[Y][1] appears before
    Array[X][1] in the output.

    WDL syntax: Array[Pair[X,Y]] cross(Array[X], Array[Y])
    """
    if not (isinstance(left, list) and isinstance(right, list)):
        raise WDLRuntimeError(f'cross() requires both inputs to be Array[]! Not: {type(left)} and {type(right)}')

    # The right array varies fastest, matching the documented ordering.
    return [WDLPair(left=lhs, right=rhs) for lhs in left for rhs in right]
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
def as_pairs(in_map: dict) -> List[WDLPair]:
    """
    Convert a Map into an Array of Pairs, one per entry: the key becomes
    the left element and the value the right element. Pair order in the
    resulting Array matches the key/value order of the Map.

    WDL syntax: Array[Pair[X,Y]] as_pairs(Map[X,Y])
    """
    if not isinstance(in_map, dict):
        raise WDLRuntimeError(f'as_pairs() requires "{in_map}" to be Map[]! Not: {type(in_map)}')

    return [WDLPair(left=key, right=val) for key, val in in_map.items()]
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
def as_map(in_array: List[WDLPair]) -> dict:
    """
    Convert an Array of Pairs into a Map whose keys are the left elements
    of the Pairs and whose values are the right elements.

    WDL syntax: Map[X,Y] as_map(Array[Pair[X,Y]])
    """
    if not isinstance(in_array, list):
        raise WDLRuntimeError(f'as_map() requires "{in_array}" to be a list! Not: {type(in_array)}')

    result = {}
    for pair in in_array:
        # NOTE(review): this duplicate check is truthiness-based, so a key
        # whose existing value is falsy (e.g. 0 or '') would not be caught.
        # Preserved as-is.
        if result.get(pair.left):
            raise WDLRuntimeError('Cannot evaluate "as_map()" with duplicated keys.')
        result[pair.left] = pair.right

    return result
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
def keys(in_map: dict) -> list:
    """
    Return an Array of the keys in the Map, in the same order as the Map's
    own key/value pairs.

    WDL syntax: Array[X] keys(Map[X,Y])
    """
    return [key for key in in_map]
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
def collect_by_key(in_array: List[WDLPair]) -> dict:
    """
    Group an Array of Pairs into a Map from each distinct left element to
    the list of right elements that share it.

    WDL syntax: Map[X,Array[Y]] collect_by_key(Array[Pair[X,Y]])

    :raises WDLRuntimeError: if the input is not a list.
    """
    if not isinstance(in_array, list):
        # Fixed copy-paste error: the message previously said "as_map()".
        raise WDLRuntimeError(f'collect_by_key() requires "{in_array}" to be a list! Not: {type(in_array)}')

    grouped: dict = {}

    for pair in in_array:
        grouped.setdefault(pair.left, []).append(pair.right)

    return grouped
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
def flatten(in_array: List[list]) -> list:
    """
    Given an array of arrays, concatenate the member arrays in order of
    appearance into a single array. Elements are not deduplicated.

    WDL syntax: Array[X] flatten(Array[Array[X]])
    """
    assert isinstance(in_array, list), f'flatten() requires "{in_array}" to be a list! Not: {type(in_array)}'

    flattened: list = []
    for member in in_array:
        assert isinstance(member, list), f'flatten() requires all collections to be a list! Not: {type(member)}'
        flattened += member

    return flattened
|