telomore 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- telomore/__init__.py +5 -0
- telomore/_version.py +34 -0
- telomore/app.py +536 -0
- telomore/utils/__init__.py +1 -0
- telomore/utils/arg_parser.py +220 -0
- telomore/utils/classes_and_small_func.py +289 -0
- telomore/utils/cmd_tools.py +732 -0
- telomore/utils/fasta_tools.py +595 -0
- telomore/utils/map_tools.py +1333 -0
- telomore/utils/qc_reports.py +493 -0
- telomore-0.4.1.dist-info/METADATA +149 -0
- telomore-0.4.1.dist-info/RECORD +15 -0
- telomore-0.4.1.dist-info/WHEEL +4 -0
- telomore-0.4.1.dist-info/entry_points.txt +2 -0
- telomore-0.4.1.dist-info/licenses/LICENSE +21 -0
telomore/__init__.py
ADDED
telomore/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# The flag is a plain module-level False, so the typing imports in the
# ``else`` branch below never execute at runtime; at runtime both aliases
# are bound to ``object``.
TYPE_CHECKING = False
if not TYPE_CHECKING:
    VERSION_TUPLE = object
    COMMIT_ID = object
else:
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]

# Declared annotations for the public names (same order as generated).
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Version string and its dunder alias.
version = '0.4.1'
__version__ = version

# Tuple form of the same version and its dunder alias.
version_tuple = (0, 4, 1)
__version_tuple__ = version_tuple

# No commit id is recorded for this build.
commit_id = None
__commit_id__ = commit_id
|
telomore/app.py
ADDED
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Telomore main application module.
|
|
3
|
+
|
|
4
|
+
Script for finding and extracting telomeres from nanopore or illumina reads,
|
|
5
|
+
which have been excluded from a de novo assembly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from argparse import Namespace
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import traceback
|
|
13
|
+
|
|
14
|
+
from telomore._version import __version__
|
|
15
|
+
from telomore.utils.arg_parser import get_args, setup_logging
|
|
16
|
+
from telomore.utils.classes_and_small_func import Replicon
|
|
17
|
+
from telomore.utils.cmd_tools import (
|
|
18
|
+
generate_consensus_lamassemble,
|
|
19
|
+
generate_consensus_mafft,
|
|
20
|
+
map_and_sort,
|
|
21
|
+
map_and_sort_illumina,
|
|
22
|
+
map_and_sort_illumina_cons,
|
|
23
|
+
train_lastDB,
|
|
24
|
+
)
|
|
25
|
+
from telomore.utils.fasta_tools import (
|
|
26
|
+
build_extended_fasta,
|
|
27
|
+
extract_contig,
|
|
28
|
+
get_fasta_length,
|
|
29
|
+
get_linear_elements,
|
|
30
|
+
strip_fasta,
|
|
31
|
+
)
|
|
32
|
+
from telomore.utils.map_tools import (
|
|
33
|
+
get_left_soft,
|
|
34
|
+
get_right_soft,
|
|
35
|
+
get_terminal_reads,
|
|
36
|
+
revcomp,
|
|
37
|
+
revcomp_reads,
|
|
38
|
+
stitch_telo,
|
|
39
|
+
trim_by_map,
|
|
40
|
+
trim_by_map_illumina,
|
|
41
|
+
)
|
|
42
|
+
from telomore.utils.qc_reports import (
|
|
43
|
+
finalize_log,
|
|
44
|
+
qc_map,
|
|
45
|
+
qc_map_illumina,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def check_dependencies(required_tools: list[str] | None = None) -> None:
|
|
50
|
+
"""
|
|
51
|
+
Check if required external dependencies are available in PATH.
|
|
52
|
+
|
|
53
|
+
Verifies that all bioinformatics tools required by Telomore are installed
|
|
54
|
+
and accessible. Logs the path to each found tool and exits with error if
|
|
55
|
+
any tools are missing.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
required_tools : list of str or None, optional
|
|
60
|
+
List of command-line tool names to check. If None, no tools are checked.
|
|
61
|
+
Common tools include: minimap2, samtools, lamassemble, mafft, bowtie2,
|
|
62
|
+
lastdb, lastal, cons.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
None
|
|
67
|
+
Logs tool locations or exits if dependencies are missing.
|
|
68
|
+
|
|
69
|
+
Raises
|
|
70
|
+
------
|
|
71
|
+
SystemExit
|
|
72
|
+
If any required tools are not found in PATH (exits with code 1)
|
|
73
|
+
|
|
74
|
+
Notes
|
|
75
|
+
-----
|
|
76
|
+
For each tool, this function:
|
|
77
|
+
- Checks if the tool is available using shutil.which()
|
|
78
|
+
- Logs the full path if found
|
|
79
|
+
- Collects missing tools and reports them all at once before exiting
|
|
80
|
+
|
|
81
|
+
This ensures users know about all missing dependencies upfront rather than
|
|
82
|
+
discovering them one at a time during execution.
|
|
83
|
+
"""
|
|
84
|
+
missing_tools = []
|
|
85
|
+
for tool in required_tools:
|
|
86
|
+
if shutil.which(tool) is None:
|
|
87
|
+
# Log missing tool
|
|
88
|
+
missing_tools.append(tool)
|
|
89
|
+
else:
|
|
90
|
+
# Log the path to the tool
|
|
91
|
+
logging.info(f'{tool}\t {shutil.which(tool)}')
|
|
92
|
+
if missing_tools:
|
|
93
|
+
# Log all missing tools and exit
|
|
94
|
+
logging.error(f'Missing required tools: {", ".join(missing_tools)}')
|
|
95
|
+
exit(1)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def entrypoint() -> None:
    """
    Entry point for the telomore command-line interface.

    Obtains the parsed arguments from ``get_args()``, configures logging via
    ``setup_logging`` (log file ``telomore.log``, honouring ``args.quiet``)
    and then runs ``main(args)``.

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        With exit code 1 if ``main`` raises any ``Exception``. A
        ``SystemExit`` raised inside ``main`` itself is not intercepted,
        because it does not derive from ``Exception``.

    Notes
    -----
    On failure, a short error message followed by the full formatted
    traceback is logged at ERROR level before exiting.
    """
    args = get_args()  # Get arguments
    setup_logging(log_file='telomore.log', quiet=args.quiet)  # setup logging
    try:
        main(args)
    except Exception:
        # Top-level boundary: record the failure, then signal it to the
        # calling process through a non-zero exit status.
        logging.error('An error occurred during the workflow:')
        logging.error(traceback.format_exc())
        # Raise SystemExit directly: the ``exit`` builtin is injected by the
        # ``site`` module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1) from None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def main(args: Namespace) -> None:
    """
    Execute the main Telomore telomere extension workflow.

    Runs the pipeline steps in order for every linear contig reported by
    ``get_linear_elements``: map reads, extract terminal reads, build a
    consensus for each end, stitch the consensus onto the contig, trim it by
    read support, produce QC maps, and finally assemble and move the output.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes read here are:
        - mode : str - 'nanopore' or 'illumina'
        - reference : str - Path to reference genome FASTA
        - single : str - Nanopore FASTQ file (used when mode='nanopore')
        - read1, read2 : str - Illumina FASTQ files (used when mode='illumina')
        - threads : int - Thread count passed to the mapping/QC helpers
        - keep : bool - If False, intermediate files are removed at the end
        - coverage_threshold : int or None - Coverage threshold for trimming
        - quality_threshold : int or None - Quality threshold for trimming

        ``coverage_threshold`` and ``quality_threshold`` are overwritten on
        ``args`` with the platform default when they are None.

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        If the output folder already exists or no linear elements are found
        (both exit with status 0), or if a dependency is missing (raised by
        ``check_dependencies``).
    ValueError
        If ``args.mode`` is neither 'nanopore' nor 'illumina'.

    Notes
    -----
    Workflow steps:
    1. Check external tool dependencies
    2. Identify linear contigs from the reference
    3. Map reads to the reference (or reuse ``{ref}_map.bam`` if present
       in the current working directory)
    4. Extract terminal extending reads for each linear contig
    5. Generate consensus sequences from extending reads
    6. Map consensus onto half-contigs and stitch it to the contig ends
    7. Trim consensus based on read support
    8. Generate QC maps and finalize the per-contig log
    9. Build the final FASTA and move outputs into the output folder
    10. Remove intermediate files (unless ``args.keep`` is set)

    Platform-specific defaults:
    - Nanopore: coverage_threshold=5, quality_threshold=10
    - Illumina: coverage_threshold=1, quality_threshold=30

    Output structure: {reference_basename}_{np|ill}_telomore/
    """
    logging.info('Running Telomore: %s', __version__)

    check_dependencies(
        [
            'minimap2',
            'samtools',
            'lamassemble',
            'mafft',
            'bowtie2',
            'lastdb',
            'lastal',
            'cons',
        ]
    )

    ref_name = os.path.splitext(os.path.basename(args.reference))[0]
    folder_content = os.listdir()

    # Create output folder
    folder_suffix = {'nanopore': '_np_telomore', 'illumina': '_ill_telomore'}
    if args.mode not in folder_suffix:
        # Fail with a clear message instead of an UnboundLocalError on
        # telo_folder further down.
        raise ValueError(
            f"Unsupported mode {args.mode!r}; expected 'nanopore' or 'illumina'"
        )
    nanopore = args.mode == 'nanopore'
    telo_folder = ref_name + folder_suffix[args.mode]

    if os.path.isdir(telo_folder):
        logging.info('Output folder %s already exists.', telo_folder)
        # Exit status 0, as before. SystemExit is raised directly because the
        # ``exit`` builtin is only present when the ``site`` module is loaded.
        raise SystemExit
    # NOTE(review): the folder is created before the linear-element check, so
    # the "no linear elements" exit below leaves an empty folder behind that
    # makes the next run stop at the check above. Confirm whether intended.
    os.mkdir(telo_folder)

    # Identify linear elements
    linear_elements = get_linear_elements(args.reference)
    if not linear_elements:
        logging.info('No tagged linear elements identified')
        raise SystemExit
    logging.info('Identified the following tagged linear elements %s', linear_elements)

    # Create a list of replicon instances
    replicon_list = [Replicon(element, args.reference) for element in linear_elements]

    # 0: Map reads and extract terminally-extending sequence
    # -----------------------------------------------------------------
    logging.info('Mapping reads to assembly')

    map_out = ref_name + '_map.bam'

    # Use already existing map
    if map_out in folder_content:
        logging.info('Using already identified .bam-file %s', map_out)
    elif nanopore:
        map_and_sort(
            reference=args.reference,
            fastq=args.single,
            output=map_out,
            threads=args.threads,
        )
    else:
        map_and_sort_illumina(
            reference=args.reference,
            read1=args.read1,
            read2=args.read2,
            output=map_out,
            threads=args.threads,
        )

    for replicon in replicon_list:
        logging.info('\tContig %s', replicon.name)

        get_terminal_reads(
            sorted_bam_file=map_out,
            contig=replicon.name,
            loutput_handle=replicon.left_sam,
            routput_handle=replicon.right_sam,
        )
        get_left_soft(
            sam_file=replicon.left_sam, left_out=replicon.left_filt, offset=500
        )
        get_right_soft(
            sam_file=replicon.right_sam,
            contig=replicon.name,
            right_out=replicon.right_filt,
            offset=500,
        )

    # 1: Generate consensus
    # -----------------------------------------------------------------
    logging.info('Generating consensus')

    if nanopore:
        # Train the LAST database once on the entire reference. The inputs
        # do not depend on the replicon, so it is shared by every left and
        # right consensus below instead of being retrained per contig.
        db_out = ref_name + '.db'
        train_lastDB(args.reference, args.single, db_out, args.threads)

    for replicon in replicon_list:
        logging.info('\tContig %s', replicon.name)

        # GENERATE LEFT CONSENSUS
        # To maintain alignment anchor point, the reads are flipped
        # And the resulting consensus must then be flipped again
        revcomp_reads(reads_in=replicon.left_filt_fq, reads_out=replicon.revcomp_out)

        if nanopore:
            generate_consensus_lamassemble(
                db_name=db_out, reads=replicon.revcomp_out, output=replicon.l_cons_out
            )
        else:
            generate_consensus_mafft(
                reads=replicon.revcomp_out, output=replicon.l_cons_out
            )
        # flip consensus to match original orientation
        revcomp(fasta_in=replicon.l_cons_out, fasta_out=replicon.l_cons_final_out)

        # GENERATE RIGHT CONSENSUS
        # The right reads are already oriented with the anchor point
        # left-most and do therefore not need to be flipped
        if nanopore:
            generate_consensus_lamassemble(
                db_name=db_out,
                reads=replicon.right_filt_fq,
                output=replicon.r_cons_final_out,
            )
        else:
            generate_consensus_mafft(
                reads=replicon.right_filt_fq, output=replicon.r_cons_final_out
            )

    # 2: Extend assembly with consensus by mapping onto chromosome
    # -----------------------------------------------------------------
    logging.info('Extending assembly')

    for replicon in replicon_list:
        logging.info('\tContig %s', replicon.name)

        # Produce fasta file of just the contig to be extended
        extract_contig(
            fasta_in=replicon.org_fasta,
            contig_name=replicon.name,
            fasta_out=replicon.contig_fasta,
        )

        # Discard bases that provide alternative mapping sites
        # for the consensus to map to as Streptomyces have TIRs.
        # discard half the contig
        strip_size = int(
            get_fasta_length(
                fasta_file=replicon.contig_fasta, contig_name=replicon.name
            )
            / 2
        )
        strip_fasta(
            input_file=replicon.contig_fasta,
            output_file=replicon.trunc_left_fasta,
            x=strip_size,
            remove_from='end',
        )
        strip_fasta(
            input_file=replicon.contig_fasta,
            output_file=replicon.trunc_right_fasta,
            x=strip_size,
            remove_from='start',
        )

        if nanopore:
            # Map onto the reduced reference using minimap2
            map_and_sort(
                reference=replicon.trunc_left_fasta,
                fastq=replicon.l_cons_final_out,
                output=replicon.l_map_out,
                threads=args.threads,
            )

            map_and_sort(
                reference=replicon.trunc_right_fasta,
                fastq=replicon.r_cons_final_out,
                output=replicon.r_map_out,
                threads=args.threads,
            )
        else:
            # Map onto reduced reference using bowtie2
            map_and_sort_illumina_cons(
                reference=replicon.trunc_left_fasta,
                consensus_fasta=replicon.l_cons_final_out,
                output=replicon.l_map_out,
                threads=args.threads,
            )

            map_and_sort_illumina_cons(
                reference=replicon.trunc_right_fasta,
                consensus_fasta=replicon.r_cons_final_out,
                output=replicon.r_map_out,
                threads=args.threads,
            )

        # Extend the assembly using the map
        cons_log_out = (
            replicon.cons_log_np_out if nanopore else replicon.cons_log_ill_out
        )

        stitch_telo(
            ref=replicon.contig_fasta,
            left_map=replicon.l_map_out,
            right_map=replicon.r_map_out,
            outfile=replicon.stitch_out,
            logout=cons_log_out,
            tmp_left=replicon.stitch_left_fasta,
            tmp_right=replicon.stitch_right_fasta,
        )

    # 3: Trim consensus using a map of terminal reads onto extended
    # assembly
    # -----------------------------------------------------------------
    logging.info('Trimming consensus based on read support')

    # Set default values for consensus trimming if the user did not.
    # Done once up front; the values are written back onto args as before.
    default_coverage, default_quality = (5, 10) if nanopore else (1, 30)
    if args.coverage_threshold is None:
        args.coverage_threshold = default_coverage
    if args.quality_threshold is None:
        args.quality_threshold = default_quality

    for replicon in replicon_list:
        # Select the log that matches the sequencing mode
        cons_log_out = (
            replicon.cons_log_np_out if nanopore else replicon.cons_log_ill_out
        )

        logging.info('\tContig %s', replicon.name)

        if nanopore:
            qc_map(
                extended_assembly=replicon.stitch_out,
                left=replicon.left_sam,
                right=replicon.right_sam,
                output_handle=replicon.trim_map,
                t=args.threads,
            )

            trim_by_map(
                untrimmed_assembly=replicon.stitch_out,
                sorted_bam_file=replicon.trim_map,
                output_handle=replicon.trim_out,
                cons_log=cons_log_out,
                cov_thres=args.coverage_threshold,
                ratio_thres=0.7,
                qual_thres=args.quality_threshold,
            )
        else:
            qc_map_illumina(
                extended_assembly=replicon.stitch_out,
                left_sam=replicon.left_sam,
                right_sam=replicon.right_sam,
                fastq_in1=args.read1,
                fastq_in2=args.read2,
                output_handle=replicon.trim_map,
                t=args.threads,
            )
            trim_by_map_illumina(
                untrimmed_assembly=replicon.stitch_out,
                sorted_bam_file=replicon.trim_map,
                output_handle=replicon.trim_out,
                cons_log=cons_log_out,
                cov_thres=args.coverage_threshold,
                ratio_thres=0.7,
                qual_thres=args.quality_threshold,
            )

    # 4: Generate QC files
    # -----------------------------------------------------------------
    logging.info('Generating QC map and finalizing result-log')

    for replicon in replicon_list:
        # Select the log that matches the sequencing mode
        cons_log_out = (
            replicon.cons_log_np_out if nanopore else replicon.cons_log_ill_out
        )

        logging.info('\tContig %s', replicon.name)

        if nanopore:
            qc_map(
                extended_assembly=replicon.trim_out,
                left=replicon.left_sam,
                right=replicon.right_sam,
                output_handle=replicon.qc_out,
                t=args.threads,
            )
        else:
            qc_map_illumina(
                extended_assembly=replicon.trim_out,
                left_sam=replicon.left_sam,
                right_sam=replicon.right_sam,
                fastq_in1=args.read1,
                fastq_in2=args.read2,
                output_handle=replicon.qc_out,
                t=args.threads,
            )

        finalize_log(
            log=cons_log_out,
            right_fasta=replicon.stitch_right_fasta,
            left_fasta=replicon.stitch_left_fasta,
        )

    # 5: Clean-up
    # -----------------------------------------------------------------
    logging.info('Removing temporary files')

    finished_fasta = ref_name + '_telomore.fasta'
    build_extended_fasta(
        org_fasta=args.reference,
        linear_elements=linear_elements,
        replicon_list=replicon_list,
        output_handle=finished_fasta,
    )

    shutil.move(src=finished_fasta, dst=os.path.join(telo_folder, finished_fasta))

    for replicon in replicon_list:
        replicon.mv_files(telo_folder, args.mode)

    if args.keep is False:
        # rmv tmp files
        for replicon in replicon_list:
            replicon.cleanup_tmp_files()

        # rmv lastdb
        last_db_ext = ['.bck', '.des', '.par', '.prj', '.sds', '.ssp', '.suf', '.tis']

        if nanopore:
            for ext in last_db_ext:
                os.remove(f'{db_out}{ext}')

        # remove map
        # NOTE(review): this also deletes map_out when it was a pre-existing
        # BAM reused from the working directory - confirm that is intended.
        os.remove(map_out)  # map
        os.remove(f'{map_out}.bai')  # index

    logging.info('Output files moved to %s', telo_folder)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utilities for telomore."""
|