telomore 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- telomore/__init__.py +5 -0
- telomore/_version.py +34 -0
- telomore/app.py +536 -0
- telomore/utils/__init__.py +1 -0
- telomore/utils/arg_parser.py +220 -0
- telomore/utils/classes_and_small_func.py +289 -0
- telomore/utils/cmd_tools.py +732 -0
- telomore/utils/fasta_tools.py +595 -0
- telomore/utils/map_tools.py +1333 -0
- telomore/utils/qc_reports.py +493 -0
- telomore-0.4.1.dist-info/METADATA +149 -0
- telomore-0.4.1.dist-info/RECORD +15 -0
- telomore-0.4.1.dist-info/WHEEL +4 -0
- telomore-0.4.1.dist-info/entry_points.txt +2 -0
- telomore-0.4.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Argument parser for the telomore tool."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from argparse import Namespace
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from telomore._version import __version__
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_args(argv=None) -> Namespace:
    r"""
    Parse and validate command-line arguments for Telomore.

    Defines the command-line interface for Telomore, including all required
    and optional arguments, then validates the mode-specific requirements.

    Parameters
    ----------
    argv : sequence of str, optional
        Argument strings to parse, *without* the program name. When omitted
        (the default) the arguments are taken from ``sys.argv[1:]``, which is
        the behaviour of the command-line entry point.

    Returns
    -------
    argparse.Namespace
        Parsed arguments with attributes:
        - mode : str - Sequencing platform ('nanopore' or 'illumina')
        - reference : str - Path to reference genome FASTA
        - single : str or None - Nanopore FASTQ file (required if mode='nanopore')
        - read1 : str or None - Illumina R1 FASTQ (required if mode='illumina')
        - read2 : str or None - Illumina R2 FASTQ (required if mode='illumina')
        - threads : int - Number of threads (default: 1)
        - keep : bool - Retain intermediate files (default: False)
        - quiet : bool - Suppress console output (default: False)
        - coverage_threshold : int or None - Coverage cutoff for trimming
        - quality_threshold : int or None - Quality score cutoff for trimming

    Raises
    ------
    SystemExit
        - code 1: no arguments were given at all (the help text is printed
          to stderr first).
        - code 2: argparse rejected the arguments (missing required option,
          invalid choice, bad integer) or the mode-specific validation
          failed; both paths go through ``parser.error``.
        - code 0: ``--help`` or ``--version`` was requested.

    Notes
    -----
    Mode-specific validation:
    - nanopore mode requires --single argument
    - illumina mode requires both --read1 and --read2 arguments

    The threshold defaults advertised in the help text (Nanopore:
    coverage=5, quality=10; Illumina: coverage=1, quality=30) are NOT
    applied here: when the options are omitted the attributes are left as
    None, so the caller is expected to substitute the per-mode defaults.

    Examples
    --------
    Nanopore mode:
        telomore --mode nanopore --single reads.fq.gz --reference genome.fa -t 8

    Illumina mode:
        telomore --mode illumina --read1 R1.fq.gz --read2 R2.fq.gz \
                 --reference genome.fa -t 8 --coverage_threshold 2
    """
    # The description is shown verbatim (RawTextHelpFormatter), so the layout
    # is spelled out line by line instead of relying on source indentation.
    description = '\n'.join(
        [
            'Telomore: A tool to recover potential telomeric sequences from Streptomyces genomes.',
            '',
            'This tool processes sequencing data from Oxford Nanopore or Illumina platforms to extend assemblies and generate QC reports.',
            '',
            'INPUT:',
            '  - For Nanopore mode (--mode=nanopore): Provide a single gzipped FASTQ file using --single.',
            '  - For Illumina mode (--mode=illumina): Provide two gzipped FASTQ files using --read1 and --read2.',
            '  - A reference genome file in FASTA format is required for both modes (--reference).',
            '',
            'OUTPUT:',
            '  - Extended assembly written to basename.02.trimmed.fasta (basename is the name of the input file without the extension).',
            '  - QC reports saved in a folder named basename_seqtype_QC.',
            '  - Logs are written to telomore.log and basename.seqtype.cons.log.txt in basename_seqtype_QC.',
            '',
            'OPTIONS:',
            '  - Specify the number of threads to use with --threads (default: 1).',
            '  - Use --keep to retain intermediate files (default: False).',
            '  - Use --quiet to suppress console logging.',
            '',
            'EXAMPLES:',
            '  1. Nanopore mode:',
            '     telomore --mode=nanopore --single reads.fastq.gz --reference genome.fasta -t 8',
            '',
            '  2. Illumina mode:',
            '     telomore --mode=illumina --read1 read1.fastq.gz --read2 read2.fastq.gz --reference genome.fasta -t 8',
            '',
        ]
    )

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument(
        '-v',
        '--version',
        action='version',
        version=f'telomore {__version__}',
        help='Show version number and exit',
    )

    parser.add_argument(
        '-m',
        '--mode',
        choices=['nanopore', 'illumina'],
        required=True,
        help=(
            'Choose which mode to run.\n'
            '--mode=nanopore takes a single read-file, specified using --single\n'
            '--mode=illumina takes two read-files, specified using --read1 and --read2'
        ),
    )
    parser.add_argument(
        '--single',
        type=str,
        help='Path to a single gzipped nanopore fastq-file',
    )
    parser.add_argument(
        '--read1',
        type=str,
        help='Path to gzipped illumina read1 fastq-file',
    )
    parser.add_argument(
        '--read2',
        type=str,
        help='Path to gzipped illumina read2 fastq-file',
    )
    parser.add_argument(
        '-r',
        '--reference',
        type=str,
        required=True,
        help='Path to reference file (.fasta, .fna, or .fa)',
    )
    parser.add_argument(
        '-t', '--threads', type=int, default=1, help='Threads to use. Default is 1'
    )
    parser.add_argument(
        '-k',
        '--keep',
        action='store_true',
        help='Flag to keep intermediate files. Default is False',
    )
    parser.add_argument(
        '-q', '--quiet', action='store_true', help='Set logging to quiet.'
    )
    parser.add_argument(
        '--coverage_threshold',
        type=int,
        help='Coverage threshold for consensus trimming. Default is coverage=5 for nanopore and coverage=1 for Illumina.',
    )
    parser.add_argument(
        '--quality_threshold',
        type=int,
        help='Quality threshold for consensus trimming. Default is q_score=10 for nanopore and q_score=30 for illumina.',
    )

    if argv is None:
        argv = sys.argv[1:]

    # A bare invocation gets the full help text rather than argparse's terse
    # "the following arguments are required" usage error.
    if not argv:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args(argv)

    # Cross-option requirements that argparse cannot express declaratively.
    if args.mode == 'illumina':
        if not (args.read1 and args.read2):
            parser.error(
                'Illumina mode requires two FASTQ files, specified by --read1 and --read2'
            )
    elif args.mode == 'nanopore':
        if not args.single:
            parser.error(
                'Nanopore mode takes one collected FASTQ file, specified by --single'
            )
    return args
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def setup_logging(log_file: str = 'telomore.log', quiet: bool = False) -> None:
    """
    Configure logging for Telomore with file and console output.

    Sets up Python's root logger to write to a log file and, unless quiet
    mode is enabled, to the console as well. Uses INFO level logging with
    timestamps.

    Parameters
    ----------
    log_file : str, default='telomore.log'
        Path to log file where all messages will be written
    quiet : bool, default=False
        If true, suppress console output (file logging still occurs). Any
        truthy value is honoured, not only the literal ``True``.

    Returns
    -------
    None
        Configures the global logging system

    Notes
    -----
    Log format: '%(asctime)s - %(message)s'
    Log level: INFO (captures informational messages, warnings, and errors)

    When quiet is false, logs appear in both:
    - Console (via StreamHandler to stdout)
    - File (via FileHandler to log_file)

    When quiet is true, logs only appear in:
    - File (via FileHandler to log_file)

    ``logging.basicConfig`` does nothing when the root logger already has
    handlers, so only the first call in a process takes effect. The log
    file is still opened on later calls because the FileHandler is created
    before ``basicConfig`` is consulted.
    """
    # The file handler is always present; the console handler is optional.
    handlers = [logging.FileHandler(log_file)]
    if not quiet:
        handlers.append(logging.StreamHandler(sys.stdout))

    logging.basicConfig(
        level=logging.INFO, format='%(asctime)s - %(message)s', handlers=handlers
    )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# When this module is executed directly, parse the command line once; the
# parsed namespace is discarded, so this only exercises argument validation.
if __name__ == '__main__':
    get_args()
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
"""Class for handling files related to each replicon."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Class
|
|
8
|
+
# Replicon class
|
|
9
|
+
class Replicon:
    """
    Manage file paths and operations for individual replicon extension.

    A Replicon object tracks all input, intermediate, and output files
    associated with extending a single linear contig (replicon). It provides
    organized file management and cleanup methods for the Telomore workflow.

    Parameters
    ----------
    name : str
        Identifier for the replicon (typically contig name from FASTA)
    org_fasta : str
        Path to the original multi-FASTA file containing this replicon

    Attributes
    ----------
    name : str
        Replicon identifier
    org_fasta : str
        Original FASTA file path

    Mapping files:
    org_map : str
        BAM file of reads mapped to original contig
    org_map_index : str
        BAI index for org_map

    Terminal read files:
    left_sam, right_sam : str
        SAM files with left/right terminal read alignments
    left_filt, right_filt : str
        Base paths for filtered reads
    left_filt_sam, right_filt_sam : str
        Filtered SAM files
    left_filt_fq, right_filt_fq : str
        Filtered FASTQ files

    Consensus files:
    l_cons_out : str
        Left consensus (reverse complement, temporary)
    l_cons_final_out : str
        Left consensus (final orientation)
    l_cons_alignment : str
        Alignment file for left consensus
    revcomp_out : str
        Reverse complement of left filtered reads
    r_cons_final_out : str
        Right consensus (final)
    r_cons_alignment : str
        Alignment file for right consensus

    Extension files:
    contig_fasta : str
        Extracted single-contig FASTA
    cons_log_np_out : str
        Extension log for Nanopore mode
    cons_log_ill_out : str
        Extension log for Illumina mode
    trunc_left_fasta, trunc_right_fasta : str
        Truncated contigs to prevent alternative mappings
    l_map_out, r_map_out : str
        BAM files of consensus mapped to truncated contigs
    l_map_out_index, r_map_out_index : str
        BAI indices for consensus maps
    stitch_out : str
        Extended assembly before trimming
    stitch_left_fasta, stitch_right_fasta : str
        Extracted consensus sequences
    trim_map : str
        BAM of QC reads mapped to untrimmed assembly
    trim_map_index : str
        BAI index for trim_map
    trim_out : str
        Final trimmed extended assembly

    QC files:
    qc_out : str
        Final QC BAM file
    qc_out_index : str
        BAI index for QC BAM

    Notes
    -----
    All file paths are automatically generated from the replicon name
    following a consistent naming convention. This ensures files are
    traceable and organized. The paths are bare file names, so they are
    resolved relative to the current working directory.

    The class provides methods for:
    - cleanup_tmp_files(): Remove intermediate processing files
    - mv_files(): Move final output files to designated directory

    Files are categorized as:
    - Temporary: Deleted after successful extension
    - Output: Moved to results directory for user
    """

    def __init__(self, name: str, org_fasta: str) -> None:
        """Derive every file name used for this replicon from ``name``.

        No file is created or touched here; only path strings are built.
        """
        self.name = name
        self.org_fasta = org_fasta

        # Map files
        self.org_map = f'{self.name}_map.bam'
        self.org_map_index = f'{self.name}_map.bam.bai'

        # Filtered files
        self.left_sam = f'{self.name}_left.sam'
        self.left_filt = f'{self.name}_left_filtered'
        self.left_filt_sam = f'{self.name}_left_filtered.sam'
        self.left_filt_fq = f'{self.name}_left_filtered.fastq'

        self.right_sam = f'{self.name}_right.sam'
        self.right_filt = f'{self.name}_right_filtered'
        self.right_filt_sam = f'{self.name}_right_filtered.sam'
        self.right_filt_fq = f'{self.name}_right_filtered.fastq'

        # Consensus files
        # left
        self.l_cons_out = f'rev_{self.name}_left_cons.fasta'
        self.l_cons_final_out = f'{self.name}_left_cons.fasta'
        # The left alignment is named after the reverse-complement consensus.
        self.l_cons_alignment = f'{self.l_cons_out}.aln'
        self.revcomp_out = f'rev_{self.left_filt_fq}'
        # right
        self.r_cons_final_out = f'{self.name}_right_cons.fasta'
        self.r_cons_alignment = f'{self.r_cons_final_out}.aln'

        # Extension files
        self.contig_fasta = f'{self.name}.fasta'

        self.cons_log_np_out = f'{self.name}_telomore_ext_np.log'
        self.cons_log_ill_out = f'{self.name}_telomore_ill_ext.log'

        # Truncated contig which discard alternative mapping points
        self.trunc_left_fasta = f'{self.name}_trunc_left.fa'
        self.trunc_right_fasta = f'{self.name}_trunc_right.fa'

        # Maps on trunc fasta
        self.l_map_out = f'{self.name}_left_map.bam'
        self.r_map_out = f'{self.name}_right_map.bam'
        self.l_map_out_index = f'{self.l_map_out}.bai'
        self.r_map_out_index = f'{self.r_map_out}.bai'

        # Extended assembly
        self.stitch_out = f'{self.name}_telomore_untrimmed.fasta'
        self.stitch_left_fasta = f'{self.name}_left.fasta'
        self.stitch_right_fasta = f'{self.name}_right.fasta'

        # Trim files
        self.trim_map = f'{self.name}_telomore_untrimmed.bam'
        self.trim_map_index = f'{self.trim_map}.bai'
        self.trim_out = f'{self.name}_telomore_extended.fasta'

        # QC_files
        self.qc_out = f'{self.name}_telomore_QC.bam'
        self.qc_out_index = f'{self.qc_out}.bai'

    def cleanup_tmp_files(self) -> None:
        """
        Remove temporary intermediate files after successful extension.

        Deletes all intermediate files that are not needed in the final output,
        including mapping files, filtered reads, consensus intermediates, and
        truncated references. Preserves only the final extended assemblies and
        QC files.

        Returns
        -------
        None
            Removes files from the filesystem

        Notes
        -----
        Files removed include:
        - Original mapping: org_map, org_map_index
        - Terminal read SAMs: left_sam, right_sam
        - Filtered reads: left_filt_sam, left_filt_fq, right_filt_sam, right_filt_fq
        - Consensus intermediates: l_cons_out, l_cons_final_out, l_cons_alignment,
          revcomp_out, r_cons_final_out, r_cons_alignment
        - Extracted/truncated contigs: contig_fasta, trunc_left_fasta, trunc_right_fasta
        - Consensus mappings: l_map_out, r_map_out, and their indices
        - Stitching intermediates: stitch_left_fasta, stitch_right_fasta
        - Trimming map: trim_map, trim_map_index

        Files preserved (not deleted):
        - stitch_out: Untrimmed extended assembly
        - trim_out: Final trimmed extended assembly
        - qc_out, qc_out_index: QC alignment files
        - cons_log_np_out or cons_log_ill_out: Extension logs

        Missing files are silently skipped, so the method is safe to call
        more than once. Any other removal failure (for example a permission
        error) still propagates as OSError.
        """
        tmp_files = (
            self.org_map,
            self.org_map_index,
            self.left_sam,
            self.left_filt_sam,
            self.left_filt_fq,
            self.right_sam,
            self.right_filt_sam,
            self.right_filt_fq,
            self.l_cons_out,
            self.l_cons_final_out,
            self.l_cons_alignment,
            self.revcomp_out,
            self.r_cons_final_out,
            self.r_cons_alignment,
            self.contig_fasta,
            self.trunc_left_fasta,
            self.trunc_right_fasta,
            self.l_map_out,
            self.l_map_out_index,
            self.r_map_out_index,
            self.r_map_out,
            self.stitch_left_fasta,
            self.stitch_right_fasta,
            self.trim_map,
            self.trim_map_index,
        )
        for path in tmp_files:
            # Attempt the removal directly instead of checking existence
            # first: this avoids the check-then-act race and also clears
            # dangling symlinks, which os.path.exists() reports as absent.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

    def mv_files(self, folder: str, mode: str) -> None:
        """
        Move final output files to designated output directory.

        Relocates the essential output files (extended assemblies, QC BAM, and
        extension log) from the working directory to the specified output folder.
        The log file moved depends on the sequencing mode.

        Parameters
        ----------
        folder : str
            Path to destination directory for output files
        mode : str
            Sequencing technology mode: 'nanopore' or 'illumina'

        Returns
        -------
        None
            Moves files to the destination folder

        Raises
        ------
        FileNotFoundError
            If any of the required output files don't exist (implicitly from shutil.move)

        Notes
        -----
        Files moved for all modes:
        - stitch_out: Untrimmed extended assembly
        - trim_out: Final trimmed extended assembly
        - qc_out: QC alignment BAM
        - qc_out_index: QC alignment BAM index

        Mode-specific files:
        - If mode='nanopore': moves cons_log_np_out
        - If mode='illumina': moves cons_log_ill_out
        - Any other mode value: no extension log is moved

        The destination folder must already exist. Files retain their
        original names in the destination directory. Files are moved one at
        a time, so a failure part-way through leaves earlier files already
        relocated.
        """
        outputs = [self.stitch_out, self.trim_out, self.qc_out, self.qc_out_index]

        # The extension log is moved last and only for a recognised mode.
        if mode == 'nanopore':
            outputs.append(self.cons_log_np_out)
        elif mode == 'illumina':
            outputs.append(self.cons_log_ill_out)

        for filename in outputs:
            shutil.move(src=filename, dst=os.path.join(folder, filename))
|