telomore 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,493 @@
1
+ """Functions for generating useful QC metrics from the telomore script."""
2
+
3
+ import csv
4
+ import os
5
+ import tempfile
6
+
7
+ from Bio import SeqIO
8
+ from Bio.Seq import Seq
9
+ from Bio.SeqRecord import SeqRecord
10
+ import pysam
11
+
12
+ from .cmd_tools import map_and_sort, map_and_sort_illumina
13
+ from .fasta_tools import (
14
+ cat_and_derep_fastq,
15
+ check_fastq_order,
16
+ dereplicate_fastq,
17
+ merge_fasta,
18
+ )
19
+ from .map_tools import sam_to_fastq, sam_to_readpair
20
+
21
+
22
def qc_map(
    extended_assembly: str, left: str, right: str, output_handle: str, t: int = 1
) -> None:
    """
    Generate QC alignment of terminal reads against extended assembly (Nanopore).

    Collects the reads from the left and right SAM files into one temporary
    FASTQ file, dereplicates that file in place and maps the remaining reads
    to the extended assembly.

    Parameters
    ----------
    extended_assembly : str
        Path to FASTA file of extended assembly with consensus attached
    left : str
        Path to SAM file containing left-terminal reads
    right : str
        Path to SAM file containing right-terminal reads
    output_handle : str
        Path for output sorted BAM file with QC alignments
    t : int, default=1
        Number of threads for mapping

    Returns
    -------
    None
        The alignment is written to output_handle by map_and_sort

    Notes
    -----
    Workflow:
    1. Create a temporary FASTQ file
    2. Write the reads of the left SAM file to it via sam_to_fastq
    3. Write the reads of the right SAM file to the same handle
    4. Close the handle so all buffered reads are on disk
    5. Dereplicate the FASTQ in place via dereplicate_fastq
    6. Map the reads to the extended assembly via map_and_sort
    7. Remove the temporary FASTQ file, also when a step above fails

    Dereplication matters because the same read may be present in both the
    left and the right terminal read set.
    """
    temp_fastq_path = None
    try:
        # mode='w' gives sam_to_fastq the text handle it expects;
        # delete=False keeps the file on disk once the handle is closed so
        # the following steps can reopen it by path.
        with tempfile.NamedTemporaryFile(
            suffix='.fastq', delete=False, mode='w'
        ) as temp_fastq:
            temp_fastq_path = temp_fastq.name
            sam_to_fastq(left, temp_fastq)
            sam_to_fastq(right, temp_fastq)

        # The handle is closed (and therefore flushed) at this point, so the
        # path-based tools below see every read that was written.
        dereplicate_fastq(fastq_in=temp_fastq_path, fastq_out=temp_fastq_path)
        map_and_sort(extended_assembly, temp_fastq_path, output_handle, t)
    finally:
        # Always clean up, so a failing step does not leave the temporary
        # FASTQ behind in the system temp directory.
        if temp_fastq_path is not None and os.path.exists(temp_fastq_path):
            os.remove(temp_fastq_path)
81
+
82
+
83
def qc_map_illumina(
    extended_assembly: str,
    left_sam: str,
    right_sam: str,
    fastq_in1: str,
    fastq_in2: str,
    output_handle: str,
    t: int = 1,
) -> None:
    """
    Generate QC alignment of terminal reads against extended assembly (Illumina).

    Pulls the read pairs referenced by the left and right SAM files out of
    the original R1/R2 FASTQ files, merges and dereplicates them per mate and
    maps the resulting pairs to the extended assembly.

    Parameters
    ----------
    extended_assembly : str
        Path to FASTA file of extended assembly with consensus attached
    left_sam : str
        Path to SAM file containing left-terminal read alignments
    right_sam : str
        Path to SAM file containing right-terminal read alignments
    fastq_in1 : str
        Path to original R1 FASTQ file
    fastq_in2 : str
        Path to original R2 FASTQ file
    output_handle : str
        Path for output sorted BAM file with QC alignments
    t : int, default=1
        Number of threads for mapping

    Returns
    -------
    None
        The alignment is written to output_handle by map_and_sort_illumina

    Raises
    ------
    Exception
        If check_fastq_order reports that the merged R1 and R2 files are not
        properly paired or ordered

    Notes
    -----
    Workflow:
    1. Create a temporary directory for all intermediate FASTQ files
    2. Extract R1/R2 for the reads of the left SAM file (sam_to_readpair)
    3. Extract R1/R2 for the reads of the right SAM file (sam_to_readpair)
    4. Merge and dereplicate the two R1 files (cat_and_derep_fastq)
    5. Merge and dereplicate the two R2 files (cat_and_derep_fastq)
    6. Verify that the merged R1/R2 files are in sync (check_fastq_order)
    7. Map the pairs to the extended assembly (map_and_sort_illumina)
    8. The temporary directory is removed when the context exits
    """
    scratch_names = (
        'terminal_left_reads_1.fastq',
        'terminal_left_reads_2.fastq',
        'terminal_right_reads_1.fastq',
        'terminal_right_reads_2.fastq',
        'all_terminal_reads_1.fastq',
        'all_terminal_reads_2.fastq',
    )

    # TemporaryDirectory deletes the directory and its content on exit.
    with tempfile.TemporaryDirectory() as workdir:
        left_r1, left_r2, right_r1, right_r2, merged_r1, merged_r2 = (
            os.path.join(workdir, filename) for filename in scratch_names
        )

        # Fetch both mates for the left-terminal reads, then for the
        # right-terminal reads.
        for sam_path, mate1_out, mate2_out in (
            (left_sam, left_r1, left_r2),
            (right_sam, right_r1, right_r2),
        ):
            sam_to_readpair(
                sam_in=sam_path,
                fastq_in1=fastq_in1,
                fastq_in2=fastq_in2,
                fastq_out1=mate1_out,
                fastq_out2=mate2_out,
            )

        # Combine left and right reads per mate.
        cat_and_derep_fastq(fastq_in1=left_r1, fastq_in2=right_r1, fastq_out=merged_r1)
        cat_and_derep_fastq(fastq_in1=left_r2, fastq_in2=right_r2, fastq_out=merged_r2)

        # Refuse to map when the two mate files are out of sync.
        if not check_fastq_order(merged_r1, merged_r2):
            raise Exception('FASTQ files are not properly paired or ordered.')

        map_and_sort_illumina(
            reference=extended_assembly,
            read1=merged_r1,
            read2=merged_r2,
            output=output_handle,
            threads=t,
        )
192
+
193
+
194
def cons_genome_map(
    left_cons: str,
    right_cons: str,
    polished_genome: str,
    output_handle: str,
    t: int = 1,
) -> None:
    """
    Map consensus sequences against the polished reference genome.

    Merges the left and right consensus FASTA files into a single FASTA file
    and maps that file to the polished genome.

    Parameters
    ----------
    left_cons : str
        Path to FASTA file containing left consensus sequence
    right_cons : str
        Path to FASTA file containing right consensus sequence
    polished_genome : str
        Path to FASTA file of polished/final genome
    output_handle : str
        Path for output sorted BAM file with consensus alignments
    t : int, default=1
        Number of threads for mapping

    Returns
    -------
    None
        The alignment is written to output_handle by map_and_sort

    Notes
    -----
    The merged FASTA file is written as 'all_cons.fasta' relative to the
    current working directory and is not removed by this function.
    """
    # Intermediate file holding both consensus sequences; it is left in the
    # current working directory after the call.
    merged_cons_path = 'all_cons.fasta'

    merge_fasta(left_cons, right_cons, merged_cons_path)
    map_and_sort(polished_genome, merged_cons_path, output_handle, t)
243
+
244
+
245
def cons_cons_map(
    left_cons: str, right_cons: str, output_handle: str, t: int = 1
) -> None:
    """
    Map left consensus against right consensus to detect similarity.

    The left consensus is passed to map_and_sort as the reference and the
    right consensus as the query.

    Parameters
    ----------
    left_cons : str
        Path to FASTA file containing left consensus (used as reference)
    right_cons : str
        Path to FASTA file containing right consensus (used as query)
    output_handle : str
        Path for output sorted BAM file with cross-consensus alignments
    t : int, default=1
        Number of threads for mapping

    Returns
    -------
    None
        The alignment is written to output_handle by map_and_sort

    Notes
    -----
    Interpretation of the result:
    - No alignment: the two ends carry distinct sequence
    - Alignment present: the ends share sequence, which may point to a
      circular replicon, to repeats present at both ends, or to an artifact
    """
    reference_fasta = left_cons
    query_fasta = right_cons
    map_and_sort(reference_fasta, query_fasta, output_handle, t)
290
+
291
+
292
def cons_length(cons_file: str, output_handle: str, offset: int = 100) -> None:
    """
    Write consensus sequence length statistics to TSV file.

    For every record in the FASTA file one row is written holding the record
    id, the record length minus ``offset`` and the full record length.

    Parameters
    ----------
    cons_file : str
        Path to FASTA file containing consensus sequences
    output_handle : str
        Path for output TSV file with length statistics
    offset : int, default=100
        Number of bases subtracted from the full length to obtain end_cons

    Returns
    -------
    None
        Writes TSV with columns: seq_id, end_cons, full_cons

    Notes
    -----
    Output TSV format (tab-delimited, header row first):
    - seq_id: Identifier of the consensus sequence
    - end_cons: full_cons - offset
    - full_cons: Total length of the record

    Example: a 250 bp record with offset=100 is reported as end_cons=150,
    full_cons=250.
    """
    rows = [['seq_id', 'end_cons', 'full_cons']]
    rows.extend(
        [entry.id, len(entry) - offset, len(entry)]
        for entry in SeqIO.parse(cons_file, 'fasta')
    )

    # newline='' lets the csv module control line endings itself.
    with open(output_handle, 'w', newline='') as tsv_out:
        csv.writer(tsv_out, delimiter='\t').writerows(rows)
348
+
349
+
350
def map_to_depth(bam_file: str, output_handle: str) -> None:
    """
    Generate position-by-position depth of coverage from BAM file.

    Runs ``samtools depth`` through pysam with the ``-aa`` flag and lets it
    write its result to ``output_handle`` via ``-o``.

    Parameters
    ----------
    bam_file : str
        Path to input BAM alignment file
    output_handle : str
        Path for output depth file

    Returns
    -------
    None
        Writes depth information to output_handle

    Notes
    -----
    With ``-aa`` samtools depth reports absolutely all positions, including
    those with zero coverage.

    Output format (tab-delimited):
    - Column 1: Reference sequence name
    - Column 2: Position (1-based)
    - Column 3: Coverage depth at that position
    """
    depth_args = ('-aa', bam_file, '-o', output_handle)
    pysam.depth(*depth_args)
391
+
392
+
393
def finalize_log(log: str, right_fasta: str, left_fasta: str) -> None:
    """
    Finalize extension log by prepending final consensus lengths and sequences.

    Reads the existing log, derives the final consensus lengths from the
    original lengths and the trimming information, and rewrites the log with
    a summary header followed by the original content.

    Parameters
    ----------
    log : str
        Path to extension log file to finalize (will be overwritten)
    right_fasta : str
        Path to FASTA file containing right consensus sequence
    left_fasta : str
        Path to FASTA file containing left consensus sequence

    Returns
    -------
    None
        Overwrites log file with finalized version including summary header

    Raises
    ------
    IndexError
        If the log has fewer lines or fields than the layout described below
    ValueError
        If a length or trim value in the log is not an integer (and not
        'rejected'), or if a FASTA file does not hold exactly one record

    Notes
    -----
    Expected layout of the incoming log:
    - Line 4: ``<name>:<left length>\\t<name>:<right length>``
    - Second to last line: ends with the number of bases trimmed from the
      left consensus, or with 'rejected'
    - Last line: same for the right consensus

    Final log structure:
    - FINAL GENOME EXTENSION header
    - Final consensus lengths ('rejected' for a rejected consensus)
    - Final consensus sequences (empty for a rejected consensus)
    - Original log content
    - Closing separator

    Trimming removes bases from the start of the left consensus and from the
    end of the right consensus.

    Everything that is written is computed before the log is reopened for
    writing, so a malformed log or FASTA file raises before the existing log
    is truncated.
    """
    with open(log) as log_in:
        log_cont = log_in.readlines()

    # Original lengths of the consensus added (4th line of the log).
    length_fields = log_cont[3].split('\t')
    left_len = int(length_fields[0].split(':')[1])
    right_len = int(length_fields[1].split(':')[1])

    # Number of bases trimmed off, or the literal 'rejected'.
    trim_left = log_cont[-2].split(' ')[-1].rstrip()
    trim_right = log_cont[-1].split(' ')[-1].rstrip()

    left_seq = SeqIO.read(left_fasta, 'fasta')
    right_seq = SeqIO.read(right_fasta, 'fasta')

    if trim_left == 'rejected':
        new_left = 'rejected'
        left_seq = SeqRecord(Seq(''))
    else:
        left_trimmed = int(trim_left)
        new_left = left_len - left_trimmed
        # Left consensus is trimmed from its start.
        left_seq = left_seq[left_trimmed:]

    if trim_right == 'rejected':
        new_right = 'rejected'
        right_seq = SeqRecord(Seq(''))
    else:
        new_right = right_len - int(trim_right)
        # Right consensus is trimmed from its end.
        right_seq = right_seq[0:new_right]

    final_lengths = f'left_cons:{new_left}\tright_consensus:{new_right}'
    left_text = str(left_seq.seq)
    right_text = str(right_seq.seq)
    separator = (
        '=============================================================================='
    )

    # Rewrite the log: summary header first, original content below.
    with open(log, 'w') as log_out:
        log_out.write(separator)
        log_out.write('\nFINAL GENOME EXTENSION')
        log_out.write('\n' + separator + '\n')
        log_out.write(final_lengths)
        log_out.write('\n>left_cons\n')
        log_out.write(left_text)
        log_out.write('\n>right_cons\n')
        log_out.write(right_text)
        log_out.write('\n')
        log_out.writelines(log_cont)
        log_out.write(separator + '\n')
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: telomore
3
+ Version: 0.4.1
4
+ Summary: Identify and extract telomeric sequences from Oxford Nanopore or Illumina sequencing reads to extend Streptomycetes assemblies.
5
+ Project-URL: documentation, https://github.com/dalofa/telomore
6
+ Project-URL: homepage, https://github.com/dalofa/telomore
7
+ Project-URL: repository, https://github.com/dalofa/telomore
8
+ Author-email: David Faurdal <dalofa@biosustain.dtu.dk>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
14
+ Requires-Python: >=3.9
15
+ Requires-Dist: biopython
16
+ Requires-Dist: pysam
17
+ Provides-Extra: dev
18
+ Requires-Dist: hatch; extra == 'dev'
19
+ Requires-Dist: isort; extra == 'dev'
20
+ Requires-Dist: numpydoc-validation; extra == 'dev'
21
+ Requires-Dist: pre-commit; extra == 'dev'
22
+ Requires-Dist: pydocstyle; extra == 'dev'
23
+ Requires-Dist: ruff; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # TELOMORE
27
+
28
+ Telomore is a tool for identifying and extracting telomeric sequences from
29
+ **Oxford Nanopore** or **Illumina** sequencing reads of *Streptomycetes spp.*
30
+ that have been excluded from a *de novo* assembly. It processes sequencing data
31
+ to extend assemblies, generate quality control (QC) maps, and produce finalized
32
+ assemblies with the telomere/recessed bases included.
33
+
34
+ ## Before running Telomore
35
+
36
+ Telomore does not identify linear contigs but rather relies on the user to provide
37
+ that information in the header of the fasta-reference file.
38
+
39
+ ## Usage
40
+
41
+ ```bash
42
+ telomore --mode <mode> --reference <reference.fasta> [options]
43
+ ```
44
+
45
+ Required Arguments
46
+
47
+ - `--mode` Specify the sequencing platform. Options: nanopore or illumina.
48
+ - `--reference` Path to the reference genome file in FASTA format.
49
+
50
+ Nanopore-Specific Arguments
51
+
52
+ - `--single` Path to a single gzipped FASTQ file containing Nanopore reads.
53
+
54
+ Illumina-Specific Arguments
55
+
56
+ - `--read1` Path to gzipped FASTQ file for Illumina read 1.
57
+ - `--read2` Path to gzipped FASTQ file for Illumina read 2.
58
+
59
+ Optional Arguments
60
+
61
+ - `--coverage_threshold` Set the threshold for coverage to stop trimming during
62
+ consensus trimming (Default is coverage=5 for ONT reads and coverage=1 for
63
+ Illumina reads).
64
+ - `--quality_threshold` Set the Q-score required to count a read position in the
65
+ coverage calculation during consensus trimming (Default is Q-score=10 for ONT
66
+ reads and Q-score=30 for Illumina reads).
67
+ - `--threads` Number of threads to use (default: 1).
68
+ - `--keep` Retain intermediate files (default: False).
69
+ - `--quiet` Suppress console logging.
70
+
71
+ ## Process overview
72
+
73
+ The process is as follows:
74
+
75
+ 1. **Map Reads:**
76
+ Reads are mapped against all contigs in a reference using either minimap2 or
77
+ Bowtie2.
78
+ 2. **Extract Extending Reads**
79
+ Extending reads that are mapped to the ends of linear contigs are extracted.
80
+ 3. **Build Consensus**
81
+ The terminal extending reads from each end are used to construct a consensus
82
+ using either lamassemble or mafft + EMBOSS cons
83
+ 4. **Align and Attach consensus**
84
+ The consensus for each end is aligned to the reference and used to extend it.
85
+ 5. **Trim Extended Replicon**
86
+ In a final step, all terminally mapped reads are mapped to the new extended
87
+ reference and used to trim away spurious sequence, based on read-support.
88
+
89
+ ## Outputs
90
+
91
+ At the end of a run Telomore produces the following outputs:
92
+
93
+ ```Output
94
+ ├── {fasta_basename}_{seqtype}_telomore
95
+ │ ├── {contig_name}_telomore_extended.fasta
96
+ │ ├── {contig_name}_telomore_ext_{seqtype}.log
97
+ │ ├── {contig_name}_telomore_QC.bam
98
+ │ ├── {contig_name}_telomore_QC.bam.bai
99
+ │ ├── {contig_name}_telomore_untrimmed.fasta
100
+ │ └── {fasta_basename}_telomore.fasta
101
+ └── telomore.log # log containing run information.
102
+ ```
103
+
104
+ In the folder there are a number of files generated for each contig considered:
105
+
106
+ | File Name | Description |
107
+ |-----------|-------------|
108
+ | `{contig_name}_telomore_extended.fasta` | Original contig sequence + added terminal bases - trimmed bases |
109
+ | `{contig_name}_telomore_ext_{seqtype}.log` | Log containing information about bases added, trimmed off and final result. |
110
+ | `{contig_name}_telomore_QC.bam` | BAM file containing terminal reads mapped to `{contig_name}_telomore_extended.fasta`. Useful for manual inspection of the extension. |
111
+ | `{contig_name}_telomore_QC.bam.bai` | Index file for the corresponding BAM file. |
112
+ | `{contig_name}_telomore_untrimmed.fasta` | Original contig sequence + added terminal bases |
113
+
114
+ Additionally, there is a fasta-file collecting all tagged linear contigs as they
115
+ appear in `{contig_name}_telomore_extended.fasta` together with all non-linear
116
+ contigs in the order they appear in the original file.
117
+
118
+ Inspecting the `{contig_name}_telomore_QC.bam` file in IGV (Integrative Genomics Viewer)
119
+ can be informative in evaluating the extended contig.
120
+
121
+ ## Dependencies (CLI-tools)
122
+
123
+ - Bowtie2
124
+ - Emboss tools (cons specifically)
125
+ - Lamassemble
126
+ - LAST-DB
127
+ - Mafft
128
+ - Minimap2, version 2.25 or higher
129
+ - Samtools
130
+
131
+ These can be installed using the conda recipe in this repo:
132
+
133
+ ```bash
134
+ conda env create -f environment.yml -y
135
+ ```
136
+
137
+ This repo can then be downloaded using git clone, the conda environment activated
138
+ and the tool installed
139
+
140
+ ```bash
141
+ # Activate telomore conda env
142
+ conda activate telomore
143
+
144
+ # Clone telomore repo
145
+ git clone https://github.com/dalofa/telomore && cd telomore
146
+
147
+ # Install package
148
+ pip install -e '.[dev]'
149
+ ```
@@ -0,0 +1,15 @@
1
+ telomore/__init__.py,sha256=-yilfTa-JZQ_2VfYWowO54WFEDInH1h-OGWaw88McfI,244
2
+ telomore/_version.py,sha256=k7cu0JKra64gmMNU_UfA5sw2eNc_GRvf3QmesiYAy8g,704
3
+ telomore/app.py,sha256=A3JuFRZHQYxjiVqwofo8w56okmdVDHf1C4zA4klfCl4,17935
4
+ telomore/utils/__init__.py,sha256=6LPHIWv6ARRiIY6ys2_uLJmzmpZYeM8cTuoKWG9ukGU,30
5
+ telomore/utils/arg_parser.py,sha256=UH0sRL14YyuV4PCMzkeR329fD-x5xR6yHA-6UD7q_us,7349
6
+ telomore/utils/classes_and_small_func.py,sha256=Xf3ytB-CSlCYvdFfIVsL74k8jq8nNdZKqEtMy2bMSAw,10173
7
+ telomore/utils/cmd_tools.py,sha256=4EIZLhWZR1fvzIny22KjqRo8IPFEAGL1QHeANTu6ipQ,26949
8
+ telomore/utils/fasta_tools.py,sha256=dZc4lTlkZu6JZoBb9e6EuJ3bC4GnxijiUKV3_JbXcSY,19798
9
+ telomore/utils/map_tools.py,sha256=0cIlIyyjbBWAQT46SYwvQ6eKsF-0RLmyz8udVaJPFaE,47804
10
+ telomore/utils/qc_reports.py,sha256=Mcyn3S1As6Drd9TuTywRyTdA5qfb3msRozs_NhWFYAw,16849
11
+ telomore-0.4.1.dist-info/METADATA,sha256=HxgTEyLYFR9yWpJaARiDCjSojpKFLmOyuqKBnNl2Nzs,5427
12
+ telomore-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
+ telomore-0.4.1.dist-info/entry_points.txt,sha256=imwQdQxdlhqz5NeIIiqWRbR9jYh1cy6sIJpY-FTiGgA,53
14
+ telomore-0.4.1.dist-info/licenses/LICENSE,sha256=otCsiAo74jRQIibnrWLcyZ9qk-0c2pMP7Xl984uh-Cs,1088
15
+ telomore-0.4.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ telomore = telomore.app:entrypoint
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Technical University of Denmark
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.