split3c 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- split3c/__init__.py +0 -0
- split3c/cli.py +336 -0
- split3c/nssite/__init__.py +0 -0
- split3c/nssite/auxiliary.py +190 -0
- split3c/nssite/bam.py +299 -0
- split3c/nssite/fastq.py +148 -0
- split3c/nssite/main.py +368 -0
- split3c/nssite/processmanager.py +51 -0
- split3c/nssite/split.py +849 -0
- split3c/resite/__init__.py +33 -0
- split3c/resite/frag.py +576 -0
- split3c/resite/header.py +91 -0
- split3c/resite/index.py +236 -0
- split3c/resite/main.py +506 -0
- split3c/resite/pretreatment.py +299 -0
- split3c/resite/read.py +91 -0
- split3c/resite/write_control.py +111 -0
- split3c/resolve/__init__.py +0 -0
- split3c/resolve/bam.py +129 -0
- split3c/resolve/io_utils.py +77 -0
- split3c/resolve/main.py +506 -0
- split3c/resolve/pairs.py +56 -0
- split3c/resolve/parse.py +1218 -0
- split3c-0.0.1.dist-info/METADATA +100 -0
- split3c-0.0.1.dist-info/RECORD +29 -0
- split3c-0.0.1.dist-info/WHEEL +5 -0
- split3c-0.0.1.dist-info/entry_points.txt +5 -0
- split3c-0.0.1.dist-info/licenses/LICENSE +235 -0
- split3c-0.0.1.dist-info/top_level.txt +1 -0
split3c/resolve/main.py
ADDED
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parse and filter remapped multiplexes from Microsplit / Parasplit outputs.
|
|
3
|
+
|
|
4
|
+
Build `.pairs` files from remapped split BAM pairs.
|
|
5
|
+
Also build `.pairs` files from a classic (unsplit) BAM.
|
|
6
|
+
|
|
7
|
+
Copyright © 2026 Samir Bertache
|
|
8
|
+
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
from typing import Any, Optional
|
|
17
|
+
|
|
18
|
+
from .parse import parse_to_pairs
|
|
19
|
+
|
|
20
|
+
# Version string shown by --version and in the banner, and passed to parse_to_pairs.
# NOTE(review): the wheel in this listing is split3c 0.0.1 — confirm 0.0.4 is intended.
__version__ = "0.0.4"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _try_rich() -> Optional[dict[str, Any]]:
|
|
24
|
+
try:
|
|
25
|
+
from rich import box
|
|
26
|
+
from rich.console import Console
|
|
27
|
+
from rich.panel import Panel
|
|
28
|
+
from rich.table import Table
|
|
29
|
+
from rich.theme import Theme
|
|
30
|
+
from rich.traceback import install
|
|
31
|
+
from rich_argparse import RichHelpFormatter
|
|
32
|
+
except Exception:
|
|
33
|
+
return None
|
|
34
|
+
|
|
35
|
+
console = Console(
|
|
36
|
+
theme=Theme(
|
|
37
|
+
{
|
|
38
|
+
"info": "dim cyan",
|
|
39
|
+
"error": "bold red",
|
|
40
|
+
"warning": "magenta",
|
|
41
|
+
"ok": "bold green",
|
|
42
|
+
}
|
|
43
|
+
),
|
|
44
|
+
width=110,
|
|
45
|
+
)
|
|
46
|
+
install(console=console)
|
|
47
|
+
return {
|
|
48
|
+
"console": console,
|
|
49
|
+
"Panel": Panel,
|
|
50
|
+
"Table": Table,
|
|
51
|
+
"box": box,
|
|
52
|
+
"RichHelpFormatter": RichHelpFormatter,
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Rich helpers resolved once at import time; None when rich / rich_argparse
# could not be imported, in which case callers fall back to plain output.
_R = _try_rich()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MyArgumentParser(argparse.ArgumentParser):
    """Argument parser that reports usage errors through Rich when available."""

    def error(self, message: str) -> None:
        """Report a command-line usage error and exit with status 2.

        With Rich available, the message is shown in a panel followed by the
        full help text. Otherwise the standard argparse behaviour is used:
        usage on stderr followed by ``<prog>: error: <message>``.

        Args:
            message: Error text produced by argparse; it may echo user input.
        """
        if _R is not None:
            # argparse messages echo raw user input (e.g. unrecognized
            # arguments). Escape it so bracketed text is printed literally
            # instead of being parsed as Rich markup.
            from rich.markup import escape

            console = _R["console"]
            Panel = _R["Panel"]
            console.print(
                Panel(
                    f"[bold red]Error:[/bold red] {escape(message)}",
                    title="Incorrect arguments",
                    expand=True,
                    width=110,
                )
            )
            self.print_help()
            self.exit(2)

        self.print_usage(sys.stderr)
        self.exit(2, f"{self.prog}: error: {message}\n")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _formatter_class():
    """Choose the help formatter handed to argparse.

    Returns ``argparse.RawTextHelpFormatter`` when Rich is unavailable;
    otherwise a factory that builds a ``RawTextRichHelpFormatter`` with a
    fixed help column (42) and width (110).
    """
    if _R is not None:
        from rich_argparse import RawTextRichHelpFormatter

        def _make_formatter(prog):
            return RawTextRichHelpFormatter(
                prog,
                max_help_position=42,
                width=110,
            )

        return _make_formatter

    return argparse.RawTextHelpFormatter
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _print_banner() -> None:
    """Print the program banner panel; does nothing when Rich is unavailable."""
    rich = _R
    if rich is None:
        return

    banner_text = (
        "[bold blue]splitparse[/bold blue]\n"
        "Parse remapped split BAM input into `.pairs` files.\n"
        "Supports either two synchronized BAM files or one interleaved BAM.\n\n"
        "Use --help to see detailed options."
    )
    banner = rich["Panel"](
        banner_text,
        title="[bold green]splitparse[/bold green]",
        subtitle=f"Version: {__version__}",
        expand=True,
        width=110,
    )

    out = rich["console"]
    out.print(banner)
    # Blank line separating the banner from whatever is printed next.
    out.print("")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _print_summary(args: argparse.Namespace) -> None:
    """Print a two-column table of the effective options (Rich only).

    Args:
        args: Parsed command-line namespace; every listed option is read from
            it and shown via ``str()``.
    """
    if _R is None:
        return

    out = _R["console"]
    box_styles = _R["box"]

    summary = _R["Table"](
        show_edge=True,
        title="[bold green]Run summary[/bold green]",
        box=box_styles.HEAVY,
        width=110,
    )
    summary.add_column("Key", style="cyan", no_wrap=True)
    summary.add_column("Value", style="magenta")

    # (label, value) pairs in display order.
    rows = [
        ("--single-bam", args.single_bam),
        ("--bam-forward", args.bam_forward),
        ("--bam-reverse", args.bam_reverse),
        ("--mode", "simple" if args.simple else "split"),
        ("--out-pairs", args.out_pairs),
        ("-o/--out-true-multiplex", args.out_true_multiplex),
        ("--out-duplex", args.out_duplex),
        ("--min-mapq", args.min_mapq),
        ("--adjacent-gap-max", args.adjacent_gap_max),
        ("--terminal-center-gap-max", args.terminal_center_gap_max),
        ("--strict-complete-cover", args.strict_complete_cover),
        ("--terminal-overlap-tolerance", args.terminal_overlap_tolerance),
        ("--assembly", args.assembly),
        ("--bam-threads", args.bam_threads),
        ("--out-threads", args.out_threads),
        ("--write-batch-size", args.write_batch_size),
        ("--qual-stats", args.qual_stats),
        ("--sam", args.sam),
        ("--filter-stats", args.filter_stats),
        ("--no-flip", args.no_flip),
        ("--force", args.force),
        ("--verbose", args.verbose),
    ]
    for label, value in rows:
        summary.add_row(label, str(value))

    out.print(summary)
    out.print("")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def validate_args(args: argparse.Namespace) -> None:
    """Check parsed CLI arguments for consistency before any work starts.

    Checks run in this order and stop at the first failure:
    input files exist, ``--single-bam`` / ``--bam-reverse`` agree, output
    paths are distinct and do not point at an input, the output options fit
    the selected mode, output locations are writable (and not already present
    unless ``--force``), and numeric options are within range.

    Args:
        args: Namespace produced by the parser built in this module.

    Raises:
        ValueError: With a user-facing message describing the failed check.
    """

    def _file_exists(path: str, what: str) -> None:
        if not os.path.exists(path):
            raise ValueError(f"{what}: file not found: {path}")
        if not os.path.isfile(path):
            raise ValueError(f"{what}: not a file: {path}")

    def _parent_writable(path: str, what: str) -> None:
        parent = os.path.dirname(os.path.abspath(path)) or os.getcwd()
        if not os.path.exists(parent):
            raise ValueError(f"{what}: parent directory does not exist: {parent}")
        if not os.access(parent, os.W_OK):
            raise ValueError(f"{what}: parent directory not writable: {parent}")

    def _check_output(path: Optional[str], what: str) -> None:
        if path is None:
            return
        _parent_writable(path, what)
        if not args.force and os.path.exists(path):
            raise ValueError(f"{what}: output already exists: {path} (use --force)")

    def _canonical(path: str) -> str:
        # Resolve relative segments and symlinks so that two spellings of the
        # same file (e.g. "x" and "./x") compare equal.
        return os.path.normcase(os.path.realpath(path))

    _file_exists(args.bam_forward, "Input BAM")

    if args.single_bam:
        if args.bam_reverse is not None:
            raise ValueError("--bam-reverse must not be provided with --single-bam")
    else:
        if args.bam_reverse is None:
            raise ValueError("--bam-reverse is required unless --single-bam is used")
        _file_exists(args.bam_reverse, "Reverse BAM")

    # Outputs must be pairwise distinct files.
    outputs = [
        p for p in (args.out_pairs, args.out_duplex, args.out_true_multiplex) if p
    ]
    canonical_outputs = {_canonical(p) for p in outputs}
    if len(canonical_outputs) != len(outputs):
        raise ValueError("All output paths must be distinct")

    # An output that resolves to an input BAM would be overwritten with --force.
    canonical_inputs = {
        _canonical(p) for p in (args.bam_forward, args.bam_reverse) if p
    }
    if canonical_outputs & canonical_inputs:
        raise ValueError("Output paths must not overwrite an input BAM")

    if args.simple:
        if args.out_pairs is None:
            raise ValueError("--simple requires --out-pairs")
        if args.out_duplex or args.out_true_multiplex:
            raise ValueError("--simple forbids --out-duplex/--out-true-multiplex")

    if args.split:
        has_out_pairs = args.out_pairs is not None
        has_split_outputs = (args.out_duplex is not None) or (
            args.out_true_multiplex is not None
        )

        # Either --out-pairs alone, or the (duplex + true multiplex) pair.
        if has_out_pairs and has_split_outputs:
            raise ValueError(
                "--split: choose either --out-pairs OR (--out-duplex and --out-true-multiplex), not both"
            )
        if not has_out_pairs:
            if args.out_duplex is None or args.out_true_multiplex is None:
                raise ValueError(
                    "--split without --out-pairs requires BOTH --out-duplex and --out-true-multiplex"
                )

    requested_outputs = (
        (args.out_pairs, "All pairs output"),
        (args.out_true_multiplex, "True multiplex pairs output"),
        (args.out_duplex, "Duplex pairs output"),
    )
    for path, label in requested_outputs:
        if path:
            _check_output(path, label)

    # (attribute, flag shown to the user, smallest accepted value)
    minimums = (
        ("min_mapq", "--min-mapq", 0),
        ("adjacent_gap_max", "--adjacent-gap-max", 0),
        ("terminal_center_gap_max", "--terminal-center-gap-max", 0),
        ("terminal_overlap_tolerance", "--terminal-overlap-tolerance", 0),
        ("bam_threads", "--bam-threads", 1),
        ("out_threads", "--out-threads", 1),
        ("write_batch_size", "--write-batch-size", 1),
    )
    for attr, flag, lowest in minimums:
        if getattr(args, attr) < lowest:
            raise ValueError(f"{flag} must be >= {lowest}")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Exactly one of ``--simple`` / ``--split`` is required, as is
    ``-1/--bam-forward``. All remaining options are optional and grouped by
    purpose (inputs, outputs, filtering, extra columns, performance, misc).

    Returns:
        The configured parser; cross-option consistency is checked
        separately after parsing.
    """
    parser = MyArgumentParser(
        description=(
            "Parse remapped/mapped split BAM pairs into `.pairs` outputs.\n"
            "The parser keeps the first alignment seen per tag, filters low-MAPQ slots,\n"
            "merges adjacent same-origin fragments when their genomic gap is small,\n"
            "optionally collapses the terminal forward/reverse pair when compatible,\n"
            "and emits pairs for molecules that remain multiplex or become resolved."
        ),
        # Examples must only use options defined below, spelled in full.
        epilog=(
            "Examples:\n"
            "\n\tOne output (simple event : duplex) : \n\tsplitparse --simple -1 R1.bam -2 R2.bam --out-pairs out.pairs.gz\n"
            "\n\tGlobal output (all pairs) : \n\tsplitparse --split -1 R1_split.bam -2 R2_split.bam --out-pairs all.pairs.gz\n"
            "\n\tSeparate outputs (resolved + true multiplex) : \n\tsplitparse --split -1 R1_split.bam -2 R2_split.bam -o true_multiplex.pairs.gz --out-duplex resolved.pairs.gz\n"
            "\n\tSingle bam : \n\tsplitparse --simple -1 merged.bam --single-bam --out-pairs out.pairs.gz"
        ),
        formatter_class=_formatter_class(),
    )

    req = parser.add_argument_group("Required inputs")
    out = parser.add_argument_group("Outputs")
    flt = parser.add_argument_group("Filtering parameters")
    extra = parser.add_argument_group("Optional pair columns")
    perf = parser.add_argument_group("Performance")
    misc = parser.add_argument_group("Misc")

    # Processing mode: exactly one must be chosen.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--simple",
        action="store_true",
        help="Classic .pairs from BAM input (ignore split tags). Use it to transform your unsplit bam to .pairs",
    )
    mode.add_argument(
        "--split",
        action="store_true",
        help="Split-aware mode (parse tags if present; fallback simple).",
    )

    req.add_argument(
        "-1",
        "--bam-forward",
        required=True,
        help="Path to R1 split BAM, or to a single interleaved BAM if --single-bam is used.",
    )
    req.add_argument(
        "-2",
        "--bam-reverse",
        default=None,
        help="Path to R2 split BAM. Not required if --single-bam is used.",
    )
    req.add_argument(
        "--single-bam",
        action="store_true",
        help="Use one interleaved BAM instead of two synchronized BAM files.",
    )

    out.add_argument(
        "--out-pairs",
        default=None,
        help="Output pairs for --simple or global output for --split.",
    )
    out.add_argument(
        "-o",
        "--out-true-multiplex",
        default=None,
        help="Output true multiplex pairs (split mode).",
    )
    out.add_argument(
        "--out-duplex",
        default=None,
        help="Output resolved pairs which become duplex (split mode).",
    )

    out.add_argument(
        "--force",
        action="store_true",
        help="Overwrite existing output files.",
    )
    out.add_argument(
        "--sam",
        action="store_true",
        help=(
            "Append two extra columns, sam1 and sam2, containing the raw SAM lines "
            "of the emitted pair sides."
        ),
    )

    flt.add_argument(
        "--strict-complete-cover",
        action="store_true",
        help=(
            "Require each molecule block to expose all declared tags (FT+RT). "
            "If not, the block is marked incomplete_cover and no pairs are emitted."
        ),
    )
    flt.add_argument(
        "--min-mapq",
        type=int,
        default=1,
        help="Minimum MAPQ required for one alignment to be considered valid.",
    )
    flt.add_argument(
        "--adjacent-gap-max",
        type=int,
        default=5,
        help="Maximum strand-aware genomic gap allowed to merge two adjacent fragments from the same origin (origin means forward or reverse).",
    )
    flt.add_argument(
        "--terminal-center-gap-max",
        type=int,
        default=300,
        help="Maximum central gap allowed to collapse the terminal forward/reverse pair.",
    )
    flt.add_argument(
        "--terminal-overlap-tolerance",
        type=int,
        default=1,
        help="Allowed negative overlap when testing terminal forward/reverse collapse.",
    )
    flt.add_argument(
        "--assembly",
        default=None,
        help="Optional assembly name to store in the `.pairs` header.",
    )
    flt.add_argument(
        "--no-flip",
        action="store_true",
        help="Do not reorder pairs into genomic upper-triangle order.",
    )

    extra.add_argument(
        "--qual-stats",
        action="store_true",
        help="Add len1 len2 mapq1 mapq2 columns to output pairs.",
    )
    extra.add_argument(
        "--filter-stats",
        action="store_true",
        help="Add hard_merged terminal_aliased columns to output pairs.",
    )

    perf.add_argument(
        "--bam-threads",
        type=int,
        default=3,
        help="Number of threads used for BAM reading.",
    )
    perf.add_argument(
        "--out-threads",
        type=int,
        default=3,
        help="Number of threads used by the output compression backend.",
    )
    perf.add_argument(
        "--write-batch-size",
        type=int,
        default=50000,
        help="Number of pair lines buffered before flushing to disk.",
    )

    misc.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    misc.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v, -vv).",
    )

    return parser
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def main_cli(argv: list[str] | None = None) -> int:
    """Run the command-line tool and return a process exit status.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 2 when argument validation fails, 1 when processing
        raises, 130 when interrupted with Ctrl-C. Argparse usage errors exit
        through the parser itself.
    """
    _print_banner()

    parser = build_parser()
    args = parser.parse_args(argv)

    if args.verbose >= 2:
        level = logging.DEBUG
    elif args.verbose == 1:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(level=level, format="%(levelname)s: %(message)s")

    try:
        validate_args(args)
    except ValueError as exc:
        _report_error("Validation", str(exc))
        # Help follows the error on the same stream the error was written to.
        if _R is not None:
            parser.print_help()
        else:
            parser.print_help(sys.stderr)
        return 2

    _print_summary(args)
    mode = "simple" if args.simple else "split"

    try:
        counts = parse_to_pairs(
            bam_for_path=args.bam_forward,
            bam_rev_path=args.bam_reverse,
            mode=mode,
            out_pairs=args.out_pairs,
            out_duplex=args.out_duplex,
            out_true_multiplex_pairs=args.out_true_multiplex,
            min_mapq=args.min_mapq,
            adjacent_gap_max=args.adjacent_gap_max,
            terminal_center_gap_max=args.terminal_center_gap_max,
            terminal_overlap_tolerance=args.terminal_overlap_tolerance,
            assembly=args.assembly,
            bam_threads=args.bam_threads,
            out_threads=args.out_threads,
            flip=not args.no_flip,
            write_batch_size=args.write_batch_size,
            sam_output=args.sam,
            qual_stats=args.qual_stats,
            filter_stats=args.filter_stats,
            strict_complete_cover=args.strict_complete_cover,
            single_bam=args.single_bam,
            version=__version__,
        )
    except KeyboardInterrupt:
        return 130
    except Exception as exc:
        # Top-level boundary: show a short message, and keep the traceback
        # reachable at debug verbosity (-vv) instead of discarding it.
        logging.getLogger(__name__).debug("Processing failed", exc_info=True)
        _report_error("Runtime error", str(exc))
        return 1

    report = json.dumps(counts, indent=2, sort_keys=True)
    if _R is not None:
        console = _R["console"]
        Panel = _R["Panel"]
        console.print(
            Panel(
                report,
                title="[bold green]Run completed[/bold green]",
                expand=True,
                width=110,
            )
        )
    else:
        print(report)

    return 0


def _report_error(title: str, message: str) -> None:
    """Show *message* in a red Rich panel, or as ``ERROR: ...`` on stderr."""
    if _R is None:
        print(f"ERROR: {message}", file=sys.stderr)
        return

    # Messages embed file paths and library error text; escape them so
    # bracketed fragments are printed literally rather than parsed as markup.
    from rich.markup import escape

    _R["console"].print(
        _R["Panel"](
            f"[bold red]{escape(message)}[/bold red]",
            title=title,
            expand=True,
            width=110,
        )
    )
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# Script entry point: the CLI's return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main_cli())
|
split3c/resolve/pairs.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from typing import Iterable, TextIO
|
|
2
|
+
|
|
3
|
+
# Column names used for the ``#columns:`` header line when none are supplied.
DEFAULT_COLUMNS = [
    "readID", "chrom1", "pos1", "chrom2", "pos2",
    "strand1", "strand2", "pair_type",
    "tag1", "tag2", "alias1", "alias2",
    "origin1", "origin2", "origin_class",
    "FT", "RT", "status",
    "hard_merged", "terminal_aliased",
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def make_pairs_header(
|
|
28
|
+
chromsizes: Iterable[tuple[str, int]],
|
|
29
|
+
columns: list[str] | None = None,
|
|
30
|
+
assembly: str | None = None,
|
|
31
|
+
shape: str = "whole matrix",
|
|
32
|
+
sorted_by: str = "none",
|
|
33
|
+
program_id: str = "parse2split",
|
|
34
|
+
program_version: str = "0.0.0",
|
|
35
|
+
) -> list[str]:
|
|
36
|
+
if columns is None:
|
|
37
|
+
columns = DEFAULT_COLUMNS
|
|
38
|
+
|
|
39
|
+
header = [
|
|
40
|
+
"## pairs format v1.0",
|
|
41
|
+
f"#shape: {shape}",
|
|
42
|
+
f"#sorted: {sorted_by}",
|
|
43
|
+
f"#columns: {' '.join(columns)}",
|
|
44
|
+
f"#command: {program_id} {program_version}",
|
|
45
|
+
]
|
|
46
|
+
if assembly:
|
|
47
|
+
header.append(f"#genome_assembly: {assembly}")
|
|
48
|
+
for chrom, length in chromsizes:
|
|
49
|
+
header.append(f"#chromsize: {chrom} {length}")
|
|
50
|
+
return header
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def write_pairs_header(out: TextIO, header_lines: list[str]) -> None:
    """Write every header line to *out*, each followed by a newline.

    Args:
        out: Writable text stream.
        header_lines: Lines without trailing newlines, written in order.
    """
    emit = out.write
    for entry in header_lines:
        emit(entry)
        emit("\n")
|