split3c 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- split3c/__init__.py +0 -0
- split3c/cli.py +336 -0
- split3c/nssite/__init__.py +0 -0
- split3c/nssite/auxiliary.py +190 -0
- split3c/nssite/bam.py +299 -0
- split3c/nssite/fastq.py +148 -0
- split3c/nssite/main.py +368 -0
- split3c/nssite/processmanager.py +51 -0
- split3c/nssite/split.py +849 -0
- split3c/resite/__init__.py +33 -0
- split3c/resite/frag.py +576 -0
- split3c/resite/header.py +91 -0
- split3c/resite/index.py +236 -0
- split3c/resite/main.py +506 -0
- split3c/resite/pretreatment.py +299 -0
- split3c/resite/read.py +91 -0
- split3c/resite/write_control.py +111 -0
- split3c/resolve/__init__.py +0 -0
- split3c/resolve/bam.py +129 -0
- split3c/resolve/io_utils.py +77 -0
- split3c/resolve/main.py +506 -0
- split3c/resolve/pairs.py +56 -0
- split3c/resolve/parse.py +1218 -0
- split3c-0.0.1.dist-info/METADATA +100 -0
- split3c-0.0.1.dist-info/RECORD +29 -0
- split3c-0.0.1.dist-info/WHEEL +5 -0
- split3c-0.0.1.dist-info/entry_points.txt +5 -0
- split3c-0.0.1.dist-info/licenses/LICENSE +235 -0
- split3c-0.0.1.dist-info/top_level.txt +1 -0
split3c/resite/main.py
ADDED
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
|
|
3
|
+
|
|
4
|
+
Copyright © 2024 Samir Bertache
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
===============================================================================
|
|
9
|
+
|
|
10
|
+
This program is free software: you can redistribute it and/or modify it under
|
|
11
|
+
the terms of the GNU Affero General Public License as published by the
|
|
12
|
+
Free Software Foundation, either version 3 of the License, or (at your option)
|
|
13
|
+
any later version.
|
|
14
|
+
|
|
15
|
+
This program is distributed in the hope that it will be useful,
|
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
18
|
+
See the GNU Affero General Public License for more details.
|
|
19
|
+
|
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
|
21
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import logging
|
|
26
|
+
import os
|
|
27
|
+
import sys
|
|
28
|
+
from multiprocessing import Process, Queue
|
|
29
|
+
from typing import Any, List, Optional
|
|
30
|
+
|
|
31
|
+
from .frag import process_items
|
|
32
|
+
from .pretreatment import partition_threads, search_in_database
|
|
33
|
+
from .read import read_fastq_gzip_simultaneously
|
|
34
|
+
from .write_control import manage_pigz_problems, open_output, write_pairs
|
|
35
|
+
|
|
36
|
+
# Set up logging: the root logger is configured at INFO level as a side
# effect of importing this module.
logging.basicConfig(level=logging.INFO)
|
|
38
|
+
|
|
39
|
+
from importlib.metadata import PackageNotFoundError
|
|
40
|
+
from importlib.metadata import version as _version
|
|
41
|
+
|
|
42
|
+
# Resolve the installed distribution's version. This wheel is published as
# "split3c", so that name is tried first; "parasplit" (the name still used in
# the banner and help text) is kept as a fallback so existing installs under
# that name keep reporting their version.
__version__ = "0+unknown"
for _dist_name in ("split3c", "parasplit"):
    try:
        __version__ = _version(_dist_name)
    except PackageNotFoundError:
        continue
    break
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _try_rich() -> Optional[dict[str, Any]]:
|
|
49
|
+
try:
|
|
50
|
+
from rich import box
|
|
51
|
+
from rich.console import Console
|
|
52
|
+
from rich.panel import Panel
|
|
53
|
+
from rich.table import Table
|
|
54
|
+
from rich.theme import Theme
|
|
55
|
+
from rich.traceback import install
|
|
56
|
+
from rich_argparse import RichHelpFormatter
|
|
57
|
+
except Exception:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
console = Console(
|
|
61
|
+
theme=Theme({"info": "dim cyan", "error": "bold red", "warning": "magenta"}),
|
|
62
|
+
width=100,
|
|
63
|
+
)
|
|
64
|
+
install(console=console)
|
|
65
|
+
return {
|
|
66
|
+
"console": console,
|
|
67
|
+
"Panel": Panel,
|
|
68
|
+
"Table": Table,
|
|
69
|
+
"box": box,
|
|
70
|
+
"RichHelpFormatter": RichHelpFormatter,
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Rich toolkit shared by the helpers in this module; None when Rich could not
# be imported.
_R = _try_rich()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class MyArgumentParser(argparse.ArgumentParser):
    """Argument parser that reports usage errors through Rich when available."""

    def error(self, message: str) -> None:
        """Report a command-line usage error and exit with status 2.

        With Rich available, the message is shown in a panel followed by the
        full help text. Otherwise this mirrors argparse's default behaviour:
        the usage line on stderr, then ``prog: error: message``.

        Parameters:
            message (str): Error text produced by argparse.
        """
        if _R is not None:
            # argparse echoes user input in its messages (e.g. an invalid
            # choice). Escape it so square brackets are not parsed as Rich
            # markup, which could hide text or raise a MarkupError.
            from rich.markup import escape

            console = _R["console"]
            Panel = _R["Panel"]
            console.print(
                Panel(
                    f"[bold red]Error:[/bold red] {escape(message)}",
                    title="Incorrect arguments",
                    expand=True,
                    width=100,
                )
            )
            self.print_help()
            self.exit(2)
        self.print_usage(sys.stderr)
        self.exit(2, f"{self.prog}: error: {message}\n")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _formatter_class():
    """Pick the help formatter handed to argparse.

    Returns:
        ``argparse.RawTextHelpFormatter`` when Rich is unavailable; otherwise
        a factory taking ``prog`` that builds a ``RawTextRichHelpFormatter``
        with ``max_help_position=42`` and ``width=110``.
    """
    if _R is not None:
        from rich_argparse import RawTextRichHelpFormatter

        def make_formatter(prog):
            return RawTextRichHelpFormatter(prog, max_help_position=42, width=110)

        return make_formatter

    return argparse.RawTextHelpFormatter
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _print_banner() -> None:
    """Print the welcome panel on the Rich console; no-op without Rich."""
    if _R is None:
        return
    console = _R["console"]
    Panel = _R["Panel"]
    console.print(
        Panel(
            # Adjacent string literals below are concatenated into one panel body.
            "[bold blue]Parasplit[/bold blue]\n"
            "Split paired FASTQ at restriction enzyme ligation sites.\n\n"
            """Features \n
Find and Utilize Restriction Enzyme Sites: Automatically identify ligation sites from provided enzyme names and generate regex patterns to locate these sites in sequences.
Fragmentation: Split sequences at restriction enzyme sites, creating smaller fragments.
Multi-threading: Efficiently handle large datasets by utilizing multiple threads for decompression, fragmentation, and compression.
Custom Modes: Supports different pairing modes for sequence fragments.\n\n
"""
            "Use --help to see detailed options.",
            title="[bold green]parasplit[/bold green]",
            subtitle=f"Version: {__version__}",
            expand=True,
            width=100,
        )
    )
    # Blank line separating the banner from whatever is printed next.
    console.print("")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _print_summary(args: argparse.Namespace) -> None:
    """Print a two-column table of the effective run settings (Rich only).

    Each row is labelled with the command-line option spelling that the
    parser actually accepts, so the table can be read back as a command line.

    Parameters:
        args (argparse.Namespace): Parsed arguments; must also carry the
            ``enzymes`` attribute (the normalised enzyme list).
    """
    if _R is None:
        return
    console = _R["console"]
    Table = _R["Table"]
    box = _R["box"]

    t = Table(
        show_edge=True,
        title="[bold green]Summary[/bold green]",
        box=box.HEAVY,
        width=100,
    )
    t.add_column("Key", style="cyan", no_wrap=True)
    t.add_column("Value", style="magenta")

    # (option as accepted on the command line, value in effect)
    rows = (
        ("--source_forward", args.source_forward),
        ("--source_reverse", args.source_reverse),
        ("--output_forward", args.output_forward),
        ("--output_reverse", args.output_reverse),
        ("--list_enzyme", args.enzymes),
        ("--mode", args.mode),
        ("--seed_size", args.seed_size),
        ("--buffer-size", args.buffer_size),
        ("--num_threads", args.num_threads),
        ("--borderless", args.borderless),
        ("--tags", args.tags),
        ("--force", args.force),
        ("--verbose", args.verbose),
    )
    for label, value in rows:
        t.add_row(label, str(value))

    console.print(t)
    console.print("")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _parse_enzymes(raw: str) -> list[str]:
|
|
169
|
+
raw = (raw or "").strip()
|
|
170
|
+
if not raw or raw == "No restriction enzyme found":
|
|
171
|
+
return []
|
|
172
|
+
return [x.strip() for x in raw.split(",") if x.strip()]
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def validate_args(args: argparse.Namespace) -> None:
    """Validate parsed CLI arguments before any processing starts.

    Checks, in order: both inputs exist and are regular files; both output
    parent directories exist and are writable; the two outputs resolve to
    different files; no output resolves to one of the inputs; numeric options
    are in range; the mode is known; and, unless ``args.force`` is set, no
    output file already exists.

    Parameters:
        args (argparse.Namespace): Must provide ``source_forward``,
            ``source_reverse``, ``output_forward``, ``output_reverse``,
            ``num_threads``, ``seed_size``, ``buffer_size``, ``mode`` and
            ``force``.

    Raises:
        ValueError: On the first failed check, with a user-facing message.
    """

    def _file_exists(path: str, what: str) -> None:
        if not os.path.exists(path):
            raise ValueError(f"{what}: file not found: {path}")
        if not os.path.isfile(path):
            raise ValueError(f"{what}: not a file: {path}")

    def _parent_writable(path: str, what: str) -> None:
        parent = os.path.dirname(os.path.abspath(path)) or os.getcwd()
        if not os.path.exists(parent):
            raise ValueError(f"{what}: parent directory does not exist: {parent}")
        if not os.access(parent, os.W_OK):
            raise ValueError(f"{what}: parent directory not writable: {parent}")

    def _canonical(path: str) -> str:
        # Resolve relative segments and symlinks so that different spellings
        # of the same file compare equal.
        return os.path.normcase(os.path.realpath(path))

    _file_exists(args.source_forward, "Forward FASTQ")
    _file_exists(args.source_reverse, "Reverse FASTQ")
    _parent_writable(args.output_forward, "Output R1")
    _parent_writable(args.output_reverse, "Output R2")

    out_fwd = _canonical(args.output_forward)
    out_rev = _canonical(args.output_reverse)
    if out_fwd == out_rev:
        raise ValueError("Output R1 and R2 must be different files")

    # Writing onto an input would destroy the data being read; with --force
    # the "already exists" check below would not catch it.
    sources = {_canonical(args.source_forward), _canonical(args.source_reverse)}
    for given, resolved in (
        (args.output_forward, out_fwd),
        (args.output_reverse, out_rev),
    ):
        if resolved in sources:
            raise ValueError(f"Output would overwrite an input file: {given}")

    if args.num_threads < 5:
        raise ValueError("--num-threads must be >= 5 (recommended >= 8)")

    if args.seed_size < 0:
        raise ValueError("--seed-size must be >= 0")

    if args.buffer_size <= 0:
        raise ValueError("--buffer-size must be >= 1")

    if args.mode not in ("fr", "all", "cover"):
        raise ValueError("--mode must be one of: fr, all, cover")

    if not args.force:
        for p in (args.output_forward, args.output_reverse):
            if os.path.exists(p):
                raise ValueError(
                    f"Output already exists: {p} (use --force to overwrite)"
                )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _write_worker(
    output_buffer,
    write_threads,
    output_forward: str,
    output_reverse: str,
    frag_workers,
) -> None:
    """Writer process body: open the outputs, write pairs, always finalise."""
    # IMPORTANT: pigz must be opened and closed INSIDE the writer process.
    outF, outR = open_output(write_threads, output_forward, output_reverse)
    try:
        write_pairs(output_buffer, outF, outR, frag_workers)
    finally:
        manage_pigz_problems(outF, outR, output_forward, output_reverse)


def cut(
    source_forward: str,
    source_reverse: str,
    output_forward: str,
    output_reverse: str,
    list_enzyme: List[str],
    mode,
    seed_size,
    buffer_size: int = 100,
    num_threads: int = 8,
    borderless: bool = False,
    tags=None,
) -> None:
    """
    Main function to process sequences based on enzyme restriction sites.

    Runs one reader process, several fragmentation processes and one writer
    process connected by two bounded queues, then checks their exit codes.

    Parameters:
        source_forward (str): Input file path for forward reads.
        source_reverse (str): Input file path for reverse reads.
        output_forward (str): Output file path for processed forward reads.
        output_reverse (str): Output file path for processed reverse reads.
        list_enzyme (List[str]): List of restriction enzymes.
        mode (str): Mode of pairing fragments: "fr", "all" or "cover".
        seed_size (int): Minimum length of fragments to keep.
        buffer_size (int, optional): Size of buffer. Defaults to 100.
        num_threads (int, optional): Number of threads to use for processing. Defaults to 8.
        borderless (bool, optional): Whether to discard ligation sites (borders). Defaults to False.
        tags (optional): Header tagging mode, forwarded unchanged to the
            fragmentation workers. Defaults to None.

    Returns:
        None

    Raises:
        RuntimeError: If any worker process ends with a non-zero exit code.
        SystemExit: With status 1 for an unknown mode, or status 130 after a
            keyboard interrupt.
    """
    # Threads allocations :
    TRead, TFrag, TWrite = partition_threads(num_threads)

    # Take the enzyme list and make the ligation site list
    ligation_site_list = search_in_database(list_enzyme, borderless)

    # Filled before anything is started so the interrupt handler always
    # knows which processes exist.
    procs = []

    try:
        # Input and Output Queues
        input_buffer = Queue(maxsize=2048)
        output_buffer = Queue(maxsize=512)

        # Every mode runs the same worker; the mode is only forwarded to it.
        if mode not in ("all", "fr", "cover"):
            print(f"Unknown mode: {mode}")
            sys.exit(1)
        print(f"Mode {mode.upper()} selected")

        # Targets are passed as importable callables plus args rather than as
        # nested closures, which cannot be pickled under the "spawn" start
        # method. NOTE(review): this assumes the imported worker functions
        # are defined at module level in their own modules - confirm.
        read_p = Process(
            target=read_fastq_gzip_simultaneously,
            args=(source_forward, source_reverse, input_buffer, TRead, TFrag),
        )
        frag_args = (
            input_buffer,
            output_buffer,
            ligation_site_list,
            seed_size,
            buffer_size,
            mode,
            borderless,
            tags,
        )
        process_p_list = [
            Process(target=process_items, args=frag_args) for _ in range(TFrag)
        ]
        write_p = Process(
            target=_write_worker,
            args=(output_buffer, TWrite, output_forward, output_reverse, TFrag),
        )

        procs = [
            ("read", read_p),
            *[(f"proc{i}", p) for i, p in enumerate(process_p_list)],
            ("write", write_p),
        ]

        # Start processes: reader, then workers, then writer
        for _, p in procs:
            p.start()

        # Wait for all processes to finish
        for _, p in procs:
            p.join()

        bad = [(name, p.exitcode) for name, p in procs if p.exitcode not in (0, None)]
        if bad:
            raise RuntimeError(f"Subprocess failure(s): {bad}")

    except KeyboardInterrupt:
        print("Keyboard interrupt detected. Terminating...")
        for _, p in procs:
            if p.pid is None:
                # Never started; nothing to reap.
                continue
            # Give the child a moment to run its own cleanup before it is
            # forcibly stopped.
            p.join(timeout=5.0)
            if p.is_alive():
                p.terminate()
                p.join()
        # 130 = 128 + SIGINT, the same status main_cli uses for interrupts;
        # exiting with 0 would report an aborted run as a success.
        sys.exit(130)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def build_parser() -> argparse.ArgumentParser:
    """Assemble the command-line parser.

    Options are organised in five help groups, created in this order:
    Inputs, Outputs, Performance, Split parameters and Misc.

    Returns:
        argparse.ArgumentParser: A ``MyArgumentParser`` using the formatter
        chosen by ``_formatter_class()``.
    """
    description_text = (
        "Split paired-end FASTQ at restriction enzyme ligation sites.\n"
        "Example: parasplit -sf R1.fq.gz -sr R2.fq.gz -of out_R1.fq.gz -or out_R2.fq.gz -le DpnII,MboI -m fr -nt 12"
    )
    epilog_text = (
        "Examples:\n"
        " parasplit -sf R1.fq.gz -sr R2.fq.gz -of out_R1.fq.gz -or out_R2.fq.gz -le DpnII -m fr -nt 12 -sz 20\n"
        " parasplit -sf R1.fq.gz -sr R2.fq.gz -of out_R1.fq.gz -or out_R2.fq.gz -le DpnII,MboI -m all -nt 24 --tags o\n"
    )
    mode_help = (
        "Pairing mode:\n"
        " fr : one forward fragment + one reverse fragment\n"
        " all : all pairwise fragment combinations\n"
        " cover : minimal / near-minimal pairs so every fragment appears at least once (Sufficient to recover all post-processing multiplexe)"
    )
    tags_help = (
        "Header tagging mode for split reads.\n"
        " origin/o : include fragment origin tags (F1,R1,...)\n"
        " no_annot/na: keep base name only / No annotation (Not recommended if you wish to filter the pairs after the process.)"
    )

    cli = MyArgumentParser(
        description=description_text,
        epilog=epilog_text,
        formatter_class=_formatter_class(),
    )

    # --- Inputs ---
    inputs_grp = cli.add_argument_group("Inputs")
    inputs_grp.add_argument(
        "-sf", "--source_forward", required=True, help="Input FASTQ R1 (gz)."
    )
    inputs_grp.add_argument(
        "-sr", "--source_reverse", required=True, help="Input FASTQ R2 (gz)."
    )
    inputs_grp.add_argument(
        "-le",
        "--list_enzyme",
        default="",
        help="Comma-separated restriction enzyme names (e.g. DpnII,MboI). Empty means 'no enzyme'.",
    )

    # --- Outputs ---
    outputs_grp = cli.add_argument_group("Outputs")
    outputs_grp.add_argument(
        "-of", "--output_forward", required=True, help="Output FASTQ R1 (gz)."
    )
    outputs_grp.add_argument(
        "-or", "--output_reverse", required=True, help="Output FASTQ R2 (gz)."
    )
    outputs_grp.add_argument(
        "--force",
        action="store_true",
        help="Overwrite output files if they already exist.",
    )

    # --- Performance ---
    perf_grp = cli.add_argument_group("Performance")
    perf_grp.add_argument(
        "-nt", "--num_threads", type=int, default=8, help="Total CPU threads budget."
    )

    # --- Split parameters ---
    split_grp = cli.add_argument_group("Split parameters")
    split_grp.add_argument(
        "-m",
        "--mode",
        choices=["fr", "all", "cover"],
        default="cover",
        help=mode_help,
    )
    split_grp.add_argument(
        "-sz",
        "--seed_size",
        type=int,
        default=20,
        help="Minimum fragment length to keep after splitting (0 disables filtering).",
    )
    split_grp.add_argument(
        "--buffer-size",
        type=int,
        default=100,
        help="Chunk size flushed to writer.",
    )
    split_grp.add_argument(
        "-b",
        "--borderless",
        action="store_true",
        help="Discard ligation site borders.",
    )
    split_grp.add_argument(
        "--tags",
        choices=["origin", "no_annot", "o", "na"],
        default="o",
        help=tags_help,
    )

    # --- Misc ---
    misc_grp = cli.add_argument_group("Misc")
    misc_grp.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v: INFO, -vv: DEBUG).",
    )
    misc_grp.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )

    return cli
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _emit_error(title: str, err: Exception) -> None:
    """Show *err* in a Rich panel titled *title*, or on stderr without Rich."""
    if _R is None:
        print(f"ERROR: {err}", file=sys.stderr)
        return

    # Exception text can contain file paths or other user input; escape it so
    # square brackets are not interpreted as Rich markup.
    from rich.markup import escape

    Panel = _R["Panel"]
    _R["console"].print(
        Panel(
            f"[bold red]{escape(str(err))}[/bold red]",
            title=title,
            expand=True,
            width=100,
        )
    )


def main_cli(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point: parse, validate, summarise, then run ``cut``.

    Parameters:
        argv (Optional[list[str]]): Arguments to parse; ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        int: 0 on success, 2 when argument validation fails, 130 on a
        keyboard interrupt, 1 on any other exception raised by ``cut``.
    """
    _print_banner()

    parser = build_parser()
    args = parser.parse_args(argv)

    # logging level
    level = logging.WARNING
    if args.verbose == 1:
        level = logging.INFO
    elif args.verbose >= 2:
        level = logging.DEBUG
    # This module already calls logging.basicConfig() at import time, so the
    # root logger has a handler and a plain basicConfig() here would be
    # silently ignored; force=True makes the -v / -vv level take effect.
    logging.basicConfig(
        level=level, format="%(levelname)s: %(message)s", force=True
    )

    # normalize enzymes
    args.enzymes = _parse_enzymes(args.list_enzyme)

    try:
        validate_args(args)
    except ValueError as e:
        _emit_error("Validation", e)
        # Help goes to the same stream as the error message.
        if _R is not None:
            parser.print_help()
        else:
            parser.print_help(sys.stderr)
        return 2

    _print_summary(args)

    try:
        cut(
            source_forward=args.source_forward,
            source_reverse=args.source_reverse,
            output_forward=args.output_forward,
            output_reverse=args.output_reverse,
            list_enzyme=args.enzymes,
            mode=args.mode,
            seed_size=args.seed_size,
            tags=args.tags,
            buffer_size=args.buffer_size,
            num_threads=args.num_threads,
            borderless=args.borderless,
        )
    except KeyboardInterrupt:
        return 130
    except Exception as e:
        _emit_error("Runtime error", e)
        return 1

    return 0
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# Script entry point: main_cli()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main_cli())
|