split3c 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- split3c/__init__.py +0 -0
- split3c/cli.py +336 -0
- split3c/nssite/__init__.py +0 -0
- split3c/nssite/auxiliary.py +190 -0
- split3c/nssite/bam.py +299 -0
- split3c/nssite/fastq.py +148 -0
- split3c/nssite/main.py +368 -0
- split3c/nssite/processmanager.py +51 -0
- split3c/nssite/split.py +849 -0
- split3c/resite/__init__.py +33 -0
- split3c/resite/frag.py +576 -0
- split3c/resite/header.py +91 -0
- split3c/resite/index.py +236 -0
- split3c/resite/main.py +506 -0
- split3c/resite/pretreatment.py +299 -0
- split3c/resite/read.py +91 -0
- split3c/resite/write_control.py +111 -0
- split3c/resolve/__init__.py +0 -0
- split3c/resolve/bam.py +129 -0
- split3c/resolve/io_utils.py +77 -0
- split3c/resolve/main.py +506 -0
- split3c/resolve/pairs.py +56 -0
- split3c/resolve/parse.py +1218 -0
- split3c-0.0.1.dist-info/METADATA +100 -0
- split3c-0.0.1.dist-info/RECORD +29 -0
- split3c-0.0.1.dist-info/WHEEL +5 -0
- split3c-0.0.1.dist-info/entry_points.txt +5 -0
- split3c-0.0.1.dist-info/licenses/LICENSE +235 -0
- split3c-0.0.1.dist-info/top_level.txt +1 -0
split3c/__init__.py
ADDED
|
File without changes
|
split3c/cli.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This script is part of the split3c project, designed to process paired-end FASTQ files by fragmenting DNA sequences at specified restriction enzyme sites.
|
|
3
|
+
|
|
4
|
+
Copyright © 2024 Samir Bertache
|
|
5
|
+
|
|
6
|
+
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
7
|
+
|
|
8
|
+
===============================================================================
|
|
9
|
+
|
|
10
|
+
This program is free software: you can redistribute it and/or modify it under
|
|
11
|
+
the terms of the GNU Affero General Public License as published by the
|
|
12
|
+
Free Software Foundation, either version 3 of the License, or (at your option)
|
|
13
|
+
any later version.
|
|
14
|
+
|
|
15
|
+
This program is distributed in the hope that it will be useful,
|
|
16
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
18
|
+
See the GNU Affero General Public License for more details.
|
|
19
|
+
|
|
20
|
+
You should have received a copy of the GNU Affero General Public License
|
|
21
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
split3c root command-line interface.
|
|
26
|
+
|
|
27
|
+
This module dispatches the three user-facing subcommands:
|
|
28
|
+
|
|
29
|
+
- split3c re-site
|
|
30
|
+
- split3c ns-site
|
|
31
|
+
- split3c resolve
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
import difflib
|
|
35
|
+
import sys
|
|
36
|
+
from importlib.metadata import PackageNotFoundError
|
|
37
|
+
from importlib.metadata import version as _pkg_version
|
|
38
|
+
from typing import Any, Callable, Optional
|
|
39
|
+
|
|
40
|
+
from .nssite.main import main_cli as nssite_main
|
|
41
|
+
from .resite.main import main_cli as resite_main
|
|
42
|
+
from .resolve.main import main_cli as resolve_main
|
|
43
|
+
|
|
44
|
+
# Version of the installed "split3c" distribution; a placeholder is used when
# no distribution with that name can be found.
try:
    __version__ = _pkg_version("split3c")
except PackageNotFoundError:
    __version__ = "0+unknown"

# Documentation link printed in the footer of the root help.
DOC_URL = "https://gitbio.ens-lyon.fr/LBMC/physbio/split3c/-/blob/master/README.md?ref_type=heads"

# Call signature expected from a subcommand entry point.
CommandRunner = Callable[[list[str] | None], int]

# Registry of the user-facing subcommands, keyed by canonical name.
# Each entry carries the callable to run plus the descriptive strings that
# the help screens display.
COMMANDS: dict[str, dict[str, object]] = {
    "re-site": dict(
        runner=resite_main,
        summary="Restriction-based preprocessing for Hi-C / HiChIP / 3C-like libraries.",
        when="Use when ligation junctions can be derived from known restriction enzymes.",
        inputs="paired FASTQ",
        outputs="split / multiplex FASTQ for remapping",
    ),
    "ns-site": dict(
        runner=nssite_main,
        summary="Non-specific ligation preprocessing for Micro-C-like libraries.",
        when="Use when ligation junctions must be inferred from mapped BAM structure.",
        inputs="mapped BAM",
        outputs="split FASTQ for remapping",
    ),
    "resolve": dict(
        runner=resolve_main,
        summary="Convert mapped or remapped BAM into standard .pairs.",
        when="Use after mapping or remapping, in either simple or split-aware mode.",
        inputs="mapped / remapped BAM",
        outputs=".pairs",
    ),
}

# Accepted alternative spellings, mapped to their canonical COMMANDS key.
ALIASES: dict[str, str] = {
    "resite": "re-site",
    "nssite": "ns-site",
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _try_rich() -> Optional[dict[str, Any]]:
    """Set up rich-based output if the ``rich`` package can be imported.

    Returns:
        ``None`` when importing any of the required rich modules fails.
        Otherwise a dict with the keys ``"console"`` (a themed ``Console``
        of width 110), ``"Panel"``, ``"Table"`` and ``"box"``.

    Side effects:
        On success, installs the rich traceback handler bound to the
        created console.
    """
    try:
        from rich import box
        from rich.console import Console
        from rich.panel import Panel
        from rich.table import Table
        from rich.theme import Theme
        from rich.traceback import install
    except Exception:
        # Any import-time failure selects the plain-text fallback.
        return None

    # Named styles referenced from markup, e.g. "[title]...[/title]".
    palette = {
        "info": "dim cyan",
        "error": "bold red",
        "warning": "magenta",
        "ok": "bold green",
        "title": "bold green",
        "cmd": "bold cyan",
    }
    console = Console(theme=Theme(palette), width=110)
    install(console=console)

    return dict(console=console, Panel=Panel, Table=Table, box=box)


# Evaluated once at import time; None means "use plain print() output".
_R = _try_rich()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _resolve_command_name(name: str) -> str | None:
    """Translate a user-typed command into its canonical ``COMMANDS`` key.

    A name that is already a key of ``COMMANDS`` is returned unchanged;
    otherwise it is looked up in ``ALIASES``. Returns ``None`` when the name
    is neither a command nor an alias.
    """
    return name if name in COMMANDS else ALIASES.get(name)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _print_banner() -> None:
    """Print the program banner: name, version and a one-line description.

    Uses a rich panel when rich is available (``_R`` is not ``None``) and
    plain ``print`` calls otherwise. Both variants end with an empty line.
    """
    if _R is not None:
        body = (
            "[bold blue]split3c[/bold blue]\n"
            "Preprocess 3C-derived sequencing libraries and convert mapped/remapped BAM into `.pairs` to retrive multiplexes events.\n\n"
            "Subcommands:\n"
            " [cmd]re-site[/cmd] restriction-enzyme workflow\n"
            " [cmd]ns-site[/cmd] non-specific ligation workflow\n"
            " [cmd]resolve[/cmd] BAM to `.pairs` conversion"
        )
        out = _R["console"]
        banner = _R["Panel"](
            body,
            title="[title]split3c[/title]",
            subtitle=f"Version: {__version__}",
            expand=True,
            width=110,
        )
        out.print(banner)
        out.print("")
        return

    # Plain-text fallback.
    plain_lines = (
        f"split3c {__version__}",
        "Preprocess 3C-derived libraries and convert BAM to .pairs.",
        "",
    )
    for line in plain_lines:
        print(line)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _print_command_table() -> None:
    """Print one line/row per entry of ``COMMANDS``.

    The plain-text variant shows the command name and its summary. The rich
    variant renders a table with the name, summary, inputs and outputs.
    Both variants end with an empty line.
    """
    if _R is not None:
        out = _R["console"]
        grid = _R["Table"](
            show_edge=True,
            title="[title]Commands[/title]",
            box=_R["box"].HEAVY,
            width=110,
        )

        # (header, extra keyword arguments) for each column, in display order.
        column_specs = (
            ("Command", {"style": "cyan", "no_wrap": True}),
            ("Use case", {"style": "magenta"}),
            ("Input", {"style": "green", "no_wrap": True}),
            ("Output", {"style": "yellow"}),
        )
        for header, options in column_specs:
            grid.add_column(header, **options)

        for command, info in COMMANDS.items():
            cells = [str(info[key]) for key in ("summary", "inputs", "outputs")]
            grid.add_row(command, *cells)

        out.print(grid)
        out.print("")
        return

    # Plain-text fallback.
    print("Commands:")
    for command, info in COMMANDS.items():
        print(f" {command:<10} {info['summary']}")
    print("")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _print_workflows() -> None:
    """Print the "typical workflows" section of the root help.

    Describes three chains of steps (restriction-based libraries,
    Micro-C-like libraries, classic BAM to .pairs). Rendered as a rich panel
    when rich is available, as plain lines otherwise; both end with an empty
    line.
    """
    if _R is not None:
        text = (
            "[bold]Restriction-based libraries[/bold]\n"
            " FASTQ -> split3c re-site -> MAPPING -> split3c resolve --split\n\n"
            "[bold]Micro-C-like libraries[/bold]\n"
            " first-pass MAPPING -> split3c ns-site -> REMAPPING -> split3c resolve --split\n\n"
            "[bold]Classic BAM to `.pairs`[/bold]\n"
            " mapped BAM -> split3c resolve --simple"
        )
        out = _R["console"]
        out.print(
            _R["Panel"](
                text,
                title="[title]Typical workflows[/title]",
                expand=True,
                width=110,
            )
        )
        out.print("")
        return

    # Plain-text fallback, one print() per line.
    plain_lines = (
        "Typical workflows: How to catch multiplexes ",
        "",
        " Restriction-based libraries:",
        " FASTQ -> split3c re-site -> MAPPING -> split3c resolve --split",
        "",
        " Micro-C-like libraries:",
        " first-pass MAPPING -> split3c ns-site -> REMAPPING -> split3c resolve --split",
        "",
        " Classic BAM to .pairs: Only duplex informations",
        " mapped BAM -> split3c resolve --simple",
        "",
    )
    for line in plain_lines:
        print(line)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _print_footer() -> None:
    """Print example invocations and the documentation URL.

    Rendered as a rich panel titled "Getting started" when rich is
    available, as plain lines otherwise; both end with an empty line.
    """
    if _R is not None:
        text = (
            "[bold]Examples[/bold]\n"
            " split3c re-site --help\n"
            " split3c ns-site --help\n"
            " split3c resolve --help\n"
            " split3c help resolve\n\n"
            "[bold]Documentation[/bold]\n"
            f" {DOC_URL}"
        )
        out = _R["console"]
        out.print(
            _R["Panel"](
                text,
                title="[title]Getting started[/title]",
                expand=True,
                width=110,
            )
        )
        out.print("")
        return

    # Plain-text fallback, one print() per entry.
    plain_lines = (
        "Examples:",
        " split3c re-site --help",
        " split3c ns-site --help",
        " split3c resolve --help",
        " split3c help resolve",
        "",
        f"Documentation:\n {DOC_URL}",
        "",
    )
    for line in plain_lines:
        print(line)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _print_root_help(return_code: int = 0) -> int:
    """Print the full root help screen and hand back ``return_code``.

    The screen is made of four sections printed in order: banner, command
    table, typical workflows, footer. ``return_code`` is returned untouched
    so callers can write ``return _print_root_help(return_code=...)``.
    """
    sections = (
        _print_banner,
        _print_command_table,
        _print_workflows,
        _print_footer,
    )
    for render in sections:
        render()
    return return_code
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _print_error(message: str, title: str = "Error") -> None:
    """Report an error to the user.

    Parameters:
        message: Text of the error. It may contain user input (for example
            an unknown command name) and is therefore shown literally.
        title: Short heading; prefix of the plain-text line or title of the
            rich panel.

    Without rich, ``"<title>: <message>"`` is written to ``sys.stderr``.
    With rich, the message is shown in bold red inside a panel on the shared
    console.
    """
    if _R is None:
        print(f"{title}: {message}", file=sys.stderr)
        return

    # Reached only when rich could be imported (``_R`` is not None).
    from rich.markup import escape

    console = _R["console"]
    Panel = _R["Panel"]
    console.print(
        Panel(
            # Escape the message so square brackets in user-provided text are
            # printed as-is instead of being parsed as rich markup tags.
            f"[bold red]{escape(message)}[/bold red]",
            title=title,
            expand=True,
            width=110,
        )
    )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _dispatch(command_name: str, argv: list[str]) -> int:
    """Run the subcommand ``command_name`` with ``argv`` and return its exit status.

    Parameters:
        command_name: Command or alias as typed by the user.
        argv: Arguments forwarded to the subcommand runner.

    Returns:
        * ``2`` when the command is unknown (an error, an optional
          "Did you mean" hint and the root help are printed first);
        * otherwise the runner's return value converted with ``int``
          (a falsy return value counts as ``0``);
        * if the runner raises ``SystemExit``, the status the interpreter
          would have used: ``0`` for ``None``, the integer itself for an
          integer, and ``1`` for anything else, after printing that value
          to ``sys.stderr``.
    """
    resolved = _resolve_command_name(command_name)
    if resolved is None:
        known = list(COMMANDS) + list(ALIASES)
        suggestions = difflib.get_close_matches(command_name, known, n=1, cutoff=0.55)
        msg = f"Unknown command: {command_name}"
        if suggestions:
            # Show the canonical spelling even when the closest match is an alias.
            best = _resolve_command_name(suggestions[0]) or suggestions[0]
            msg += f"\nDid you mean: {best} ?"
        _print_error(msg, title="Unknown command")
        _print_root_help(return_code=2)
        return 2

    runner = COMMANDS[resolved]["runner"]
    # COMMANDS values are typed as ``object``; this narrows the type.
    assert callable(runner)

    try:
        return int(runner(argv) or 0)
    except SystemExit as exc:
        code = exc.code
        if code is None:
            # ``sys.exit()`` without argument means success.
            return 0
        if isinstance(code, int):
            return code
        # Non-integer payloads are messages: show them instead of dropping
        # them, then report failure.
        print(code, file=sys.stderr)
        return 1
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point of the ``split3c`` root command.

    Parameters:
        argv: Command-line arguments without the program name. When
            ``None``, ``sys.argv[1:]`` is used.

    Returns:
        The process exit status:
        * no arguments: root help is printed, status ``1``;
        * ``-h`` / ``--help`` or a lone ``help``: root help, status ``0``;
        * ``--version``: the version line is printed, status ``0``;
        * ``help <command> [...]``: same as ``<command> --help [...]``;
        * anything else: the first argument is dispatched as a subcommand
          with the remaining arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    if not argv:
        return _print_root_help(return_code=1)

    command = argv[0]

    if command == "--version":
        print(f"split3c {__version__}")
        return 0

    if command in {"-h", "--help"}:
        return _print_root_help(return_code=0)

    if command != "help":
        return _dispatch(command, argv[1:])

    # "help" alone shows the root help; "help X ..." becomes "X --help ...".
    if len(argv) == 1:
        return _print_root_help(return_code=0)
    return _dispatch(argv[1], ["--help", *argv[2:]])


if __name__ == "__main__":
    raise SystemExit(main())
|
|
File without changes
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
def signal_handler(sig, frame, out_f, out_r=None):
    """
    Terminate the output processes, then exit the interpreter.

    Parameters:
        sig (int): Signal number (not used by the handler).
        frame (frame object): Current stack frame (not used by the handler).
        out_f (subprocess.Popen): Process for the forward output; skipped if None.
        out_r (subprocess.Popen | None): Process for the reverse output; skipped if None.

    Raises:
        SystemExit: always, through ``sys.exit()`` called without argument.

    Examples
    --------
    >>> class _P:
    ...     def __init__(self):
    ...         self.terminated = False
    ...     def terminate(self):
    ...         self.terminated = True
    ...
    >>> pf, pr = _P(), _P()
    >>> try:
    ...     signal_handler(None, None, pf, pr)
    ... except SystemExit:
    ...     pass
    >>> pf.terminated, pr.terminated
    (True, True)

    >>> pf = _P()
    >>> try:
    ...     signal_handler(None, None, pf, None)
    ... except SystemExit:
    ...     pass
    >>> pf.terminated
    True
    """
    import sys

    # Forward process first, then reverse; absent processes are skipped.
    for proc in (out_f, out_r):
        if proc is not None:
            proc.terminate()
    sys.exit()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def partitionning(num_threads: int, single_bam: bool = False) -> tuple[int, int, int]:
    """
    Empirical heuristic for partitioning resources for microsplit.

    Returns:
        pigz_threads_per_file : pigz threads per file (F and R)
        compute_processes     : number of process_items workers
        bam_threads           : pysam/htslib threads per file (reading AND writing)

    Raises:
        ValueError: if ``num_threads`` is lower than 4.

    IMPORTANT
    ---------
    This function is deliberately empirical (CPU over-allocation is accepted).
    `num_threads` is a *hint* of available cores, not a strict budget.

    Calibration points (observed benchmarks)
    ----------------------------------------
    - 4 cores  -> (1, 1, 1)
    - 8 cores  -> (2, 3, 1)
    - 16 cores -> (3, 4, 3)

    In single_bam=True mode:
    - BAM threads are doubled, because a single BAM stream has to feed the whole pipeline
    - pigz_per_file and compute_processes are left unchanged

    Doctests
    --------
    >>> partitionning(3)
    Traceback (most recent call last):
    ...
    ValueError: Run with --threads >= 4.
    >>> partitionning(4)
    (1, 1, 1)
    >>> partitionning(8)
    (2, 3, 1)
    >>> partitionning(8, single_bam=True)
    (2, 3, 2)
    >>> partitionning(12)
    (2, 3, 2)
    >>> partitionning(12, single_bam=True)
    (2, 3, 4)
    >>> partitionning(16)
    (3, 4, 3)
    >>> partitionning(16, single_bam=True)
    (3, 4, 6)
    """
    if num_threads < 4:
        raise ValueError("Run with --threads >= 4.")

    # (inclusive upper bound on num_threads, (pigz, compute, bam)), ascending.
    tiers = (
        (5, (1, 1, 1)),    # 4 cores
        (7, (1, 2, 1)),    # transition towards 8 cores
        (10, (2, 3, 1)),   # 8 cores
        (12, (2, 3, 2)),   # transition towards 16 cores
        (14, (3, 4, 2)),
    )
    # The first tier whose bound is not exceeded wins; 16 cores and more
    # fall through to the default allocation.
    pigz_threads_per_file, compute_processes, bam_threads = next(
        (allocation for ceiling, allocation in tiers if num_threads <= ceiling),
        (3, 4, 3),
    )

    if single_bam:
        bam_threads *= 2

    return pigz_threads_per_file, compute_processes, bam_threads
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def check_data(els):
    """Return True when no element of ``els`` is ``None``, False otherwise.

    An empty iterable yields True. Only identity with ``None`` is tested, so
    other falsy values (0, "", False) are accepted.
    """
    return all(item is not None for item in els)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def write_command_txt(args, resolved: dict):
    """
    Write a small execution report into ``command.txt``.

    The file is created (or overwritten) in the current working directory
    and is always encoded as UTF-8, independently of the locale.

    The report contains:
    - UTC timestamp
    - current working directory
    - reconstructed command line (``sys.argv``, shell-quoted)
    - raw CLI arguments (attributes of ``args``, sorted by name)
    - resolved runtime values (items of ``resolved``, sorted by key)

    Parameters:
        args: Object whose attributes are listed through ``vars(args)``.
        resolved (dict): Additional key/value pairs to record.

    Examples
    --------
    >>> import os, sys, tempfile
    >>> from types import SimpleNamespace
    >>> old_cwd = os.getcwd()
    >>> old_argv = sys.argv[:]
    >>> with tempfile.TemporaryDirectory() as td:
    ...     os.chdir(td)
    ...     sys.argv = ["prog", "--foo", "bar"]
    ...     args = SimpleNamespace(alpha=1, beta="x")
    ...     write_command_txt(args, {"gamma": 3})
    ...     txt = open("command.txt", "r", encoding="utf-8").read()
    ...     ok = all(x in txt for x in ["command:", "cli_args:", "resolved:", "alpha: 1", "beta: x", "gamma: 3"])
    ...     os.chdir(old_cwd)
    ...     sys.argv = old_argv
    ...     ok
    True
    """
    import os
    import shlex
    import sys
    from datetime import datetime, timezone

    # Quote every argument so the recorded line can be pasted back in a shell.
    cmd = " ".join(shlex.quote(x) for x in sys.argv)

    p = os.path.join("./", "command.txt")
    # Explicit encoding: paths and values may hold non-ASCII characters that
    # the locale's default encoding cannot represent.
    with open(p, "w", encoding="utf-8") as f:
        f.write(f"timestamp_utc: {datetime.now(timezone.utc).isoformat()}\n")
        f.write(f"cwd: {os.getcwd()}\n\n")
        f.write("command:\n")
        f.write(cmd + "\n\n")
        f.write("cli_args:\n")
        for k, v in sorted(vars(args).items()):
            f.write(f" {k}: {v}\n")
        f.write("\nresolved:\n")
        for k, v in sorted(resolved.items()):
            f.write(f" {k}: {v}\n")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def handle_write_cmd(
    bam_1, bam_2, output_fq1, output_fq2, output_bam1, output_bam2, args
):
    """Record the resolved input/output paths of a run in ``command.txt``.

    Collects the given BAM and FASTQ values, plus ``args.single_bam``
    (``False`` when the attribute is missing), under fixed labels and passes
    them with ``args`` to ``write_command_txt``.
    """
    single_bam = getattr(args, "single_bam", False)
    write_command_txt(
        args=args,
        resolved={
            "BAM_R1_or_single": bam_1,
            "BAM_R2": bam_2,
            "output_Fastq_R1": output_fq1,
            "output_Fastq_R2": output_fq2,
            "output_bam1_or_single_unsplit": output_bam1,
            "output_bam2_unsplit": output_bam2,
            "single_bam": single_bam,
        },
    )
|