svphaser 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- svphaser-2.0.1/.gitignore +12 -0
- svphaser-2.0.1/LICENSE +21 -0
- svphaser-2.0.1/PKG-INFO +203 -0
- svphaser-2.0.1/README.md +156 -0
- svphaser-2.0.1/pyproject.toml +127 -0
- svphaser-2.0.1/src/svphaser/__init__.py +88 -0
- svphaser-2.0.1/src/svphaser/__main__.py +5 -0
- svphaser-2.0.1/src/svphaser/_version.py +34 -0
- svphaser-2.0.1/src/svphaser/cli.py +177 -0
- svphaser-2.0.1/src/svphaser/logging.py +34 -0
- svphaser-2.0.1/src/svphaser/phasing/__init__.py +20 -0
- svphaser-2.0.1/src/svphaser/phasing/_workers.py +106 -0
- svphaser-2.0.1/src/svphaser/phasing/algorithms.py +71 -0
- svphaser-2.0.1/src/svphaser/phasing/io.py +258 -0
- svphaser-2.0.1/src/svphaser/phasing/types.py +31 -0
- svphaser-2.0.1/src/svphaser/py.typed +0 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Ignore all files in output directories
|
|
2
|
+
output_*/
|
|
3
|
+
output/
|
|
4
|
+
__pycache__/
|
|
5
|
+
# Ignore all CSV and VCF files anywhere (optional, only if you want)
|
|
6
|
+
*.csv
|
|
7
|
+
*.vcf
|
|
8
|
+
# Or, ignore only those in the output directories:
|
|
9
|
+
output_*/**/*.csv
|
|
10
|
+
output_*/**/*.vcf
|
|
11
|
+
output/**/*.csv
|
|
12
|
+
output/**/*.vcf
|
svphaser-2.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Structural and Functional Genomics Laboratory
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
svphaser-2.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: svphaser
|
|
3
|
+
Version: 2.0.1
|
|
4
|
+
Summary: Structural-variant phasing from HP-tagged long-read BAMs
|
|
5
|
+
Project-URL: Homepage, https://github.com/your-org/svphaser
|
|
6
|
+
Project-URL: Issues, https://github.com/your-org/svphaser/issues
|
|
7
|
+
Project-URL: Source, https://github.com/your-org/svphaser
|
|
8
|
+
Author-email: SvPhaser Team <you@lab.org>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: BAM,ONT,VCF,genomics,long-reads,phasing,structural-variants
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: cyvcf2>=0.30
|
|
25
|
+
Requires-Dist: pandas>=2.1
|
|
26
|
+
Requires-Dist: pysam>=0.23
|
|
27
|
+
Requires-Dist: typer>=0.14
|
|
28
|
+
Provides-Extra: bench
|
|
29
|
+
Requires-Dist: py-spy>=0.3; extra == 'bench'
|
|
30
|
+
Requires-Dist: pytest-benchmark>=4.0; extra == 'bench'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: black>=24.3; extra == 'dev'
|
|
33
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
34
|
+
Requires-Dist: hypothesis>=6.90; extra == 'dev'
|
|
35
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
36
|
+
Requires-Dist: pandas-stubs>=2.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pre-commit>=3.6; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-cov>=5; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
41
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
42
|
+
Requires-Dist: tox>=4.10; extra == 'dev'
|
|
43
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
44
|
+
Provides-Extra: plots
|
|
45
|
+
Requires-Dist: matplotlib>=3.7; extra == 'plots'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
# SvPhaser
|
|
49
|
+
|
|
50
|
+
> **Haplotype‑aware structural‑variant genotyper for long‑read data**
|
|
51
|
+
|
|
52
|
+
[](https://pypi.org/project/svphaser)
|
|
53
|
+
[](https://github.com/your‑org/SvPhaser/actions)
|
|
54
|
+
[](LICENSE)
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
`SvPhaser` phases **pre‑called structural variants (SVs)** using *HP‑tagged* long‑read alignments (PacBio HiFi, ONT Q20+, …). Think of it as *WhatsHap* for insertions/deletions/duplications: we do **not** discover SVs; we assign each variant a haplotype genotype (`0|1`, `1|0`, `1|1`, or `./.`) together with a **Genotype Quality (GQ)** score – all in a single, embarrassingly‑parallel pass over the genome.
|
|
59
|
+
|
|
60
|
+
## Key highlights
|
|
61
|
+
|
|
62
|
+
* **Fast, per‑chromosome multiprocessing** – linear scale‑out on 32‑core workstations.
|
|
63
|
+
* **Deterministic Δ‑based decision tree** – no MCMC or hidden state machines.
|
|
64
|
+
* **Friendly CLI** (`svphaser phase …`) and importable Python API.
|
|
65
|
+
* **Seamless VCF injection** – adds `HP_GT`, `HP_GQ`, `HP_GQBIN` INFO tags while copying the original header verbatim.
|
|
66
|
+
* **Configurable confidence bins** and publication‑ready plots (see `result_images/`).
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Requires Python ≥3.9
|
|
74
|
+
pip install svphaser # from PyPI
|
|
75
|
+
# or
|
|
76
|
+
pip install git+https://github.com/your-org/SvPhaser.git@v2.0.1
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
`cyvcf2`, `pysam`, `typer`, and `pandas` are pulled in automatically.
|
|
80
|
+
|
|
81
|
+
## Quick‑start
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
svphaser phase \
|
|
85
|
+
sample_unphased.vcf.gz \
|
|
86
|
+
sample.sorted_phased.bam \
|
|
87
|
+
--out-dir results/ \
|
|
88
|
+
--min-support 10 \
|
|
89
|
+
--major-delta 0.70 \
|
|
90
|
+
--equal-delta 0.25 \
|
|
91
|
+
--gq-bins "30:High,10:Moderate" \
|
|
92
|
+
--threads 32
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Outputs (written inside **`results/`**)
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
sample_unphased_phased.vcf # original VCF + HP_* INFO fields
|
|
99
|
+
sample_unphased_phased.csv # tidy table for plotting / downstream R
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
See [`docs/methodology.md`](docs/Methodology.md) and the flow‑chart below for algorithmic details.
|
|
103
|
+
|
|
104
|
+

|
|
105
|
+
|
|
106
|
+
## Folder layout
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
SvPhaser/
|
|
110
|
+
├─ src/svphaser/ # importable package
|
|
111
|
+
│ ├─ cli.py # Typer entry‑point
|
|
112
|
+
│ ├─ logging.py # unified log setup
|
|
113
|
+
│ └─ phasing/
|
|
114
|
+
│ ├─ algorithms.py # core maths
|
|
115
|
+
│ ├─ io.py # driver & I/O
|
|
116
|
+
│ ├─ _workers.py # per‑chrom processes
|
|
117
|
+
│ └─ types.py # thin dataclasses
|
|
118
|
+
├─ tests/ # pytest suite + mini data
|
|
119
|
+
├─ docs/ # extra documentation
|
|
120
|
+
├─ result_images/ # generated plots & diagrams
|
|
121
|
+
└─ CHANGELOG.md
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Python usage
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from pathlib import Path
|
|
128
|
+
from svphaser.phasing.io import phase_vcf
|
|
129
|
+
|
|
130
|
+
phase_vcf(
|
|
131
|
+
Path("sample.vcf.gz"),
|
|
132
|
+
Path("sample.bam"),
|
|
133
|
+
out_dir=Path("results"),
|
|
134
|
+
min_support=10,
|
|
135
|
+
major_delta=0.70,
|
|
136
|
+
equal_delta=0.25,
|
|
137
|
+
gq_bins="30:High,10:Moderate",
|
|
138
|
+
threads=8,
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The resulting `DataFrame` can be loaded from the CSV for custom analytics.
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
## Development & contributing
|
|
148
|
+
|
|
149
|
+
1. Clone and create a virtual env:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
git clone https://github.com/your‑org/SvPhaser.git && cd SvPhaser
|
|
153
|
+
python -m venv .venv && source .venv/bin/activate
|
|
154
|
+
pip install -e .[dev]
|
|
155
|
+
```
|
|
156
|
+
2. Run the test‑suite & type checks:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
pytest -q
|
|
160
|
+
mypy src/svphaser
|
|
161
|
+
black --check src tests
|
|
162
|
+
```
|
|
163
|
+
3. Send a PR targeting the **`dev`** branch; one topic per PR.
|
|
164
|
+
|
|
165
|
+
Please read `CONTRIBUTING.md` (to come) for style‑guides and the DCO sign‑off.
|
|
166
|
+
|
|
167
|
+
## Citing SvPhaser
|
|
168
|
+
|
|
169
|
+
If SvPhaser contributed to your research, please cite:
|
|
170
|
+
|
|
171
|
+
```bibtex
|
|
172
|
+
@software{svphaser2024,
|
|
173
|
+
author = {Pranjul Mishra and Sachin Gadakh and {CeNT Lab}},
|
|
174
|
+
title = {SvPhaser: haplotype‑aware SV genotyping},
|
|
175
|
+
version = {0.2.0},
|
|
176
|
+
date = {2024-06-18},
|
|
177
|
+
url = {https://github.com/your‑org/SvPhaser}
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
`SvPhaser` is released under the MIT License – see [`LICENSE`](LICENSE).
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
## 📬 Contact
|
|
192
|
+
|
|
193
|
+
Developed by **Team5** (*BioAI Hackathon*) – Sachin Gadakh & Pranjul Mishra.
|
|
194
|
+
|
|
195
|
+
Lead contacts:
|
|
196
|
+
• [pranjul.mishra@proton.me](mailto:pranjul.mishra@proton.me)
|
|
197
|
+
• [s.gadakh@cent.uw.edu.pl](mailto:s.gadakh@cent.uw.edu.pl)
|
|
198
|
+
|
|
199
|
+
Feedback, feature requests and bug reports are all appreciated — feel free to open a GitHub issue or reach out by e‑mail.
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
*Happy phasing!*
|
svphaser-2.0.1/README.md
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# SvPhaser
|
|
2
|
+
|
|
3
|
+
> **Haplotype‑aware structural‑variant genotyper for long‑read data**
|
|
4
|
+
|
|
5
|
+
[](https://pypi.org/project/svphaser)
|
|
6
|
+
[](https://github.com/your‑org/SvPhaser/actions)
|
|
7
|
+
[](LICENSE)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
`SvPhaser` phases **pre‑called structural variants (SVs)** using *HP‑tagged* long‑read alignments (PacBio HiFi, ONT Q20+, …). Think of it as *WhatsHap* for insertions/deletions/duplications: we do **not** discover SVs; we assign each variant a haplotype genotype (`0|1`, `1|0`, `1|1`, or `./.`) together with a **Genotype Quality (GQ)** score – all in a single, embarrassingly‑parallel pass over the genome.
|
|
12
|
+
|
|
13
|
+
## Key highlights
|
|
14
|
+
|
|
15
|
+
* **Fast, per‑chromosome multiprocessing** – linear scale‑out on 32‑core workstations.
|
|
16
|
+
* **Deterministic Δ‑based decision tree** – no MCMC or hidden state machines.
|
|
17
|
+
* **Friendly CLI** (`svphaser phase …`) and importable Python API.
|
|
18
|
+
* **Seamless VCF injection** – adds `HP_GT`, `HP_GQ`, `HP_GQBIN` INFO tags while copying the original header verbatim.
|
|
19
|
+
* **Configurable confidence bins** and publication‑ready plots (see `result_images/`).
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Requires Python ≥3.9
|
|
27
|
+
pip install svphaser # from PyPI
|
|
28
|
+
# or
|
|
29
|
+
pip install git+https://github.com/your-org/SvPhaser.git@v2.0.1
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
`cyvcf2`, `pysam`, `typer`, and `pandas` are pulled in automatically.
|
|
33
|
+
|
|
34
|
+
## Quick‑start
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
svphaser phase \
|
|
38
|
+
sample_unphased.vcf.gz \
|
|
39
|
+
sample.sorted_phased.bam \
|
|
40
|
+
--out-dir results/ \
|
|
41
|
+
--min-support 10 \
|
|
42
|
+
--major-delta 0.70 \
|
|
43
|
+
--equal-delta 0.25 \
|
|
44
|
+
--gq-bins "30:High,10:Moderate" \
|
|
45
|
+
--threads 32
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Outputs (written inside **`results/`**)
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
sample_unphased_phased.vcf # original VCF + HP_* INFO fields
|
|
52
|
+
sample_unphased_phased.csv # tidy table for plotting / downstream R
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
See [`docs/methodology.md`](docs/Methodology.md) and the flow‑chart below for algorithmic details.
|
|
56
|
+
|
|
57
|
+

|
|
58
|
+
|
|
59
|
+
## Folder layout
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
SvPhaser/
|
|
63
|
+
├─ src/svphaser/ # importable package
|
|
64
|
+
│ ├─ cli.py # Typer entry‑point
|
|
65
|
+
│ ├─ logging.py # unified log setup
|
|
66
|
+
│ └─ phasing/
|
|
67
|
+
│ ├─ algorithms.py # core maths
|
|
68
|
+
│ ├─ io.py # driver & I/O
|
|
69
|
+
│ ├─ _workers.py # per‑chrom processes
|
|
70
|
+
│ └─ types.py # thin dataclasses
|
|
71
|
+
├─ tests/ # pytest suite + mini data
|
|
72
|
+
├─ docs/ # extra documentation
|
|
73
|
+
├─ result_images/ # generated plots & diagrams
|
|
74
|
+
└─ CHANGELOG.md
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Python usage
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from pathlib import Path
|
|
81
|
+
from svphaser.phasing.io import phase_vcf
|
|
82
|
+
|
|
83
|
+
phase_vcf(
|
|
84
|
+
Path("sample.vcf.gz"),
|
|
85
|
+
Path("sample.bam"),
|
|
86
|
+
out_dir=Path("results"),
|
|
87
|
+
min_support=10,
|
|
88
|
+
major_delta=0.70,
|
|
89
|
+
equal_delta=0.25,
|
|
90
|
+
gq_bins="30:High,10:Moderate",
|
|
91
|
+
threads=8,
|
|
92
|
+
)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
The resulting `DataFrame` can be loaded from the CSV for custom analytics.
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
## Development & contributing
|
|
101
|
+
|
|
102
|
+
1. Clone and create a virtual env:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
git clone https://github.com/your‑org/SvPhaser.git && cd SvPhaser
|
|
106
|
+
python -m venv .venv && source .venv/bin/activate
|
|
107
|
+
pip install -e .[dev]
|
|
108
|
+
```
|
|
109
|
+
2. Run the test‑suite & type checks:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
pytest -q
|
|
113
|
+
mypy src/svphaser
|
|
114
|
+
black --check src tests
|
|
115
|
+
```
|
|
116
|
+
3. Send a PR targeting the **`dev`** branch; one topic per PR.
|
|
117
|
+
|
|
118
|
+
Please read `CONTRIBUTING.md` (to come) for style‑guides and the DCO sign‑off.
|
|
119
|
+
|
|
120
|
+
## Citing SvPhaser
|
|
121
|
+
|
|
122
|
+
If SvPhaser contributed to your research, please cite:
|
|
123
|
+
|
|
124
|
+
```bibtex
|
|
125
|
+
@software{svphaser2024,
|
|
126
|
+
author = {Pranjul Mishra and Sachin Gadakh and {CeNT Lab}},
|
|
127
|
+
title = {SvPhaser: haplotype‑aware SV genotyping},
|
|
128
|
+
version = {0.2.0},
|
|
129
|
+
date = {2024-06-18},
|
|
130
|
+
url = {https://github.com/your‑org/SvPhaser}
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
`SvPhaser` is released under the MIT License – see [`LICENSE`](LICENSE).
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
## 📬 Contact
|
|
145
|
+
|
|
146
|
+
Developed by **Team5** (*BioAI Hackathon*) – Sachin Gadakh & Pranjul Mishra.
|
|
147
|
+
|
|
148
|
+
Lead contacts:
|
|
149
|
+
• [pranjul.mishra@proton.me](mailto:pranjul.mishra@proton.me)
|
|
150
|
+
• [s.gadakh@cent.uw.edu.pl](mailto:s.gadakh@cent.uw.edu.pl)
|
|
151
|
+
|
|
152
|
+
Feedback, feature requests and bug reports are all appreciated — feel free to open a GitHub issue or reach out by e‑mail.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
*Happy phasing!*
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# -------------------------------------------------------------------
|
|
2
|
+
# SvPhaser • project metadata (PEP-621) with hatch-vcs versioning
|
|
3
|
+
# -------------------------------------------------------------------
|
|
4
|
+
[build-system]
|
|
5
|
+
requires = ["hatchling>=1.24", "hatch-vcs>=0.4"]
|
|
6
|
+
build-backend = "hatchling.build"
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "svphaser"
|
|
10
|
+
dynamic = ["version"]
|
|
11
|
+
description = "Structural-variant phasing from HP-tagged long-read BAMs"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.9"
|
|
14
|
+
license = { text = "MIT" }
|
|
15
|
+
authors = [{ name = "SvPhaser Team", email = "you@lab.org" }]
|
|
16
|
+
keywords = ["genomics", "structural-variants", "phasing", "long-reads", "ONT", "BAM", "VCF"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
dependencies = [
|
|
32
|
+
"pysam>=0.23",
|
|
33
|
+
"cyvcf2>=0.30",
|
|
34
|
+
"typer>=0.14",
|
|
35
|
+
"pandas>=2.1",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
plots = ["matplotlib>=3.7"]
|
|
40
|
+
bench = ["pytest-benchmark>=4.0", "py-spy>=0.3"]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=8",
|
|
43
|
+
"pytest-cov>=5",
|
|
44
|
+
"pytest-xdist>=3.5",
|
|
45
|
+
"hypothesis>=6.90",
|
|
46
|
+
"ruff>=0.5",
|
|
47
|
+
"mypy>=1.8",
|
|
48
|
+
"black>=24.3",
|
|
49
|
+
"pre-commit>=3.6",
|
|
50
|
+
"build>=1.2",
|
|
51
|
+
"twine>=5.0",
|
|
52
|
+
"tox>=4.10",
|
|
53
|
+
"pandas-stubs>=2.0",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[project.urls]
|
|
57
|
+
Homepage = "https://github.com/your-org/svphaser"
|
|
58
|
+
Issues = "https://github.com/your-org/svphaser/issues"
|
|
59
|
+
Source = "https://github.com/your-org/svphaser"
|
|
60
|
+
|
|
61
|
+
[project.scripts]
|
|
62
|
+
svphaser = "svphaser.cli:app"
|
|
63
|
+
|
|
64
|
+
# -------------------------------------------------------------------
|
|
65
|
+
# Hatch build config (src/ layout + typed package)
|
|
66
|
+
# -------------------------------------------------------------------
|
|
67
|
+
[tool.hatch.build.targets.wheel]
|
|
68
|
+
packages = ["src/svphaser"]
|
|
69
|
+
include = ["src/svphaser/py.typed"]
|
|
70
|
+
|
|
71
|
+
[tool.hatch.build.targets.sdist]
|
|
72
|
+
include = ["src/**", "README.md", "LICENSE", "pyproject.toml"]
|
|
73
|
+
|
|
74
|
+
# Version from Git tags like v2.0.0 → 2.0.0
|
|
75
|
+
[tool.hatch.version]
|
|
76
|
+
source = "vcs"
|
|
77
|
+
|
|
78
|
+
[tool.hatch.build.hooks.vcs]
|
|
79
|
+
version-file = "src/svphaser/_version.py"
|
|
80
|
+
tag-pattern = "v(?P<version>.+)"
|
|
81
|
+
|
|
82
|
+
# -------------------------------------------------------------------
|
|
83
|
+
# Tooling (ruff / black / mypy / pytest / coverage)
|
|
84
|
+
# -------------------------------------------------------------------
|
|
85
|
+
[tool.black]
|
|
86
|
+
line-length = 100
|
|
87
|
+
target-version = ["py39"]
|
|
88
|
+
|
|
89
|
+
[tool.ruff]
|
|
90
|
+
line-length = 100
|
|
91
|
+
target-version = "py39"
|
|
92
|
+
exclude = [
|
|
93
|
+
"notebooks/**",
|
|
94
|
+
"docs/**/Presentation*/**",
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
[tool.ruff.lint]
|
|
98
|
+
select = ["E", "F", "W", "I", "UP", "B", "C90"]
|
|
99
|
+
ignore = ["E203"] # black-compatible slicing
|
|
100
|
+
|
|
101
|
+
[tool.ruff.lint.mccabe]
|
|
102
|
+
max-complexity = 12
|
|
103
|
+
|
|
104
|
+
[tool.mypy]
|
|
105
|
+
python_version = "3.9"
|
|
106
|
+
strict = true
|
|
107
|
+
show_error_codes = true
|
|
108
|
+
warn_unreachable = true
|
|
109
|
+
ignore_missing_imports = true
|
|
110
|
+
|
|
111
|
+
[tool.pytest.ini_options]
|
|
112
|
+
minversion = "8.0"
|
|
113
|
+
addopts = "-q --strict-markers --disable-warnings"
|
|
114
|
+
testpaths = ["tests"]
|
|
115
|
+
xfail_strict = true
|
|
116
|
+
|
|
117
|
+
[tool.coverage.run]
|
|
118
|
+
branch = true
|
|
119
|
+
source = ["svphaser"]
|
|
120
|
+
|
|
121
|
+
[tool.coverage.report]
|
|
122
|
+
exclude_lines = [
|
|
123
|
+
"pragma: no cover",
|
|
124
|
+
"if TYPE_CHECKING:",
|
|
125
|
+
"raise NotImplementedError",
|
|
126
|
+
]
|
|
127
|
+
precision = 1
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Top-level SvPhaser package.
|
|
2
|
+
|
|
3
|
+
Public surface kept tiny: a version string and a convenience helper
|
|
4
|
+
that calls the library’s main phasing routine.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# --------------------------------------------------------------------
|
|
12
|
+
# Robust version lookup:
|
|
13
|
+
# - Prefer installed package metadata (works for wheels and PEP 660 editables)
|
|
14
|
+
# - Fall back to placeholder in _version.py for raw-source/dev use
|
|
15
|
+
# --------------------------------------------------------------------
|
|
16
|
+
try:
|
|
17
|
+
from importlib.metadata import version as _pkg_version # Python 3.8+
|
|
18
|
+
|
|
19
|
+
__version__ = _pkg_version("svphaser")
|
|
20
|
+
except Exception:
|
|
21
|
+
try:
|
|
22
|
+
from ._version import __version__ # "0+unknown" in repo; overwritten in builds
|
|
23
|
+
except Exception: # highly defensive
|
|
24
|
+
__version__ = "0+unknown"
|
|
25
|
+
|
|
26
|
+
# Centralized defaults (keep CLI in sync)
|
|
27
|
+
DEFAULT_MIN_SUPPORT: int = 10
|
|
28
|
+
DEFAULT_MAJOR_DELTA: float = 0.70
|
|
29
|
+
DEFAULT_EQUAL_DELTA: float = 0.25
|
|
30
|
+
DEFAULT_GQ_BINS: str = "30:High,10:Moderate"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def phase(
    sv_vcf: Path | str,
    bam: Path | str,
    /,
    *,
    out_dir: Path | str = ".",
    min_support: int = DEFAULT_MIN_SUPPORT,
    major_delta: float = DEFAULT_MAJOR_DELTA,
    equal_delta: float = DEFAULT_EQUAL_DELTA,
    gq_bins: str = DEFAULT_GQ_BINS,
    threads: int | None = None,
) -> tuple[Path, Path]:
    """Run SV phasing on *sv_vcf* with the HP-tagged reads in *bam*.

    Convenience entry point that forwards to
    :py:func:`svphaser.phasing.io.phase_vcf`, so callers and tests do not
    have to import the submodule themselves.

    Parameters
    ----------
    sv_vcf, bam
        Input paths (positional-only); passed to ``phase_vcf`` unchanged.
    out_dir
        Output directory; created (including parents) when missing.
    min_support, major_delta, equal_delta, gq_bins, threads
        Forwarded to ``phase_vcf`` unchanged.

    Returns
    -------
    (out_vcf_path, out_csv_path)
        ``<stem>_phased.vcf`` and ``<stem>_phased.csv`` inside *out_dir*,
        where ``<stem>`` is the input file name without a trailing
        ``.vcf.gz`` or ``.vcf``.
    """
    # Deferred import: keeps ``import svphaser`` free of the heavy dependencies.
    from .phasing.io import phase_vcf

    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Strip at most one recognised VCF suffix; the longer one is tried first.
    base = Path(sv_vcf).name
    for suffix in (".vcf.gz", ".vcf"):
        if base.endswith(suffix):
            base = base[: -len(suffix)]
            break

    phase_vcf(
        sv_vcf,
        bam,
        out_dir=target_dir,  # type: ignore[arg-type]
        min_support=min_support,
        major_delta=major_delta,
        equal_delta=equal_delta,
        gq_bins=gq_bins,  # type: ignore[arg-type]
        threads=threads,
    )
    return target_dir / f"{base}_phased.vcf", target_dir / f"{base}_phased.csv"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
__all__ = [
|
|
82
|
+
"phase",
|
|
83
|
+
"__version__",
|
|
84
|
+
"DEFAULT_MIN_SUPPORT",
|
|
85
|
+
"DEFAULT_MAJOR_DELTA",
|
|
86
|
+
"DEFAULT_EQUAL_DELTA",
|
|
87
|
+
"DEFAULT_GQ_BINS",
|
|
88
|
+
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '2.0.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (2, 0, 1)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
svphaser.cli
|
|
4
|
+
============
|
|
5
|
+
Command-line interface for **SvPhaser**.
|
|
6
|
+
|
|
7
|
+
The program writes two files inside **--out-dir** (or the CWD):
|
|
8
|
+
|
|
9
|
+
* ``<stem>_phased.vcf`` (uncompressed; GT/GQ and optional HP_GQBIN injected)
|
|
10
|
+
* ``<stem>_phased.csv`` (tabular summary including gq_label column)
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Annotated, Optional
|
|
16
|
+
|
|
17
|
+
import typer
|
|
18
|
+
|
|
19
|
+
from svphaser import (
|
|
20
|
+
DEFAULT_EQUAL_DELTA,
|
|
21
|
+
DEFAULT_GQ_BINS,
|
|
22
|
+
DEFAULT_MAJOR_DELTA,
|
|
23
|
+
DEFAULT_MIN_SUPPORT,
|
|
24
|
+
__version__,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
app = typer.Typer(add_completion=False, rich_markup_mode="rich")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _version_callback(value: bool) -> None:
    """Print the package version and stop the CLI when ``--version`` is set.

    Parameters
    ----------
    value
        Parsed value of the ``--version`` flag; anything falsy is ignored.

    Raises
    ------
    typer.Exit
        After the version has been echoed, to end the program.
    """
    if not value:
        return
    typer.echo(__version__)
    raise typer.Exit()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.callback()
def main(
    version: Annotated[
        # Typer resolves these annotations at runtime, and a PEP 604 union
        # ("bool | None") raises TypeError on Python 3.9, which this package
        # still supports, so the typing.Optional spelling is required here.
        Optional[bool],  # noqa: UP007, UP045
        typer.Option(
            "--version",
            help="Show SvPhaser version and exit.",
            # A bool option is already a flag, so no explicit is_flag is needed.
            callback=_version_callback,
        ),
    ] = None,
) -> None:
    """SvPhaser – Structural-variant phasing from HP-tagged long-read BAMs."""
    # Intentionally empty: --version is handled entirely by _version_callback.
    return
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
54
|
+
# phase command
|
|
55
|
+
# ──────────────────────────────────────────────────────────────────────────
|
|
56
|
+
@app.command("phase")
def phase_cmd(
    sv_vcf: Annotated[
        Path,
        typer.Argument(
            exists=True,
            help="Input *un-phased* SV VCF (.vcf or .vcf.gz)",
        ),
    ],
    bam: Annotated[
        Path,
        typer.Argument(
            exists=True,
            help="Long-read BAM/CRAM with HP tags",
        ),
    ],
    out_dir: Annotated[
        Path,
        typer.Option(
            "--out-dir",
            "-o",
            exists=False,
            file_okay=False,
            dir_okay=True,
            writable=True,
            help=(
                "Directory in which to write <stem>_phased.vcf & .csv "
                "(created if missing; defaults to current dir)."
            ),
            show_default=True,
        ),
    ] = Path("."),
    # ---------- thresholds ------------------------------------------------
    min_support: Annotated[
        int,
        typer.Option(
            help=(
                "Minimum HP-tagged reads per haplotype. "
                "SVs where *both* n1 AND n2 fall below this "
                "are dropped entirely."
            ),
            show_default=True,
        ),
    ] = DEFAULT_MIN_SUPPORT,
    major_delta: Annotated[
        float,
        typer.Option(
            help="r >= this ⇒ strong majority ⇒ GT 1|0 or 0|1",
            show_default=True,
        ),
    ] = DEFAULT_MAJOR_DELTA,
    equal_delta: Annotated[
        float,
        typer.Option(
            help="|n1−n2|/N ≤ this ⇒ near-tie ⇒ GT 1|1",
            show_default=True,
        ),
    ] = DEFAULT_EQUAL_DELTA,
    # ---------- confidence bins ------------------------------------------
    gq_bins: Annotated[
        str,
        typer.Option(
            help=(
                "Comma-separated GQ≥threshold:Label definitions "
                "(e.g. '30:High,10:Moderate'). Labels appear in the CSV "
                "[gq_label] and in the VCF INFO field HP_GQBIN when set."
            ),
            show_default=True,
        ),
    ] = DEFAULT_GQ_BINS,
    # ---------- multiprocessing ------------------------------------------
    threads: Annotated[
        # Typer resolves these annotations at runtime, and a PEP 604 union
        # ("int | None") raises TypeError on Python 3.9, which this package
        # still supports, so the typing.Optional spelling is required here.
        Optional[int],  # noqa: UP007, UP045
        typer.Option(
            "-t",
            "--threads",
            help="Worker processes to use (defaults to all CPU cores).",
            show_default=True,
        ),
    ] = None,
) -> None:
    """Phase structural variants using HP-tagged read evidence."""
    # Initialise logging BEFORE we import anything that might log
    from svphaser.logging import init as _init_logging

    _init_logging("INFO")  # or "DEBUG" if you want more detail

    # Create the output directory. exist_ok=True avoids the race between a
    # separate exists() check and mkdir() when several runs share one out-dir.
    out_dir.mkdir(parents=True, exist_ok=True)

    # <stem> is the input file name without its .vcf / .vcf.gz suffix.
    stem = sv_vcf.name
    if stem.endswith(".vcf.gz"):
        stem = stem[:-7]
    elif stem.endswith(".vcf"):
        stem = stem[:-4]

    # Only used for the success messages below; phase_vcf receives out_dir.
    out_vcf = out_dir / f"{stem}_phased.vcf"
    out_csv = out_dir / f"{stem}_phased.csv"

    # Lazy import so `svphaser --help` works without heavy deps
    from svphaser.phasing.io import phase_vcf

    try:
        phase_vcf(
            sv_vcf,
            bam,
            out_dir=out_dir,  # type: ignore[arg-type]
            min_support=min_support,
            major_delta=major_delta,
            equal_delta=equal_delta,
            gq_bins=gq_bins,  # type: ignore[arg-type]
            threads=threads,
        )
        typer.secho(f"✔ Phased VCF → {out_vcf}", fg=typer.colors.GREEN)
        typer.secho(f"✔ Phased CSV → {out_csv}", fg=typer.colors.GREEN)
    except Exception:  # pragma: no cover
        # Print a visible banner, then re-raise so the traceback and a
        # non-zero exit status are preserved.
        typer.secho(
            "[SvPhaser] 💥 Unhandled error during phasing",
            fg=typer.colors.RED,
        )
        raise
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
svphaser.logging
|
|
3
|
+
================
|
|
4
|
+
One-liner that gives us colour-free, concise log messages on stderr.
|
|
5
|
+
|
|
6
|
+
Calling :func:`init` *once*, early in the program, is enough; importing the module alone configures nothing.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
# Level names accepted by init(); plain integer levels are accepted as well.
_Level = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]


def init(level: _Level | int = "INFO") -> None:
    """Attach a plain stderr handler to the root logger, at most once.

    When the root logger already owns a handler the call does nothing (the
    requested *level* is ignored too), so calling it repeatedly is harmless.
    """
    root_logger = logging.getLogger()
    if not root_logger.handlers:
        # First caller wins: one-letter level prefix, then the message.
        logging.basicConfig(
            format="%(levelname).1s | %(message)s",
            datefmt="%H:%M:%S",
            stream=sys.stderr,
            level=level,
        )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under *name* (thin wrapper over ``logging.getLogger``)."""
    named_logger = logging.getLogger(name)
    return named_logger
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# src/svphaser/phasing/__init__.py
|
|
2
|
+
"""Public API for svphaser.phasing."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
from .algorithms import classify_haplotype, phasing_gq
|
|
9
|
+
from .io import phase_vcf
|
|
10
|
+
from .types import WorkerOpts
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"phase_vcf",
|
|
14
|
+
"classify_haplotype",
|
|
15
|
+
"phasing_gq",
|
|
16
|
+
"WorkerOpts",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
# Library logging: don't emit anything unless the app configures it.
|
|
20
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
svphaser.phasing._workers
|
|
3
|
+
=========================
|
|
4
|
+
Worker-process code. Each worker:
|
|
5
|
+
|
|
6
|
+
1. Opens the (possibly un-indexed) SV VCF.
|
|
7
|
+
2. Scans only the records for *its* chromosome.
|
|
8
|
+
3. Counts HP-tagged reads in the long-read BAM/CRAM.
|
|
9
|
+
4. Classifies the haplotype + GQ, adds optional GQ-bin label.
|
|
10
|
+
5. Returns a DataFrame to the parent.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import pysam
|
|
19
|
+
from cyvcf2 import Reader, Variant # type: ignore
|
|
20
|
+
|
|
21
|
+
from .algorithms import classify_haplotype
|
|
22
|
+
from .types import WorkerOpts
|
|
23
|
+
|
|
24
|
+
__all__ = ["_phase_chrom_worker"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _count_hp_reads(
    bam: pysam.AlignmentFile,
    chrom: str,
    start: int,
    end: int,
) -> tuple[int, int]:
    """Count reads tagged ``HP=1`` and ``HP=2`` around ``start``..``end`` on *chrom*.

    The fetch window is widened by one base on each side, with its lower
    bound clamped at 0.  Unmapped, secondary and supplementary alignments are
    skipped, as are reads without an ``HP`` tag or with any other ``HP``
    value.  Returns ``(hp1_count, hp2_count)``.
    """
    window_lo = max(0, start - 1)
    window_hi = end + 1

    # Lazily yield the HP value of every alignment that passes the filters.
    hap_tags = (
        aln.get_tag("HP")
        for aln in bam.fetch(chrom, window_lo, window_hi)
        if not (aln.is_unmapped or aln.is_secondary or aln.is_supplementary)
        and aln.has_tag("HP")
    )

    n_hp1 = n_hp2 = 0
    for hap in hap_tags:
        if hap == 1:
            n_hp1 += 1
        elif hap == 2:
            n_hp2 += 1
    return n_hp1, n_hp2
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _has_tabix_index(vcf_path: Path) -> bool:
    """Report whether a ``.tbi`` or ``.csi`` file exists alongside *vcf_path*.

    The index name is the VCF file name with the index extension appended,
    e.g. ``calls.vcf.gz`` -> ``calls.vcf.gz.tbi``.
    """
    last_suffix = vcf_path.suffix
    for index_ext in (".tbi", ".csi"):
        if vcf_path.with_suffix(last_suffix + index_ext).exists():
            return True
    return False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _phase_chrom_worker(
    chrom: str,
    vcf_path: Path,
    bam_path: Path,
    opts: WorkerOpts,
) -> pd.DataFrame:
    """Phase every SV record on *chrom* and return one row per record.

    The worker opens its own BAM and VCF handles and always closes them, even
    when a record fails mid-scan.

    Args:
        chrom: Contig whose records this worker handles.
        vcf_path: SV VCF; region access is used when a ``.tbi``/``.csi`` index
            sits next to it, otherwise the whole file is scanned and filtered.
        bam_path: Alignment file opened in ``"rb"`` mode; reads are counted by
            their ``HP`` tag.
        opts: Thresholds forwarded to ``classify_haplotype`` plus the GQ bins.

    Returns:
        DataFrame with columns ``chrom, pos, id, svtype, n1, n2, gt, gq`` and,
        for rows whose GQ reaches one of ``opts.gq_bins``, ``gq_label``.  The
        frame has no columns at all when the contig holds no records.
    """
    rows: list[dict[str, object]] = []

    bam = pysam.AlignmentFile(str(bam_path), "rb")
    try:
        rdr = Reader(str(vcf_path))
        try:
            # Fast random access when an index exists, linear scan otherwise.
            if _has_tabix_index(vcf_path):
                records_iter = rdr(f"{chrom}")
            else:
                records_iter = (rec for rec in rdr if rec.CHROM == chrom)

            for rec in records_iter:  # type: ignore[arg-type]
                assert isinstance(rec, Variant)
                sv_end = rec.end if getattr(rec, "end", None) is not None else rec.POS
                # An END below POS would produce an inverted fetch window, which
                # the BAM reader rejects; treat such records as a point event.
                sv_end = max(sv_end, rec.POS)
                n1, n2 = _count_hp_reads(bam, chrom, rec.POS, sv_end)

                gt, gq = classify_haplotype(
                    n1,
                    n2,
                    min_support=opts.min_support,
                    major_delta=opts.major_delta,
                    equal_delta=opts.equal_delta,
                )

                row = dict(
                    chrom=chrom,
                    pos=rec.POS,  # cyvcf2 POS is already 1-based
                    id=rec.ID or ".",
                    svtype=rec.INFO.get("SVTYPE", "NA"),
                    n1=n1,
                    n2=n2,
                    gt=gt,
                    gq=gq,
                )

                if opts.gq_bins:
                    # First bin whose threshold is reached wins; the caller
                    # supplies the bins in the order they should be tried.
                    for thr, label in opts.gq_bins:
                        if gq >= thr:
                            row["gq_label"] = label
                            break

                rows.append(row)  # type: ignore[assignment]
        finally:
            rdr.close()
    finally:
        bam.close()

    return pd.DataFrame(rows)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Pure maths for SvPhaser – overflow-safe revision.
|
|
2
|
+
|
|
3
|
+
1) Exact binomial tail for small depth (N ≤ 200).
|
|
4
|
+
2) Continuity-corrected normal approximation for deep coverage (N > 200).
|
|
5
|
+
3) Phred GQ capped at 99.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
|
|
12
|
+
__all__ = ["classify_haplotype", "phasing_gq"]
|
|
13
|
+
|
|
14
|
+
NORMAL_THRESHOLD = 200  # switch to Gaussian SF above this depth
MAX_GQ = 99  # upper cap applied to every returned GQ


def phasing_gq(n1: int, n2: int) -> int:
    """Return the Phred-scaled genotype quality for haplotype counts *n1*/*n2*.

    The error probability is the upper tail, starting at ``max(n1, n2)``, of a
    fair-coin binomial over ``n1 + n2`` reads: summed exactly up to
    ``NORMAL_THRESHOLD`` reads, and taken from a continuity-corrected normal
    approximation above that.  The result is ``-10 * log10(p)``, rounded and
    capped at ``MAX_GQ``; zero reads give 0.
    """
    depth = n1 + n2
    if depth == 0:
        return 0

    major = max(n1, n2)

    if depth <= NORMAL_THRESHOLD:
        # Exact tail: P(X >= major) for X ~ Binomial(depth, 0.5).
        p_err = 0.0
        for hits in range(major, depth + 1):
            p_err += math.comb(depth, hits) * (0.5**hits) * (0.5 ** (depth - hits))
    else:
        # Deep coverage: Gaussian survival function with continuity correction.
        mean = depth / 2.0
        std_dev = math.sqrt(depth * 0.25)
        z_score = (major - 0.5 - mean) / std_dev
        p_err = 0.5 * math.erfc(z_score / math.sqrt(2.0))

    p_err = max(p_err, 1e-300)  # guard log(0)
    return min(int(round(-10.0 * math.log10(p_err))), MAX_GQ)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def classify_haplotype(
    n1: int,
    n2: int,
    *,
    min_support: int = 10,
    major_delta: float = 0.70,
    equal_delta: float = 0.25,
) -> tuple[str, int]:
    """Turn haplotype read counts into a ``(GT, GQ)`` pair.

    * Both counts below *min_support*, or no reads at all: ``("./.", 0)``.
    * Share of ``n1`` at least *major_delta*: ``"1|0"``.
    * Share of ``n2`` at least *major_delta*: ``"0|1"``.
    * Relative difference ``|n1 - n2| / total`` at most *equal_delta*: ``"1|1"``.
    * Anything else: ``"./."`` (the GQ is still reported).

    The GQ for supported calls comes from ``phasing_gq(n1, n2)``.
    """
    depth = n1 + n2

    under_supported = n1 < min_support and n2 < min_support
    if under_supported or depth == 0:
        return "./.", 0

    gq = phasing_gq(n1, n2)

    if n1 / depth >= major_delta:
        return "1|0", gq
    if n2 / depth >= major_delta:
        return "0|1", gq
    if abs(n1 - n2) / depth <= equal_delta:
        return "1|1", gq
    return "./.", gq
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""
|
|
2
|
+
svphaser.phasing.io
|
|
3
|
+
===================
|
|
4
|
+
High-level “engine” – orchestrates per-chromosome workers, merges results,
|
|
5
|
+
applies the global depth filter, then writes CSV + VCF.
|
|
6
|
+
|
|
7
|
+
Workers receive only simple (pickle-safe) arguments; each worker opens its
|
|
8
|
+
own BAM/VCF to avoid sharing handles between processes.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import multiprocessing as mp
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
from cyvcf2 import Reader
|
|
19
|
+
|
|
20
|
+
from ._workers import _phase_chrom_worker
|
|
21
|
+
from .types import GQBin, WorkerOpts
|
|
22
|
+
|
|
23
|
+
__all__ = ["phase_vcf"]
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def phase_vcf(
    sv_vcf: Path,
    bam: Path,
    *,
    out_dir: Path,
    min_support: int,
    major_delta: float,
    equal_delta: float,
    gq_bins: str,
    threads: int | None,
) -> None:
    """Phase *sv_vcf* against *bam* and write outputs to *out_dir*.

    Files (``*`` is the input file name without ``.vcf`` / ``.vcf.gz``):
    - *_phased.vcf
    - *_phased.csv
    - *_dropped_svs.csv

    Args:
        sv_vcf: Input SV VCF; its header contigs define the work units.
        bam: Alignment file handed to every worker.
        out_dir: Output directory, created if missing.
        min_support: Rows whose ``n1`` and ``n2`` are both below this value
            are moved to the dropped-SVs CSV.
        major_delta: Forwarded to the workers via ``WorkerOpts``.
        equal_delta: Forwarded to the workers via ``WorkerOpts``.
        gq_bins: Comma-separated ``threshold:label`` pairs such as
            ``"30:High,10:Moderate"``; an empty string disables binning.
        threads: Worker processes; a falsy value means "all CPUs".  With
            exactly one thread the workers run serially in this process.

    Raises:
        ValueError: if an entry of *gq_bins* is not ``<integer>:<label>``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    # 1 ─ Parse --gq-bins → list[(int,label)], highest threshold first
    bins = _parse_gq_bins(gq_bins)

    # 2 ─ Build immutable options holder for workers
    opts = WorkerOpts(
        min_support=min_support,
        major_delta=major_delta,
        equal_delta=equal_delta,
        gq_bins=bins,
    )

    # 3 ─ Discover chromosomes (cheap – no variants parsed yet)
    rdr = Reader(str(sv_vcf))
    try:
        chroms: tuple[str, ...] = tuple(rdr.seqnames)
    finally:
        rdr.close()

    # 4 ─ Launch one worker per chromosome (or ≤threads)
    worker_args: list[tuple[str, Path, Path, WorkerOpts]] = [(c, sv_vcf, bam, opts) for c in chroms]

    threads = threads or mp.cpu_count() or 1
    logger.info("SvPhaser ▶ workers: %d", threads)

    dataframes: list[pd.DataFrame] = []

    if threads == 1:
        # Serial path is handy for debugging
        for args in worker_args:
            df = _phase_chrom_worker(*args)
            dataframes.append(df)
            logger.info("chr %-6s ✔ phased %5d SVs", args[0], len(df))
    else:
        # Use 'fork' when available (fast on Linux); fall back to 'spawn' elsewhere.
        try:
            ctx = mp.get_context("fork")
        except ValueError:
            ctx = mp.get_context("spawn")

        with ctx.Pool(processes=threads) as pool:
            # starmap keeps input order, so results pair up with chroms and
            # empty chromosomes can be logged by name.
            results = pool.starmap(_phase_chrom_worker, worker_args, chunksize=1)
            for chrom, df in zip(chroms, results):
                dataframes.append(df)
                logger.info("chr %-6s ✔ phased %5d SVs", chrom, len(df))

    # 5 ─ Merge & apply *global* depth filter
    # Frames for contigs without records carry no columns; if only those were
    # concatenated, the n1/n2 lookups below would raise KeyError.
    non_empty = [df for df in dataframes if not df.empty]
    if non_empty:
        merged = pd.concat(non_empty, ignore_index=True)
    else:
        merged = pd.DataFrame(
            columns=["chrom", "pos", "id", "svtype", "n1", "n2", "gt", "gq", "gq_label"]
        )

    pre = len(merged)
    keep = ~((merged["n1"] < min_support) & (merged["n2"] < min_support))

    stem = sv_vcf.name.removesuffix(".vcf.gz").removesuffix(".vcf")

    # Save dropped SVs for transparency
    dropped_csv = out_dir / f"{stem}_dropped_svs.csv"
    merged.loc[~keep].to_csv(dropped_csv, index=False)
    logger.info("Dropped SVs → %s (%d SVs)", dropped_csv, int((~keep).sum()))

    kept = merged.loc[keep].reset_index(drop=True)
    if dropped := pre - len(kept):
        logger.info("Depth filter removed %d SVs", dropped)

    # 6 ─ Write CSV
    out_csv = out_dir / f"{stem}_phased.csv"
    kept.to_csv(out_csv, index=False)
    logger.info("CSV → %s (%d SVs)", out_csv, len(kept))

    # 7 ─ Write VCF
    out_vcf = out_dir / f"{stem}_phased.vcf"
    _write_phased_vcf(out_vcf, sv_vcf, kept, gqbin_in_header=bool(bins))
    logger.info("VCF → %s", out_vcf)


def _parse_gq_bins(spec: str) -> list[GQBin]:
    """Parse ``"30:High,10:Moderate"`` into ``[(30, "High"), (10, "Moderate")]``, highest threshold first."""
    bins: list[GQBin] = []
    for part in spec.split(","):
        thr_lbl = part.strip()
        if not thr_lbl:
            continue
        try:
            thr_s, lbl = thr_lbl.split(":")
            # Converted inside the try so a non-numeric threshold gets the
            # same explanatory error as a missing colon.
            threshold = int(thr_s)
        except ValueError as err:
            raise ValueError(
                f"Invalid gq-bin specifier: '{thr_lbl}'. " "Use '30:High,10:Moderate'."
            ) from err
        bins.append((threshold, lbl))
    bins.sort(key=lambda x: x[0], reverse=True)
    return bins
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
139
|
+
# Small helpers to keep complexity down
|
|
140
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
141
|
+
def _vcf_info_lookup(
    in_vcf: Path,
) -> tuple[dict[tuple[str, int, str], dict[str, object]], list[str], str]:
    """Scan input VCF once: return (lookup, raw_header_lines, sample_name).

    ``lookup`` maps ``(CHROM, POS, ID or ".")`` to the record's REF, first ALT
    (``"<N>"`` when there is none), QUAL (``"."`` when missing), FILTER
    (``"PASS"`` when empty) and a dict of its non-``None`` INFO values.
    Records sharing the same key overwrite one another; the last one wins.
    ``sample_name`` is the first sample of the file, or ``"SAMPLE"`` when the
    file has none.  The reader is closed even if the scan raises.
    """
    rdr = Reader(str(in_vcf))
    try:
        raw_header_lines = rdr.raw_header.strip().splitlines()
        sample_name = rdr.samples[0] if rdr.samples else "SAMPLE"

        lookup: dict[tuple[str, int, str], dict[str, object]] = {}
        for rec in rdr:
            key = (rec.CHROM, rec.POS, rec.ID or ".")
            info_dict: dict[str, object] = {}
            for k in rec.INFO:
                # Iteration may yield either bare keys or (key, value) pairs;
                # the value is always re-read through .get() by key.
                info_key = k[0] if isinstance(k, tuple) else k
                v = rec.INFO.get(info_key)
                if v is not None:
                    info_dict[info_key] = v
            lookup[key] = {
                "REF": rec.REF,
                "ALT": rec.ALT[0] if rec.ALT else "<N>",
                "QUAL": rec.QUAL if rec.QUAL is not None else ".",
                "FILTER": rec.FILTER if rec.FILTER else "PASS",
                "INFO": info_dict,
            }
    finally:
        rdr.close()
    return lookup, raw_header_lines, sample_name
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _write_headers(
    out,
    raw_header_lines: list[str],
    sample_name: str,
    *,
    gqbin_in_header: bool,
) -> None:
    """Write preserved meta headers + ensure GT/GQ/GQBIN, then the column header.

    Args:
        out: Writable text stream.
        raw_header_lines: Header lines of the input VCF; only those starting
            with ``##`` are copied, the ``#CHROM`` line is regenerated.
        sample_name: Name used for the single sample column.
        gqbin_in_header: When true, an ``INFO`` declaration for ``GQBIN`` is
            added unless the input already declares it.
    """
    have_gt = _header_declares(raw_header_lines, "FORMAT", "GT")
    have_gq = _header_declares(raw_header_lines, "FORMAT", "GQ")
    have_gqbin = _header_declares(raw_header_lines, "INFO", "GQBIN")

    for line in raw_header_lines:
        if line.startswith("##"):
            out.write(line.rstrip() + "\n")

    if not have_gt:
        out.write('##FORMAT=<ID=GT,Number=1,Type=String,Description="Phased genotype">\n')
    if not have_gq:
        out.write(
            "##FORMAT=<ID=GQ,Number=1,Type=Integer," 'Description="Genotype Quality (Phred)">\n'
        )
    if gqbin_in_header and not have_gqbin:
        out.write(
            "##INFO=<ID=GQBIN,Number=1,Type=String," 'Description="GQ bin label from SvPhaser">\n'
        )

    out.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample_name + "\n")


def _header_declares(header_lines: list[str], kind: str, ident: str) -> bool:
    """Return True if a ``##<kind>=<ID=<ident>`` line declares exactly *ident*."""
    prefix = f"##{kind}=<ID={ident}"
    # The ID must end right after the prefix, so that e.g. GQX is not taken
    # for GQ and GQBINS is not taken for GQBIN.
    return any(prefix + "," in ln or prefix + ">" in ln for ln in header_lines)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _compose_info_str(
    orig_info: dict[str, object],
    svtype: object,
    gq_label: object,
) -> str:
    """Compose the INFO string with SVTYPE first, original keys (no duplicate), then GQBIN.

    Values are rendered the way a VCF INFO column expects them:

    * ``True`` is written as a bare flag (``PRECISE``); ``False`` is omitted.
    * Tuples and lists are written comma-separated without spaces.
    * Everything else is written as ``key=str(value)``.

    An existing ``GQBIN`` entry is dropped when a new label is appended, so
    the key never appears twice.  Returns ``"."`` when nothing is left.
    """
    write_label = gq_label is not None and bool(pd.notnull(gq_label))

    items: list[str] = []
    if svtype:
        items.append(f"SVTYPE={svtype}")

    for k, v in orig_info.items():
        if k == "SVTYPE" or (write_label and k == "GQBIN"):
            continue
        if isinstance(v, bool):
            # Flags carry no value: present means set.
            if v:
                items.append(str(k))
            continue
        if isinstance(v, (tuple, list)):
            v = ",".join(str(part) for part in v)
        items.append(f"{k}={v}")

    if write_label:
        items.append(f"GQBIN={gq_label}")
    return ";".join(items) if items else "."
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _write_phased_vcf(
    out_vcf: Path,
    in_vcf: Path,
    df: pd.DataFrame,
    *,
    gqbin_in_header: bool,
) -> None:
    """Write a phased VCF: tab-delimited, compliant, with ensured GT/GQ (and GQBIN if used).

    Args:
        out_vcf: Destination path; written as UTF-8 with ``\\n`` line endings.
        in_vcf: Input VCF, re-read once for its header, sample name and the
            REF/ALT/QUAL/FILTER/INFO of each record.
        df: One row per SV to write, read through the columns ``chrom, pos,
            id, gt, gq, svtype`` and (optionally) ``gq_label``.
        gqbin_in_header: Forwarded to ``_write_headers``.

    Rows whose ``(chrom, pos, id)`` is not found in the input VCF are skipped
    with a warning.
    """
    lookup, raw_header_lines, sample_name = _vcf_info_lookup(in_vcf)

    # Explicit encoding: the platform default is not guaranteed to be able to
    # encode every character copied from the input header.
    with open(out_vcf, "w", newline="", encoding="utf-8") as out:
        _write_headers(out, raw_header_lines, sample_name, gqbin_in_header=gqbin_in_header)

        for row in df.itertuples(index=False):
            chrom = str(getattr(row, "chrom", "."))
            pos = int(getattr(row, "pos", 0))
            vid = str(getattr(row, "id", "."))
            gt = str(getattr(row, "gt", "./."))
            gq = str(getattr(row, "gq", "0"))
            svtype = getattr(row, "svtype", None)
            gq_label = getattr(row, "gq_label", None)

            info = lookup.get((chrom, pos, vid))
            if info is None:
                logger.warning("Could not find VCF info for %s:%s %s", chrom, pos, vid)
                continue

            info_str = _compose_info_str(info["INFO"], svtype, gq_label)

            fields = [
                chrom,
                str(pos),
                vid,
                str(info["REF"]),
                str(info["ALT"]),
                str(info["QUAL"]),
                str(info["FILTER"]),
                info_str,
                "GT:GQ",
                f"{gt}:{gq}",
            ]
            out.write("\t".join(fields) + "\n")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""svphaser.phasing.types
|
|
2
|
+
========================
|
|
3
|
+
Central place for common type aliases & lightweight data classes.
|
|
4
|
+
Keeping them here avoids circular imports and MyPy noise.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import NamedTuple
|
|
11
|
+
|
|
12
|
+
SVKey = tuple[str, int, str] # (chrom, POS, ID) – ID is "." if empty
|
|
13
|
+
GQBin = tuple[int, str] # (threshold, label), e.g. (30, "High")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(slots=True, frozen=True)
|
|
17
|
+
class WorkerOpts:
|
|
18
|
+
"""Non-changing knobs passed into every worker."""
|
|
19
|
+
|
|
20
|
+
min_support: int
|
|
21
|
+
major_delta: float
|
|
22
|
+
equal_delta: float
|
|
23
|
+
gq_bins: list[GQBin] # already parsed by cli → phase_vcf
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CallTuple(NamedTuple):
    """Per-variant call record: genotype, genotype quality and optional GQ-bin label.

    NOTE(review): ``algorithms.classify_haplotype()`` returns a plain
    ``(gt, gq)`` pair rather than this type, and the worker stores the bin
    label separately on its row dict; confirm whether this class is still
    meant to be the shared return type.
    """

    gt: str  # genotype string such as "1|0", "0|1", "1|1" or "./."
    gq: int  # genotype quality
    gq_label: str | None  # GQ-bin label; presumably None when no bin applies (confirm)
|
|
File without changes
|