svphaser 2.0.6__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- svphaser-2.1.0/PKG-INFO +231 -0
- svphaser-2.1.0/README.md +184 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/pyproject.toml +15 -5
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/__init__.py +17 -12
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/_version.py +2 -2
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/cli.py +20 -38
- svphaser-2.1.0/src/svphaser/phasing/_workers.py +412 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/phasing/algorithms.py +22 -5
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/phasing/io.py +140 -31
- svphaser-2.1.0/src/svphaser/phasing/types.py +38 -0
- svphaser-2.0.6/PKG-INFO +0 -203
- svphaser-2.0.6/README.md +0 -156
- svphaser-2.0.6/src/svphaser/phasing/_workers.py +0 -106
- svphaser-2.0.6/src/svphaser/phasing/types.py +0 -31
- {svphaser-2.0.6 → svphaser-2.1.0}/.gitignore +0 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/LICENSE +0 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/__main__.py +0 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/logging.py +0 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/phasing/__init__.py +0 -0
- {svphaser-2.0.6 → svphaser-2.1.0}/src/svphaser/py.typed +0 -0
svphaser-2.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: svphaser
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: Structural-variant phasing from HP-tagged long-read BAMs
|
|
5
|
+
Project-URL: Homepage, https://github.com/SFGLab/SvPhaser
|
|
6
|
+
Project-URL: Issues, https://github.com/SFGLab/SvPhaser/issues
|
|
7
|
+
Project-URL: Source, https://github.com/SFGLab/SvPhaser
|
|
8
|
+
Author-email: SvPhaser Team <you@lab.org>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: BAM,ONT,VCF,genomics,long-reads,phasing,structural-variants
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: cyvcf2>=0.30
|
|
25
|
+
Requires-Dist: pandas>=2.1
|
|
26
|
+
Requires-Dist: pysam>=0.23
|
|
27
|
+
Requires-Dist: typer>=0.14
|
|
28
|
+
Provides-Extra: bench
|
|
29
|
+
Requires-Dist: py-spy>=0.3; extra == 'bench'
|
|
30
|
+
Requires-Dist: pytest-benchmark>=4.0; extra == 'bench'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: black>=24.3; extra == 'dev'
|
|
33
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
34
|
+
Requires-Dist: hypothesis>=6.90; extra == 'dev'
|
|
35
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
36
|
+
Requires-Dist: pandas-stubs>=2.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pre-commit>=3.6; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-cov>=5; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
41
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
42
|
+
Requires-Dist: tox>=4.10; extra == 'dev'
|
|
43
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
44
|
+
Provides-Extra: plots
|
|
45
|
+
Requires-Dist: matplotlib>=3.7; extra == 'plots'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
# SvPhaser
|
|
49
|
+
|
|
50
|
+
> **Haplotype-aware structural-variant (SV) genotyper for long-read data**
|
|
51
|
+
|
|
52
|
+
[](https://pypi.org/project/svphaser/)
|
|
53
|
+
[](https://pypi.org/project/svphaser/)
|
|
54
|
+
[](LICENSE)
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
**SvPhaser** phases **pre-called structural variants (SVs)** using **HP-tagged** long-read alignments (PacBio HiFi, ONT Q20+, …).
|
|
59
|
+
|
|
60
|
+
Think of it as *WhatsHap* for insertions/deletions/duplications:
|
|
61
|
+
- **we do not discover SVs**
|
|
62
|
+
- **we assign haplotype genotypes** (`0|1`, `1|0`, `1|1`, or `./.`)
|
|
63
|
+
- and compute a **Genotype Quality (GQ)** score
|
|
64
|
+
|
|
65
|
+
All in a single, embarrassingly-parallel pass over the genome.
|
|
66
|
+
|
|
67
|
+
## Highlights
|
|
68
|
+
|
|
69
|
+
- **Fast per-chromosome multiprocessing** (scale-out on multi-core CPUs).
|
|
70
|
+
- **Deterministic Δ-based decision logic** (no MCMC / HMM).
|
|
71
|
+
- **CLI + Python API**.
|
|
72
|
+
- **Non-destructive VCF augmentation**: injects phasing fields while preserving the original header and records.
|
|
73
|
+
- **Configurable confidence bins** + optional plots.
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
### From PyPI (recommended)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Requires Python >= 3.9
|
|
81
|
+
pip install svphaser
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Optional extras (if you use them):
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install "svphaser[plots]"
|
|
88
|
+
pip install "svphaser[bench]"
|
|
89
|
+
pip install "svphaser[dev]"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### From source
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
git clone https://github.com/SFGLab/SvPhaser.git
|
|
96
|
+
cd SvPhaser
|
|
97
|
+
pip install -e .
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Inputs & requirements
|
|
101
|
+
|
|
102
|
+
SvPhaser expects:
|
|
103
|
+
|
|
104
|
+
1. **Unphased SV VCF** (`.vcf` / `.vcf.gz`)
|
|
105
|
+
|
|
106
|
+
* SVs should already be called by your preferred SV caller.
|
|
107
|
+
|
|
108
|
+
2. **HP-tagged BAM** (long-read alignments)
|
|
109
|
+
|
|
110
|
+
* Reads must contain haplotype tags (e.g., `HP`) produced by an upstream phasing pipeline.
|
|
111
|
+
|
|
112
|
+
If your BAM is not HP-tagged, SvPhaser cannot assign haplotypes.
|
|
113
|
+
|
|
114
|
+
## Quick start (CLI)
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
svphaser phase \
|
|
118
|
+
sample_unphased.vcf.gz \
|
|
119
|
+
sample.sorted_phased.bam \
|
|
120
|
+
--out-dir results/ \
|
|
121
|
+
--min-support 10 \
|
|
122
|
+
--major-delta 0.70 \
|
|
123
|
+
--equal-delta 0.25 \
|
|
124
|
+
--gq-bins "30:High,10:Moderate" \
|
|
125
|
+
--threads 32
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Outputs
|
|
129
|
+
|
|
130
|
+
Inside `results/`:
|
|
131
|
+
|
|
132
|
+
* `*_phased.vcf` — your original VCF with additional INFO fields:
|
|
133
|
+
|
|
134
|
+
* `HP_GT` — phased genotype
|
|
135
|
+
* `HP_GQ` — genotype quality score
|
|
136
|
+
* `GQBIN` — confidence bin label (based on your `--gq-bins`)
|
|
137
|
+
* `*_phased.csv` — tidy table for plotting / downstream analysis
|
|
138
|
+
|
|
139
|
+
For algorithmic details, see: **`docs/methodology.md`**.
|
|
140
|
+
|
|
141
|
+
## Python API
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from pathlib import Path
|
|
145
|
+
from svphaser.phasing.io import phase_vcf
|
|
146
|
+
|
|
147
|
+
phase_vcf(
|
|
148
|
+
Path("sample.vcf.gz"),
|
|
149
|
+
Path("sample.bam"),
|
|
150
|
+
out_dir=Path("results"),
|
|
151
|
+
min_support=10,
|
|
152
|
+
major_delta=0.70,
|
|
153
|
+
equal_delta=0.25,
|
|
154
|
+
gq_bins="30:High,10:Moderate",
|
|
155
|
+
threads=8,
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
The phased table can also be loaded from the generated CSV for custom analytics.
|
|
160
|
+
|
|
161
|
+
## Repository structure (high level)
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
SvPhaser/
|
|
165
|
+
├─ src/svphaser/ # importable package
|
|
166
|
+
├─ tests/ # test suite + small fixtures (if present)
|
|
167
|
+
├─ docs/ # methodology + notes
|
|
168
|
+
├─ notebooks/ # experiments / analysis (if present)
|
|
169
|
+
├─ figures/ # plots & diagrams (if present)
|
|
170
|
+
├─ pyproject.toml
|
|
171
|
+
└─ CHANGELOG.md
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Development
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
git clone https://github.com/SFGLab/SvPhaser.git
|
|
178
|
+
cd SvPhaser
|
|
179
|
+
|
|
180
|
+
python -m venv .venv
|
|
181
|
+
source .venv/bin/activate
|
|
182
|
+
|
|
183
|
+
pip install -e ".[dev]"
|
|
184
|
+
pytest -q
|
|
185
|
+
mypy src/svphaser
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
See `CONTRIBUTING.md` for contribution guidelines.
|
|
189
|
+
|
|
190
|
+
## Citing SvPhaser
|
|
191
|
+
|
|
192
|
+
If SvPhaser contributed to your research, please cite:
|
|
193
|
+
|
|
194
|
+
```bibtex
|
|
195
|
+
@software{svphaser2025,
|
|
196
|
+
author = {Pranjul Mishra and Sachin Gadakh},
|
|
197
|
+
title = {SvPhaser: Haplotype-aware structural-variant genotyping from HP-tagged long-read BAMs},
|
|
198
|
+
version = {2.0.6},
|
|
199
|
+
year = {2025},
|
|
200
|
+
month = nov,
|
|
201
|
+
url = {https://github.com/SFGLab/SvPhaser},
|
|
202
|
+
note = {PyPI: https://pypi.org/project/svphaser/}
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
(If you need maximum rigor for a paper, cite a specific git commit hash too.)
|
|
207
|
+
|
|
208
|
+
## License
|
|
209
|
+
|
|
210
|
+
SvPhaser is released under the **MIT License** — see [LICENSE](LICENSE).
|
|
211
|
+
|
|
212
|
+
## Contact
|
|
213
|
+
|
|
214
|
+
Developed by **Team 5 (BioAI Hackathon)**.
|
|
215
|
+
|
|
216
|
+
* Pranjul Mishra — [pranjul.mishra@proton.me](mailto:pranjul.mishra@proton.me)
|
|
217
|
+
* Sachin Gadakh — [s.gadakh@cent.uw.edu.pl](mailto:s.gadakh@cent.uw.edu.pl)
|
|
218
|
+
|
|
219
|
+
Issues and feature requests: please open a GitHub issue.
|
|
220
|
+
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Two hard notes (don’t ignore)
|
|
224
|
+
- If you **don’t actually have CI set up**, don’t show a CI badge. A fake badge is worse than no badge.
|
|
225
|
+
- If your repo layout doesn’t include `notebooks/figures/tests fixtures`, either adjust that tree block or remove it to avoid “template smell.”
|
|
226
|
+
|
|
227
|
+
If you want, paste your **current `.github/workflows` filenames** (or tell me if you have none) and I’ll add the *correct* CI badge line too—without guessing.
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
[1]: https://pypi.org/project/svphaser/ "svphaser · PyPI"
|
svphaser-2.1.0/README.md
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# SvPhaser
|
|
2
|
+
|
|
3
|
+
> **Haplotype-aware structural-variant (SV) genotyper for long-read data**
|
|
4
|
+
|
|
5
|
+
[](https://pypi.org/project/svphaser/)
|
|
6
|
+
[](https://pypi.org/project/svphaser/)
|
|
7
|
+
[](LICENSE)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
**SvPhaser** phases **pre-called structural variants (SVs)** using **HP-tagged** long-read alignments (PacBio HiFi, ONT Q20+, …).
|
|
12
|
+
|
|
13
|
+
Think of it as *WhatsHap* for insertions/deletions/duplications:
|
|
14
|
+
- **we do not discover SVs**
|
|
15
|
+
- **we assign haplotype genotypes** (`0|1`, `1|0`, `1|1`, or `./.`)
|
|
16
|
+
- and compute a **Genotype Quality (GQ)** score
|
|
17
|
+
|
|
18
|
+
All in a single, embarrassingly-parallel pass over the genome.
|
|
19
|
+
|
|
20
|
+
## Highlights
|
|
21
|
+
|
|
22
|
+
- **Fast per-chromosome multiprocessing** (scale-out on multi-core CPUs).
|
|
23
|
+
- **Deterministic Δ-based decision logic** (no MCMC / HMM).
|
|
24
|
+
- **CLI + Python API**.
|
|
25
|
+
- **Non-destructive VCF augmentation**: injects phasing fields while preserving the original header and records.
|
|
26
|
+
- **Configurable confidence bins** + optional plots.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
### From PyPI (recommended)
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Requires Python >= 3.9
|
|
34
|
+
pip install svphaser
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Optional extras (if you use them):
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install "svphaser[plots]"
|
|
41
|
+
pip install "svphaser[bench]"
|
|
42
|
+
pip install "svphaser[dev]"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### From source
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
git clone https://github.com/SFGLab/SvPhaser.git
|
|
49
|
+
cd SvPhaser
|
|
50
|
+
pip install -e .
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Inputs & requirements
|
|
54
|
+
|
|
55
|
+
SvPhaser expects:
|
|
56
|
+
|
|
57
|
+
1. **Unphased SV VCF** (`.vcf` / `.vcf.gz`)
|
|
58
|
+
|
|
59
|
+
* SVs should already be called by your preferred SV caller.
|
|
60
|
+
|
|
61
|
+
2. **HP-tagged BAM** (long-read alignments)
|
|
62
|
+
|
|
63
|
+
* Reads must contain haplotype tags (e.g., `HP`) produced by an upstream phasing pipeline.
|
|
64
|
+
|
|
65
|
+
If your BAM is not HP-tagged, SvPhaser cannot assign haplotypes.
|
|
66
|
+
|
|
67
|
+
## Quick start (CLI)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
svphaser phase \
|
|
71
|
+
sample_unphased.vcf.gz \
|
|
72
|
+
sample.sorted_phased.bam \
|
|
73
|
+
--out-dir results/ \
|
|
74
|
+
--min-support 10 \
|
|
75
|
+
--major-delta 0.70 \
|
|
76
|
+
--equal-delta 0.25 \
|
|
77
|
+
--gq-bins "30:High,10:Moderate" \
|
|
78
|
+
--threads 32
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Outputs
|
|
82
|
+
|
|
83
|
+
Inside `results/`:
|
|
84
|
+
|
|
85
|
+
* `*_phased.vcf` — your original VCF with additional INFO fields:
|
|
86
|
+
|
|
87
|
+
* `HP_GT` — phased genotype
|
|
88
|
+
* `HP_GQ` — genotype quality score
|
|
89
|
+
* `GQBIN` — confidence bin label (based on your `--gq-bins`)
|
|
90
|
+
* `*_phased.csv` — tidy table for plotting / downstream analysis
|
|
91
|
+
|
|
92
|
+
For algorithmic details, see: **`docs/methodology.md`**.
|
|
93
|
+
|
|
94
|
+
## Python API
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from pathlib import Path
|
|
98
|
+
from svphaser.phasing.io import phase_vcf
|
|
99
|
+
|
|
100
|
+
phase_vcf(
|
|
101
|
+
Path("sample.vcf.gz"),
|
|
102
|
+
Path("sample.bam"),
|
|
103
|
+
out_dir=Path("results"),
|
|
104
|
+
min_support=10,
|
|
105
|
+
major_delta=0.70,
|
|
106
|
+
equal_delta=0.25,
|
|
107
|
+
gq_bins="30:High,10:Moderate",
|
|
108
|
+
threads=8,
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The phased table can also be loaded from the generated CSV for custom analytics.
|
|
113
|
+
|
|
114
|
+
## Repository structure (high level)
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
SvPhaser/
|
|
118
|
+
├─ src/svphaser/ # importable package
|
|
119
|
+
├─ tests/ # test suite + small fixtures (if present)
|
|
120
|
+
├─ docs/ # methodology + notes
|
|
121
|
+
├─ notebooks/ # experiments / analysis (if present)
|
|
122
|
+
├─ figures/ # plots & diagrams (if present)
|
|
123
|
+
├─ pyproject.toml
|
|
124
|
+
└─ CHANGELOG.md
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Development
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
git clone https://github.com/SFGLab/SvPhaser.git
|
|
131
|
+
cd SvPhaser
|
|
132
|
+
|
|
133
|
+
python -m venv .venv
|
|
134
|
+
source .venv/bin/activate
|
|
135
|
+
|
|
136
|
+
pip install -e ".[dev]"
|
|
137
|
+
pytest -q
|
|
138
|
+
mypy src/svphaser
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
See `CONTRIBUTING.md` for contribution guidelines.
|
|
142
|
+
|
|
143
|
+
## Citing SvPhaser
|
|
144
|
+
|
|
145
|
+
If SvPhaser contributed to your research, please cite:
|
|
146
|
+
|
|
147
|
+
```bibtex
|
|
148
|
+
@software{svphaser2025,
|
|
149
|
+
author = {Pranjul Mishra and Sachin Gadakh},
|
|
150
|
+
title = {SvPhaser: Haplotype-aware structural-variant genotyping from HP-tagged long-read BAMs},
|
|
151
|
+
version = {2.0.6},
|
|
152
|
+
year = {2025},
|
|
153
|
+
month = nov,
|
|
154
|
+
url = {https://github.com/SFGLab/SvPhaser},
|
|
155
|
+
note = {PyPI: https://pypi.org/project/svphaser/}
|
|
156
|
+
}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
(If you need maximum rigor for a paper, cite a specific git commit hash too.)
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
SvPhaser is released under the **MIT License** — see [LICENSE](LICENSE).
|
|
164
|
+
|
|
165
|
+
## Contact
|
|
166
|
+
|
|
167
|
+
Developed by **Team 5 (BioAI Hackathon)**.
|
|
168
|
+
|
|
169
|
+
* Pranjul Mishra — [pranjul.mishra@proton.me](mailto:pranjul.mishra@proton.me)
|
|
170
|
+
* Sachin Gadakh — [s.gadakh@cent.uw.edu.pl](mailto:s.gadakh@cent.uw.edu.pl)
|
|
171
|
+
|
|
172
|
+
Issues and feature requests: please open a GitHub issue.
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Two hard notes (don’t ignore)
|
|
177
|
+
- If you **don’t actually have CI set up**, don’t show a CI badge. A fake badge is worse than no badge.
|
|
178
|
+
- If your repo layout doesn’t include `notebooks/figures/tests fixtures`, either adjust that tree block or remove it to avoid “template smell.”
|
|
179
|
+
|
|
180
|
+
If you want, paste your **current `.github/workflows` filenames** (or tell me if you have none) and I’ll add the *correct* CI badge line too—without guessing.
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
[1]: https://pypi.org/project/svphaser/ "svphaser · PyPI"
|
|
@@ -54,9 +54,9 @@ dev = [
|
|
|
54
54
|
]
|
|
55
55
|
|
|
56
56
|
[project.urls]
|
|
57
|
-
Homepage = "https://github.com/
|
|
58
|
-
Issues = "https://github.com/
|
|
59
|
-
Source = "https://github.com/
|
|
57
|
+
Homepage = "https://github.com/SFGLab/SvPhaser"
|
|
58
|
+
Issues = "https://github.com/SFGLab/SvPhaser/issues"
|
|
59
|
+
Source = "https://github.com/SFGLab/SvPhaser"
|
|
60
60
|
|
|
61
61
|
[project.scripts]
|
|
62
62
|
svphaser = "svphaser.cli:app"
|
|
@@ -71,13 +71,23 @@ include = ["src/svphaser/py.typed"]
|
|
|
71
71
|
[tool.hatch.build.targets.sdist]
|
|
72
72
|
include = ["src/**", "README.md", "LICENSE", "pyproject.toml"]
|
|
73
73
|
|
|
74
|
-
#
|
|
74
|
+
# -------------------------------------------------------------------
|
|
75
|
+
# Versioning: hatch-vcs (tags like v2.0.8 -> 2.0.8)
|
|
76
|
+
# -------------------------------------------------------------------
|
|
75
77
|
[tool.hatch.version]
|
|
76
78
|
source = "vcs"
|
|
79
|
+
tag-pattern = "v(?P<version>.+)"
|
|
77
80
|
|
|
81
|
+
# These are setuptools-scm options passed through hatch-vcs.
|
|
82
|
+
# local_scheme = "no-local-version" strips the "+g<hash>" local version suffix, which public indexes such as PyPI reject.
|
|
83
|
+
# version_scheme avoids auto-bumping to the next patch on non-tag commits.
|
|
84
|
+
[tool.hatch.version.raw-options]
|
|
85
|
+
local_scheme = "no-local-version"
|
|
86
|
+
version_scheme = "no-guess-dev"
|
|
87
|
+
|
|
88
|
+
# Write resolved version to file at build time
|
|
78
89
|
[tool.hatch.build.hooks.vcs]
|
|
79
90
|
version-file = "src/svphaser/_version.py"
|
|
80
|
-
tag-pattern = "v(?P<version>.+)"
|
|
81
91
|
|
|
82
92
|
# -------------------------------------------------------------------
|
|
83
93
|
# Tooling (ruff / black / mypy / pytest / coverage)
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
"""Top-level SvPhaser package.
|
|
2
2
|
|
|
3
|
-
Public surface kept tiny:
|
|
4
|
-
|
|
3
|
+
Public surface kept tiny:
|
|
4
|
+
- __version__
|
|
5
|
+
- a convenience `phase()` wrapper around svphaser.phasing.io.phase_vcf()
|
|
6
|
+
|
|
7
|
+
Defaults are chosen to match the recommended SvPhaser settings for long-read SV phasing.
|
|
5
8
|
"""
|
|
6
9
|
|
|
7
10
|
from __future__ import annotations
|
|
@@ -11,7 +14,7 @@ from pathlib import Path
|
|
|
11
14
|
# --------------------------------------------------------------------
|
|
12
15
|
# Robust version lookup:
|
|
13
16
|
# - Prefer installed package metadata (works for wheels and PEP 660 editables)
|
|
14
|
-
# - Fall back to
|
|
17
|
+
# - Fall back to _version.py for raw-source/dev use
|
|
15
18
|
# --------------------------------------------------------------------
|
|
16
19
|
try:
|
|
17
20
|
from importlib.metadata import version as _pkg_version # Python 3.8+
|
|
@@ -19,14 +22,14 @@ try:
|
|
|
19
22
|
__version__ = _pkg_version("svphaser")
|
|
20
23
|
except Exception:
|
|
21
24
|
try:
|
|
22
|
-
from ._version import __version__ #
|
|
25
|
+
from ._version import __version__ # overwritten in builds when using setuptools-scm
|
|
23
26
|
except Exception: # highly defensive
|
|
24
27
|
__version__ = "0+unknown"
|
|
25
28
|
|
|
26
29
|
# Centralized defaults (keep CLI in sync)
|
|
27
30
|
DEFAULT_MIN_SUPPORT: int = 10
|
|
28
|
-
DEFAULT_MAJOR_DELTA: float = 0.
|
|
29
|
-
DEFAULT_EQUAL_DELTA: float = 0.
|
|
31
|
+
DEFAULT_MAJOR_DELTA: float = 0.60
|
|
32
|
+
DEFAULT_EQUAL_DELTA: float = 0.10
|
|
30
33
|
DEFAULT_GQ_BINS: str = "30:High,10:Moderate"
|
|
31
34
|
|
|
32
35
|
|
|
@@ -44,8 +47,10 @@ def phase(
|
|
|
44
47
|
) -> tuple[Path, Path]:
|
|
45
48
|
"""Phase *sv_vcf* using HP-tagged *bam*, writing outputs into *out_dir*.
|
|
46
49
|
|
|
47
|
-
|
|
48
|
-
|
|
50
|
+
Notes
|
|
51
|
+
-----
|
|
52
|
+
- Step B semantics: `min_support` is applied to TOTAL ALT-supporting reads (n1+n2).
|
|
53
|
+
- Near-ties (<= equal_delta) are treated as ambiguous (./.), not homozygous ALT.
|
|
49
54
|
|
|
50
55
|
Returns
|
|
51
56
|
-------
|
|
@@ -66,13 +71,13 @@ def phase(
|
|
|
66
71
|
out_csv = out_dir_p / f"{stem}_phased.csv"
|
|
67
72
|
|
|
68
73
|
phase_vcf(
|
|
69
|
-
sv_vcf,
|
|
70
|
-
bam,
|
|
71
|
-
out_dir=out_dir_p,
|
|
74
|
+
Path(sv_vcf),
|
|
75
|
+
Path(bam),
|
|
76
|
+
out_dir=out_dir_p,
|
|
72
77
|
min_support=min_support,
|
|
73
78
|
major_delta=major_delta,
|
|
74
79
|
equal_delta=equal_delta,
|
|
75
|
-
gq_bins=gq_bins,
|
|
80
|
+
gq_bins=gq_bins,
|
|
76
81
|
threads=threads,
|
|
77
82
|
)
|
|
78
83
|
return out_vcf, out_csv
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '2.0
|
|
32
|
-
__version_tuple__ = version_tuple = (2,
|
|
31
|
+
__version__ = version = '2.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (2, 1, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
svphaser.cli
|
|
2
|
+
"""svphaser.cli
|
|
4
3
|
============
|
|
5
4
|
Command-line interface for **SvPhaser**.
|
|
6
5
|
|
|
7
6
|
The program writes two files inside **--out-dir** (or the CWD):
|
|
8
7
|
|
|
9
|
-
* ``<stem>_phased.vcf`` (uncompressed; GT/GQ
|
|
10
|
-
* ``<stem>_phased.csv`` (tabular summary
|
|
8
|
+
* ``<stem>_phased.vcf`` (uncompressed; GT/GQ injected; optional INFO=GQBIN)
|
|
9
|
+
* ``<stem>_phased.csv`` (tabular summary incl. n1/n2/gt/gq and optional gq_label)
|
|
11
10
|
"""
|
|
11
|
+
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
from pathlib import Path
|
|
@@ -27,7 +27,7 @@ from svphaser import (
|
|
|
27
27
|
app = typer.Typer(add_completion=False, rich_markup_mode="rich")
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def _version_callback(value: bool):
|
|
30
|
+
def _version_callback(value: bool) -> None:
|
|
31
31
|
if value:
|
|
32
32
|
typer.echo(__version__)
|
|
33
33
|
raise typer.Exit()
|
|
@@ -44,30 +44,20 @@ def main(
|
|
|
44
44
|
callback=_version_callback,
|
|
45
45
|
),
|
|
46
46
|
] = None
|
|
47
|
-
):
|
|
47
|
+
) -> None:
|
|
48
48
|
"""SvPhaser – Structural-variant phasing from HP-tagged long-read BAMs."""
|
|
49
|
-
# no-op; callback handles --version
|
|
50
49
|
return
|
|
51
50
|
|
|
52
51
|
|
|
53
|
-
# ──────────────────────────────────────────────────────────────────────────
|
|
54
|
-
# phase command
|
|
55
|
-
# ──────────────────────────────────────────────────────────────────────────
|
|
56
52
|
@app.command("phase")
|
|
57
53
|
def phase_cmd(
|
|
58
54
|
sv_vcf: Annotated[
|
|
59
55
|
Path,
|
|
60
|
-
typer.Argument(
|
|
61
|
-
exists=True,
|
|
62
|
-
help="Input *un-phased* SV VCF (.vcf or .vcf.gz)",
|
|
63
|
-
),
|
|
56
|
+
typer.Argument(exists=True, help="Input *un-phased* SV VCF (.vcf or .vcf.gz)"),
|
|
64
57
|
],
|
|
65
58
|
bam: Annotated[
|
|
66
59
|
Path,
|
|
67
|
-
typer.Argument(
|
|
68
|
-
exists=True,
|
|
69
|
-
help="Long-read BAM/CRAM with HP tags",
|
|
70
|
-
),
|
|
60
|
+
typer.Argument(exists=True, help="Long-read BAM/CRAM with HP tags"),
|
|
71
61
|
],
|
|
72
62
|
out_dir: Annotated[
|
|
73
63
|
Path,
|
|
@@ -90,9 +80,8 @@ def phase_cmd(
|
|
|
90
80
|
int,
|
|
91
81
|
typer.Option(
|
|
92
82
|
help=(
|
|
93
|
-
"Minimum
|
|
94
|
-
"
|
|
95
|
-
"are dropped entirely."
|
|
83
|
+
"Minimum TOTAL ALT-supporting reads required to keep an SV (n1+n2). "
|
|
84
|
+
"If (n1+n2) < min_support the SV is dropped (written to *_dropped_svs.csv)."
|
|
96
85
|
),
|
|
97
86
|
show_default=True,
|
|
98
87
|
),
|
|
@@ -100,14 +89,14 @@ def phase_cmd(
|
|
|
100
89
|
major_delta: Annotated[
|
|
101
90
|
float,
|
|
102
91
|
typer.Option(
|
|
103
|
-
help="
|
|
92
|
+
help="max(n1,n2)/N >= this ⇒ strong majority ⇒ GT 1|0 or 0|1",
|
|
104
93
|
show_default=True,
|
|
105
94
|
),
|
|
106
95
|
] = DEFAULT_MAJOR_DELTA,
|
|
107
96
|
equal_delta: Annotated[
|
|
108
97
|
float,
|
|
109
98
|
typer.Option(
|
|
110
|
-
help="|n1−n2|/N
|
|
99
|
+
help="|n1−n2|/N <= this ⇒ near-tie ⇒ GT ./. (ambiguous)",
|
|
111
100
|
show_default=True,
|
|
112
101
|
),
|
|
113
102
|
] = DEFAULT_EQUAL_DELTA,
|
|
@@ -116,9 +105,8 @@ def phase_cmd(
|
|
|
116
105
|
str,
|
|
117
106
|
typer.Option(
|
|
118
107
|
help=(
|
|
119
|
-
"Comma-separated GQ≥threshold:Label definitions "
|
|
120
|
-
"
|
|
121
|
-
"[gq_label] and in the VCF INFO field HP_GQBIN when set."
|
|
108
|
+
"Comma-separated GQ≥threshold:Label definitions (e.g. '30:High,10:Moderate'). "
|
|
109
|
+
"Labels appear in CSV column [gq_label] and in the VCF INFO field GQBIN."
|
|
122
110
|
),
|
|
123
111
|
show_default=True,
|
|
124
112
|
),
|
|
@@ -134,13 +122,11 @@ def phase_cmd(
|
|
|
134
122
|
),
|
|
135
123
|
] = None,
|
|
136
124
|
) -> None:
|
|
137
|
-
"""Phase structural variants using
|
|
138
|
-
# Initialise logging BEFORE we import anything that might log
|
|
125
|
+
"""Phase structural variants using SV-type-aware ALT-support evidence."""
|
|
139
126
|
from svphaser.logging import init as _init_logging
|
|
140
127
|
|
|
141
|
-
_init_logging("INFO")
|
|
128
|
+
_init_logging("INFO")
|
|
142
129
|
|
|
143
|
-
# Resolve output paths
|
|
144
130
|
if not out_dir.exists():
|
|
145
131
|
out_dir.mkdir(parents=True)
|
|
146
132
|
|
|
@@ -153,25 +139,21 @@ def phase_cmd(
|
|
|
153
139
|
out_vcf = out_dir / f"{stem}_phased.vcf"
|
|
154
140
|
out_csv = out_dir / f"{stem}_phased.csv"
|
|
155
141
|
|
|
156
|
-
# Lazy import so `svphaser --help` works without heavy deps
|
|
157
142
|
from svphaser.phasing.io import phase_vcf
|
|
158
143
|
|
|
159
144
|
try:
|
|
160
145
|
phase_vcf(
|
|
161
146
|
sv_vcf,
|
|
162
147
|
bam,
|
|
163
|
-
out_dir=out_dir,
|
|
148
|
+
out_dir=out_dir,
|
|
164
149
|
min_support=min_support,
|
|
165
150
|
major_delta=major_delta,
|
|
166
151
|
equal_delta=equal_delta,
|
|
167
|
-
gq_bins=gq_bins,
|
|
152
|
+
gq_bins=gq_bins,
|
|
168
153
|
threads=threads,
|
|
169
154
|
)
|
|
170
155
|
typer.secho(f"✔ Phased VCF → {out_vcf}", fg=typer.colors.GREEN)
|
|
171
156
|
typer.secho(f"✔ Phased CSV → {out_csv}", fg=typer.colors.GREEN)
|
|
172
|
-
except Exception:
|
|
173
|
-
typer.secho(
|
|
174
|
-
"[SvPhaser] 💥 Unhandled error during phasing",
|
|
175
|
-
fg=typer.colors.RED,
|
|
176
|
-
)
|
|
157
|
+
except Exception:
|
|
158
|
+
typer.secho("[SvPhaser] 💥 Unhandled error during phasing", fg=typer.colors.RED)
|
|
177
159
|
raise
|