svphaser 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,12 @@
3
3
  1) Exact binomial tail for small depth (N ≤ 200).
4
4
  2) Continuity-corrected normal approximation for deep coverage (N > 200).
5
5
  3) Phred GQ capped at 99.
6
+
7
+ Step B semantics:
8
+ - `min_support` is interpreted as a *total ALT-support* threshold (n1+n2).
9
+ - Near-ties are treated as *ambiguous* (./.), not homozygous ALT.
10
+ Homozygous ALT (1|1) should come from the caller's genotype (input VCF),
11
+ not from a balance test.
6
12
  """
7
13
 
8
14
  from __future__ import annotations
@@ -45,27 +51,39 @@ def classify_haplotype(
45
51
  n2: int,
46
52
  *,
47
53
  min_support: int = 10,
48
- major_delta: float = 0.70,
49
- equal_delta: float = 0.25,
54
+ major_delta: float = 0.60,
55
+ equal_delta: float = 0.10,
50
56
  ) -> tuple[str, int]:
51
- """Return (GT, GQ) using ratio thresholds and an overflow-safe GQ."""
52
- total = n1 + n2
57
+ """Classify which haplotype carries the ALT allele.
58
+
59
+ Returns:
60
+ - GT: "1|0" (ALT on hap1) or "0|1" (ALT on hap2) or "./." (ambiguous)
61
+ - GQ: phred-scaled confidence based on haplotype imbalance
53
62
 
54
- if n1 < min_support and n2 < min_support:
63
+ Notes:
64
+ - `min_support` is applied to total ALT-support reads.
65
+ - Near-ties (<= equal_delta) are treated as ambiguous (./.).
66
+ """
67
+
68
+ total = n1 + n2
69
+ if total <= 0:
55
70
  return "./.", 0
56
- if total == 0:
71
+ if total < min_support:
57
72
  return "./.", 0
58
73
 
59
74
  gq = phasing_gq(n1, n2)
75
+
76
+ # 1) near-tie → ambiguous
77
+ if abs(n1 - n2) / total <= equal_delta:
78
+ return "./.", gq
79
+
80
+ # 2) strong majority → heterozygous phased
60
81
  r1 = n1 / total
61
82
  r2 = n2 / total
62
-
63
83
  if r1 >= major_delta:
64
- gt = "1|0"
65
- elif r2 >= major_delta:
66
- gt = "0|1"
67
- elif abs(n1 - n2) / total <= equal_delta:
68
- gt = "1|1"
69
- else:
70
- gt = "./."
71
- return gt, gq
84
+ return "1|0", gq
85
+ if r2 >= major_delta:
86
+ return "0|1", gq
87
+
88
+ # 3) otherwise ambiguous
89
+ return "./.", gq
svphaser/phasing/io.py CHANGED
@@ -1,30 +1,56 @@
1
- """
2
- svphaser.phasing.io
1
+ """svphaser.phasing.io
3
2
  ===================
4
3
  High-level “engine” – orchestrates per-chromosome workers, merges results,
5
- applies the global depth filter, then writes CSV + VCF.
4
+ applies the global support filter, then writes CSV + VCF.
5
+
6
+ Step B update (biological correctness):
7
+ - `min_support` is interpreted as a *total ALT-support* threshold (n1+n2).
8
+ The filter drops an SV only if (n1+n2) < min_support.
6
9
 
7
- Workers receive only simple (pickle-safe) arguments; each worker opens its
8
- own BAM/VCF to avoid sharing handles between processes.
10
+ Step A fixes retained:
11
+ - collision-resistant VCF record matching using (CHROM, POS, ID, END, ALT)
12
+ - correct INFO composition (no duplicated keys; proper FLAG handling)
13
+ - typing fixes to satisfy Pylance/Mypy
9
14
  """
10
15
 
11
16
  from __future__ import annotations
12
17
 
13
18
  import logging
19
+ import math
14
20
  import multiprocessing as mp
15
21
  from pathlib import Path
22
+ from typing import Any, TypedDict
16
23
 
17
24
  import pandas as pd
18
25
  from cyvcf2 import Reader
19
26
 
20
27
  from ._workers import _phase_chrom_worker
21
- from .types import GQBin, WorkerOpts
28
+ from .types import GQBin, SVKey, SVKeyLegacy, WorkerOpts
22
29
 
23
30
  __all__ = ["phase_vcf"]
24
31
 
25
32
  logger = logging.getLogger(__name__)
26
33
 
27
34
 
35
+ class VcfRec(TypedDict):
36
+ REF: str
37
+ ALT: str
38
+ QUAL: object
39
+ FILTER: str
40
+ INFO: dict[str, Any]
41
+
42
+
43
+ def _is_missing_scalar(x: Any) -> bool:
44
+ """True for None / NaN / empty string."""
45
+ if x is None:
46
+ return True
47
+ if isinstance(x, float) and math.isnan(x):
48
+ return True
49
+ if isinstance(x, str) and x.strip() == "":
50
+ return True
51
+ return False
52
+
53
+
28
54
  def phase_vcf(
29
55
  sv_vcf: Path,
30
56
  bam: Path,
@@ -56,7 +82,7 @@ def phase_vcf(
56
82
  thr_s, lbl = thr_lbl.split(":")
57
83
  except ValueError as err:
58
84
  raise ValueError(
59
- f"Invalid gq-bin specifier: '{thr_lbl}'. " "Use '30:High,10:Moderate'."
85
+ f"Invalid gq-bin specifier: '{thr_lbl}'. Use '30:High,10:Moderate'."
60
86
  ) from err
61
87
  bins.append((int(thr_s), lbl))
62
88
  bins.sort(key=lambda x: x[0], reverse=True)
@@ -89,7 +115,6 @@ def phase_vcf(
89
115
  ctx = mp.get_context("spawn")
90
116
 
91
117
  if threads == 1:
92
- # Serial path is handy for debugging
93
118
  for args in worker_args:
94
119
  df = _phase_chrom_worker(*args)
95
120
  dataframes.append(df)
@@ -102,16 +127,29 @@ def phase_vcf(
102
127
  chrom = df.iloc[0]["chrom"] if not df.empty else "?"
103
128
  logger.info("chr %-6s ✔ phased %5d SVs", chrom, len(df))
104
129
 
105
- # 5 ─ Merge & apply *global* depth filter
130
+ # 5 ─ Merge & apply *global* support filter (Step B: total ALT-support)
106
131
  if dataframes:
107
132
  merged = pd.concat(dataframes, ignore_index=True)
108
133
  else:
109
134
  merged = pd.DataFrame(
110
- columns=["chrom", "pos", "id", "svtype", "n1", "n2", "gt", "gq", "gq_label"]
135
+ columns=[
136
+ "chrom",
137
+ "pos",
138
+ "end",
139
+ "id",
140
+ "alt",
141
+ "svtype",
142
+ "n1",
143
+ "n2",
144
+ "gt",
145
+ "gq",
146
+ "gq_label",
147
+ ]
111
148
  )
112
149
 
113
150
  pre = len(merged)
114
- keep = ~((merged["n1"] < min_support) & (merged["n2"] < min_support))
151
+ total_support = merged["n1"].astype(int) + merged["n2"].astype(int)
152
+ keep = total_support >= int(min_support)
115
153
 
116
154
  stem = sv_vcf.name.removesuffix(".vcf.gz").removesuffix(".vcf")
117
155
 
@@ -122,7 +160,7 @@ def phase_vcf(
122
160
 
123
161
  kept = merged.loc[keep].reset_index(drop=True)
124
162
  if dropped := pre - len(kept):
125
- logger.info("Depth filter removed %d SVs", dropped)
163
+ logger.info("Support filter removed %d SVs", dropped)
126
164
 
127
165
  # 6 ─ Write CSV
128
166
  out_csv = out_dir / f"{stem}_phased.csv"
@@ -138,32 +176,54 @@ def phase_vcf(
138
176
  # ──────────────────────────────────────────────────────────────────────
139
177
  # Small helpers to keep complexity down
140
178
  # ──────────────────────────────────────────────────────────────────────
179
+
180
+
141
181
  def _vcf_info_lookup(
142
182
  in_vcf: Path,
143
- ) -> tuple[dict[tuple[str, int, str], dict[str, object]], list[str], str]:
144
- """Scan input VCF once: return (lookup, raw_header_lines, sample_name)."""
183
+ ) -> tuple[dict[SVKey, VcfRec], dict[SVKeyLegacy, list[SVKey]], list[str], str]:
184
+ """Scan input VCF once.
185
+
186
+ Returns:
187
+ - full_lookup: maps (CHROM, POS, ID, END, ALT) -> record components
188
+ - legacy_index: maps (CHROM, POS, ID) -> list of full keys (fallback)
189
+ - raw_header_lines
190
+ - sample_name
191
+ """
145
192
  rdr = Reader(str(in_vcf))
146
193
  raw_header_lines = rdr.raw_header.strip().splitlines()
147
194
  sample_name = rdr.samples[0] if rdr.samples else "SAMPLE"
148
195
 
149
- lookup: dict[tuple[str, int, str], dict[str, object]] = {}
196
+ full_lookup: dict[SVKey, VcfRec] = {}
197
+ legacy_index: dict[SVKeyLegacy, list[SVKey]] = {}
198
+
150
199
  for rec in rdr:
151
- key = (rec.CHROM, rec.POS, rec.ID or ".")
152
- info_dict: dict[str, object] = {}
200
+ chrom = rec.CHROM
201
+ pos = int(rec.POS)
202
+ vid = rec.ID or "."
203
+ end = int(rec.end) if getattr(rec, "end", None) is not None else int(pos)
204
+ alt = ",".join(rec.ALT) if rec.ALT else "<N>"
205
+
206
+ info_dict: dict[str, Any] = {}
153
207
  for k in rec.INFO:
154
208
  info_key = k[0] if isinstance(k, tuple) else k
155
209
  v = rec.INFO.get(info_key)
156
210
  if v is not None:
157
211
  info_dict[info_key] = v
158
- lookup[key] = {
212
+
213
+ fkey: SVKey = (chrom, pos, vid, end, alt)
214
+ lkey: SVKeyLegacy = (chrom, pos, vid)
215
+
216
+ full_lookup[fkey] = {
159
217
  "REF": rec.REF,
160
- "ALT": ",".join(rec.ALT) if rec.ALT else "<N>",
218
+ "ALT": alt,
161
219
  "QUAL": rec.QUAL if rec.QUAL is not None else ".",
162
220
  "FILTER": rec.FILTER if rec.FILTER else "PASS",
163
221
  "INFO": info_dict,
164
222
  }
223
+ legacy_index.setdefault(lkey, []).append(fkey)
224
+
165
225
  rdr.close()
166
- return lookup, raw_header_lines, sample_name
226
+ return full_lookup, legacy_index, raw_header_lines, sample_name
167
227
 
168
228
 
169
229
  def _write_headers(
@@ -196,25 +256,62 @@ def _write_headers(
196
256
  out.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample_name + "\n")
197
257
 
198
258
 
199
- def _compose_info_str(orig_info: dict[str, object], svtype: object, gq_label: object) -> str:
200
- """Compose the INFO string with SVTYPE first, original keys (no duplicate), then GQBIN."""
259
+ def _compose_info_str(orig_info: dict[str, Any], svtype: Any, gq_label: Any) -> str:
260
+ """Compose INFO with SVTYPE first, proper FLAG handling, then GQBIN."""
201
261
  items: list[str] = []
262
+
263
+ if svtype:
264
+ items.append(f"SVTYPE={svtype}")
265
+
202
266
  for k, v in orig_info.items():
203
267
  if k == "SVTYPE":
204
268
  continue
205
- items.append(f"{k}={v}")
206
- # treat boolean True as a FLAG (bare key). Keep everything else as k=v.
269
+ if v is None:
270
+ continue
271
+ # cyvcf2 represents INFO flags as boolean True
207
272
  if v is True:
208
- items.append(k)
273
+ items.append(str(k))
209
274
  else:
210
275
  items.append(f"{k}={v}")
211
- if svtype:
212
- items.insert(0, f"SVTYPE={svtype}")
213
- if gq_label is not None and pd.notnull(gq_label):
276
+
277
+ if not _is_missing_scalar(gq_label):
214
278
  items.append(f"GQBIN={gq_label}")
279
+
215
280
  return ";".join(items) if items else "."
216
281
 
217
282
 
283
+ def _select_info_record(
284
+ full_lookup: dict[SVKey, VcfRec],
285
+ legacy_index: dict[SVKeyLegacy, list[SVKey]],
286
+ *,
287
+ chrom: str,
288
+ pos: int,
289
+ vid: str,
290
+ end: int | None,
291
+ alt: str | None,
292
+ ) -> VcfRec | None:
293
+ """Pick the best matching input VCF record for this phased row."""
294
+ if end is not None and alt is not None:
295
+ hit = full_lookup.get((chrom, pos, vid, int(end), str(alt)))
296
+ if hit is not None:
297
+ return hit
298
+
299
+ candidates = legacy_index.get((chrom, pos, vid), [])
300
+ if not candidates:
301
+ return None
302
+
303
+ if len(candidates) == 1:
304
+ return full_lookup[candidates[0]]
305
+
306
+ if end is not None:
307
+ end_matches = [k for k in candidates if k[3] == int(end)]
308
+ if len(end_matches) == 1:
309
+ return full_lookup[end_matches[0]]
310
+
311
+ # Still ambiguous: refuse to guess
312
+ return None
313
+
314
+
218
315
  def _write_phased_vcf(
219
316
  out_vcf: Path,
220
317
  in_vcf: Path,
@@ -223,7 +320,7 @@ def _write_phased_vcf(
223
320
  gqbin_in_header: bool,
224
321
  ) -> None:
225
322
  """Write a phased VCF: tab-delimited, compliant, with ensured GT/GQ (and GQBIN if used)."""
226
- lookup, raw_header_lines, sample_name = _vcf_info_lookup(in_vcf)
323
+ full_lookup, legacy_index, raw_header_lines, sample_name = _vcf_info_lookup(in_vcf)
227
324
 
228
325
  with open(out_vcf, "w", newline="") as out:
229
326
  _write_headers(out, raw_header_lines, sample_name, gqbin_in_header=gqbin_in_header)
@@ -232,14 +329,26 @@ def _write_phased_vcf(
232
329
  chrom = str(getattr(row, "chrom", "."))
233
330
  pos = int(getattr(row, "pos", 0))
234
331
  vid = str(getattr(row, "id", "."))
332
+
333
+ end = getattr(row, "end", None)
334
+ alt = getattr(row, "alt", None)
335
+
235
336
  gt = str(getattr(row, "gt", "./."))
236
337
  gq = str(getattr(row, "gq", "0"))
237
338
  svtype = getattr(row, "svtype", None)
238
339
  gq_label = getattr(row, "gq_label", None)
239
340
 
240
- info = lookup.get((chrom, pos, vid))
341
+ info = _select_info_record(
342
+ full_lookup,
343
+ legacy_index,
344
+ chrom=chrom,
345
+ pos=pos,
346
+ vid=vid,
347
+ end=int(end) if end is not None else None,
348
+ alt=str(alt) if alt is not None else None,
349
+ )
241
350
  if info is None:
242
- logger.warning("Could not find VCF info for %s:%s %s", chrom, pos, vid)
351
+ logger.warning("Could not uniquely match VCF record for %s:%s %s", chrom, pos, vid)
243
352
  continue
244
353
 
245
354
  info_str = _compose_info_str(info["INFO"], svtype, gq_label)
svphaser/phasing/types.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """svphaser.phasing.types
2
2
  ========================
3
- Central place for common type aliases & lightweight data classes.
4
- Keeping them here avoids circular imports and MyPy noise.
3
+ Common type aliases & small data structures.
4
+
5
+ We keep this module light to avoid circular imports.
5
6
  """
6
7
 
7
8
  from __future__ import annotations
@@ -9,8 +10,14 @@ from __future__ import annotations
9
10
  from dataclasses import dataclass
10
11
  from typing import NamedTuple
11
12
 
12
- SVKey = tuple[str, int, str] # (chrom, POS, ID) ID is "." if empty
13
- GQBin = tuple[int, str] # (threshold, label), e.g. (30, "High")
13
+ # Legacy key (older writer used this; can collide when ID='.' or same POS repeats)
14
+ SVKeyLegacy = tuple[str, int, str] # (CHROM, POS, ID)
15
+
16
+ # Collision-resistant key for matching phased rows back to original VCF records
17
+ SVKey = tuple[str, int, str, int, str] # (CHROM, POS, ID, END, ALT)
18
+
19
+ # GQ bin spec: (threshold, label)
20
+ GQBin = tuple[int, str] # e.g. (30, "High")
14
21
 
15
22
 
16
23
  @dataclass(slots=True, frozen=True)
@@ -20,7 +27,7 @@ class WorkerOpts:
20
27
  min_support: int
21
28
  major_delta: float
22
29
  equal_delta: float
23
- gq_bins: list[GQBin] # already parsed by cli → phase_vcf
30
+ gq_bins: list[GQBin]
24
31
 
25
32
 
26
33
  class CallTuple(NamedTuple):
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: svphaser
3
+ Version: 2.1.0
4
+ Summary: Structural-variant phasing from HP-tagged long-read BAMs
5
+ Project-URL: Homepage, https://github.com/SFGLab/SvPhaser
6
+ Project-URL: Issues, https://github.com/SFGLab/SvPhaser/issues
7
+ Project-URL: Source, https://github.com/SFGLab/SvPhaser
8
+ Author-email: SvPhaser Team <you@lab.org>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: BAM,ONT,VCF,genomics,long-reads,phasing,structural-variants
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: cyvcf2>=0.30
25
+ Requires-Dist: pandas>=2.1
26
+ Requires-Dist: pysam>=0.23
27
+ Requires-Dist: typer>=0.14
28
+ Provides-Extra: bench
29
+ Requires-Dist: py-spy>=0.3; extra == 'bench'
30
+ Requires-Dist: pytest-benchmark>=4.0; extra == 'bench'
31
+ Provides-Extra: dev
32
+ Requires-Dist: black>=24.3; extra == 'dev'
33
+ Requires-Dist: build>=1.2; extra == 'dev'
34
+ Requires-Dist: hypothesis>=6.90; extra == 'dev'
35
+ Requires-Dist: mypy>=1.8; extra == 'dev'
36
+ Requires-Dist: pandas-stubs>=2.0; extra == 'dev'
37
+ Requires-Dist: pre-commit>=3.6; extra == 'dev'
38
+ Requires-Dist: pytest-cov>=5; extra == 'dev'
39
+ Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
40
+ Requires-Dist: pytest>=8; extra == 'dev'
41
+ Requires-Dist: ruff>=0.5; extra == 'dev'
42
+ Requires-Dist: tox>=4.10; extra == 'dev'
43
+ Requires-Dist: twine>=5.0; extra == 'dev'
44
+ Provides-Extra: plots
45
+ Requires-Dist: matplotlib>=3.7; extra == 'plots'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # SvPhaser
49
+
50
+ > **Haplotype-aware structural-variant (SV) genotyper for long-read data**
51
+
52
+ [![PyPI version](https://img.shields.io/pypi/v/svphaser.svg?logo=pypi)](https://pypi.org/project/svphaser/)
53
+ [![Python](https://img.shields.io/pypi/pyversions/svphaser.svg)](https://pypi.org/project/svphaser/)
54
+ [![License](https://img.shields.io/github/license/SFGLab/SvPhaser.svg)](LICENSE)
55
+
56
+ ---
57
+
58
+ **SvPhaser** phases **pre-called structural variants (SVs)** using **HP-tagged** long-read alignments (PacBio HiFi, ONT Q20+, …).
59
+
60
+ Think of it as *WhatsHap* for insertions/deletions/duplications:
61
+ - **we do not discover SVs**
62
+ - **we assign haplotype genotypes** (`0|1`, `1|0`, `1|1`, or `./.`)
63
+ - and compute a **Genotype Quality (GQ)** score
64
+
65
+ All in a single, embarrassingly-parallel pass over the genome.
66
+
67
+ ## Highlights
68
+
69
+ - **Fast per-chromosome multiprocessing** (scale-out on multi-core CPUs).
70
+ - **Deterministic Δ-based decision logic** (no MCMC / HMM).
71
+ - **CLI + Python API**.
72
+ - **Non-destructive VCF augmentation**: injects phasing fields while preserving the original header and records.
73
+ - **Configurable confidence bins** + optional plots.
74
+
75
+ ## Installation
76
+
77
+ ### From PyPI (recommended)
78
+
79
+ ```bash
80
+ # Requires Python >= 3.9
81
+ pip install svphaser
82
+ ```
83
+
84
+ Optional extras (if you use them):
85
+
86
+ ```bash
87
+ pip install "svphaser[plots]"
88
+ pip install "svphaser[bench]"
89
+ pip install "svphaser[dev]"
90
+ ```
91
+
92
+ ### From source
93
+
94
+ ```bash
95
+ git clone https://github.com/SFGLab/SvPhaser.git
96
+ cd SvPhaser
97
+ pip install -e .
98
+ ```
99
+
100
+ ## Inputs & requirements
101
+
102
+ SvPhaser expects:
103
+
104
+ 1. **Unphased SV VCF** (`.vcf` / `.vcf.gz`)
105
+
106
+ * SVs should already be called by your preferred SV caller.
107
+
108
+ 2. **HP-tagged BAM** (long-read alignments)
109
+
110
+ * Reads must contain haplotype tags (e.g., `HP`) produced by an upstream phasing pipeline.
111
+
112
+ If your BAM is not HP-tagged, SvPhaser cannot assign haplotypes.
113
+
114
+ ## Quick start (CLI)
115
+
116
+ ```bash
117
+ svphaser phase \
118
+ sample_unphased.vcf.gz \
119
+ sample.sorted_phased.bam \
120
+ --out-dir results/ \
121
+ --min-support 10 \
122
+ --major-delta 0.70 \
123
+ --equal-delta 0.25 \
124
+ --gq-bins "30:High,10:Moderate" \
125
+ --threads 32
126
+ ```
127
+
128
+ ### Outputs
129
+
130
+ Inside `results/`:
131
+
132
+ * `*_phased.vcf` — your original VCF with additional INFO fields:
133
+
134
+ * `HP_GT` — phased genotype
135
+ * `HP_GQ` — genotype quality score
136
+ * `GQBIN` — confidence bin label (based on your `--gq-bins`)
137
+ * `*_phased.csv` — tidy table for plotting / downstream analysis
138
+
139
+ For algorithmic details, see: **`docs/methodology.md`**.
140
+
141
+ ## Python API
142
+
143
+ ```python
144
+ from pathlib import Path
145
+ from svphaser.phasing.io import phase_vcf
146
+
147
+ phase_vcf(
148
+ Path("sample.vcf.gz"),
149
+ Path("sample.bam"),
150
+ out_dir=Path("results"),
151
+ min_support=10,
152
+ major_delta=0.70,
153
+ equal_delta=0.25,
154
+ gq_bins="30:High,10:Moderate",
155
+ threads=8,
156
+ )
157
+ ```
158
+
159
+ The phased table can also be loaded from the generated CSV for custom analytics.
160
+
161
+ ## Repository structure (high level)
162
+
163
+ ```
164
+ SvPhaser/
165
+ ├─ src/svphaser/ # importable package
166
+ ├─ tests/ # test suite + small fixtures (if present)
167
+ ├─ docs/ # methodology + notes
168
+ ├─ notebooks/ # experiments / analysis (if present)
169
+ ├─ figures/ # plots & diagrams (if present)
170
+ ├─ pyproject.toml
171
+ └─ CHANGELOG.md
172
+ ```
173
+
174
+ ## Development
175
+
176
+ ```bash
177
+ git clone https://github.com/SFGLab/SvPhaser.git
178
+ cd SvPhaser
179
+
180
+ python -m venv .venv
181
+ source .venv/bin/activate
182
+
183
+ pip install -e ".[dev]"
184
+ pytest -q
185
+ mypy src/svphaser
186
+ ```
187
+
188
+ See `CONTRIBUTING.md` for contribution guidelines.
189
+
190
+ ## Citing SvPhaser
191
+
192
+ If SvPhaser contributed to your research, please cite:
193
+
194
+ ```bibtex
195
+ @software{svphaser2025,
196
+ author = {Pranjul Mishra and Sachin Gadakh},
197
+ title = {SvPhaser: Haplotype-aware structural-variant genotyping from HP-tagged long-read BAMs},
198
+ version = {2.1.0},
199
+ year = {2025},
200
+ month = nov,
201
+ url = {https://github.com/SFGLab/SvPhaser},
202
+ note = {PyPI: https://pypi.org/project/svphaser/}
203
+ }
204
+ ```
205
+
206
+ (If you need maximum rigor for a paper, cite a specific git commit hash too.)
207
+
208
+ ## License
209
+
210
+ SvPhaser is released under the **MIT License** — see [LICENSE](LICENSE).
211
+
212
+ ## Contact
213
+
214
+ Developed by **Team 5 (BioAI Hackathon)**.
215
+
216
+ * Pranjul Mishra — [pranjul.mishra@proton.me](mailto:pranjul.mishra@proton.me)
217
+ * Sachin Gadakh — [s.gadakh@cent.uw.edu.pl](mailto:s.gadakh@cent.uw.edu.pl)
218
+
219
+ Issues and feature requests: please open a GitHub issue.
220
+
221
+ ```
222
+
223
+ ### Two hard notes (don’t ignore)
224
+ - If you **don’t actually have CI set up**, don’t show a CI badge. A fake badge is worse than no badge.
225
+ - If your repo layout doesn’t include `notebooks/figures/tests fixtures`, either adjust that tree block or remove it to avoid “template smell.”
226
+
227
+ If you want, paste your **current `.github/workflows` filenames** (or tell me if you have none) and I’ll add the *correct* CI badge line too—without guessing.
228
+ ::contentReference[oaicite:1]{index=1}
229
+ ```
230
+
231
+ [1]: https://pypi.org/project/svphaser/ "svphaser · PyPI"
@@ -0,0 +1,16 @@
1
+ svphaser/__init__.py,sha256=h4eLAi05OsAqs9Evii526TVr2CUo3dW-iHO-RjXtv_E,2599
2
+ svphaser/__main__.py,sha256=lWe9boXc4JIpJEknv9dXqSsjS1Glk_FnYV9eBzUEzA0,86
3
+ svphaser/_version.py,sha256=6G8yJbldQAMT0M9ZiFAqGo5OVqUMxGB1aeWeKmrwNIE,704
4
+ svphaser/cli.py,sha256=w8LwEYBWwjJWuLA1AE-XuyMVCzgjS2DjBwCBJHjq0EA,4509
5
+ svphaser/logging.py,sha256=yw7Z8az-sZL-x4qhvPmu7aGVLbhSOYiSdrNqCO8bVtw,841
6
+ svphaser/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ svphaser/phasing/__init__.py,sha256=RcXg2EGc7Dyuq8wDP-bJmtVNKs8f3YqByjTel0mMujM,464
8
+ svphaser/phasing/_workers.py,sha256=8RGBuT5ZMxGpnFu18KqJKJmic7gFRAAGI_HtcClle84,12375
9
+ svphaser/phasing/algorithms.py,sha256=Z7ARpvIXXRtu87qxnumNQBy9haWkO_GGJhTPKYG1TDg,2374
10
+ svphaser/phasing/io.py,sha256=wEV8ZsNpCBzwq-bLM0J1alVTARtYWGH7Qhe5PVXkohg,11676
11
+ svphaser/phasing/types.py,sha256=fux_thtaqT9U8DxVreyyz25g12URdm6fUDej2u3O0J0,981
12
+ svphaser-2.1.0.dist-info/METADATA,sha256=TbiBzv9FUANVWzPKCEq6YJcD4Xlp-hLqhtE5LdsKsd8,6847
13
+ svphaser-2.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ svphaser-2.1.0.dist-info/entry_points.txt,sha256=YFeRJLvcvTc4dazgyVOg3HJ4fNrvBrVDaPX6ULapqeU,46
15
+ svphaser-2.1.0.dist-info/licenses/LICENSE,sha256=qsY5iOcewwIwvhQbj7naSP6tpJAc05Mv0DfhrouPoBU,1102
16
+ svphaser-2.1.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any