tpixel 0.1.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tpixel/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ """tpixel — Pixel-block alignment viewer for hundreds of sequences."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from tpixel.fasta import fasta_panel, read_fasta
6
+ from tpixel.hiv import hiv_panel
7
+ from tpixel.models import Marker, Panel, Region, SeqGroup
8
+ from tpixel.renderer import render_panels
9
+
10
# Start from the placeholder and overwrite it with the installed
# distribution's version when package metadata for "tpixel" is available.
__version__ = "0.0.0"
try:
    __version__ = version("tpixel")
except PackageNotFoundError:
    # No installed metadata for "tpixel": keep the placeholder.
    pass
14
+
15
# Names re-exported at package level: the model classes, the two panel
# builders, the FASTA reader and the renderer imported above.
__all__ = [
    "Marker",
    "Panel",
    "Region",
    "SeqGroup",
    "fasta_panel",
    "hiv_panel",
    "read_fasta",
    "render_panels",
]
tpixel/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Literal False, so the typing imports below never execute at runtime and the
# ``else`` branch supplies plain ``object`` placeholders instead.
# NOTE(review): presumably static type checkers special-case this name and
# analyse the first branch - confirm against setuptools-scm before relying on it.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

# Declarations only; the values are assigned further down.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Each value is bound under both a dunder name and a plain alias.
__version__ = version = '0.1.1.dev0'
__version_tuple__ = version_tuple = (0, 1, 1, 'dev0')

__commit_id__ = commit_id = None
tpixel/cli.py ADDED
@@ -0,0 +1,137 @@
1
+ """Click CLI for tpixel."""
2
+
3
+ import sys
4
+
5
+ import click
6
+
7
+ from tpixel.fasta import fasta_panel, read_fasta
8
+ from tpixel.renderer import render_panels
9
+
10
+
11
def _expand_stdin(paths: list[str]) -> list[str]:
    """Resolve the ``'-'`` placeholder into file paths read from stdin.

    Args:
        paths: File path strings. When this is exactly ``['-']``, paths are
            read from standard input instead, one per line.

    Returns:
        A new list of paths. Lines read from stdin are stripped of
        surrounding whitespace and blank lines are dropped.

    Examples:
        >>> _expand_stdin(["file1.fasta", "file2.fasta"])
        ['file1.fasta', 'file2.fasta']
        >>> _expand_stdin([])
        []
    """
    # Anything other than a lone "-" is passed through as a copy.
    if len(paths) != 1 or paths[0] != "-":
        return list(paths)

    stripped = (raw.strip() for raw in sys.stdin)
    return [entry for entry in stripped if entry]
29
+
30
+
31
def _auto_detect_hiv(fasta_path: str) -> bool:
    """Check whether an alignment contains HxB2 and a ``*_ref`` sequence.

    Only the first whitespace-delimited token of each FASTA header is
    considered when matching names.

    Args:
        fasta_path: Path to the aligned FASTA file.

    Returns:
        ``True`` if a sequence named exactly ``HxB2`` and at least one
        sequence whose name ends with ``_ref`` are both present.
    """
    seq_ids: set[str] = set()
    for header, _seq in read_fasta(fasta_path):
        tokens = header.split()
        # A bare ">" line yields an empty header; indexing tokens[0] on it
        # would raise IndexError, so such records are simply ignored here.
        if tokens:
            seq_ids.add(tokens[0])

    has_hxb2 = "HxB2" in seq_ids
    has_ref = any(seq_id.endswith("_ref") for seq_id in seq_ids)
    return has_hxb2 and has_ref
45
+
46
+
47
def _parse_ref_positions(ref_pos: str) -> list[int]:
    """Parse the ``--ref-pos`` value into a list of 1-based positions.

    Raises:
        click.BadParameter: If any item is not an integer or is below 1.
    """
    try:
        positions = [int(token) for token in ref_pos.split(",")]
    except ValueError:
        raise click.BadParameter(
            f"expected comma-separated integers, got {ref_pos!r}",
            param_hint="--ref-pos",
        ) from None
    # Position 0 would become index -1 downstream and silently select the
    # last sequence, so reject it (and negatives) up front.
    if any(pos < 1 for pos in positions):
        raise click.BadParameter(
            f"positions are 1-based and must be >= 1, got {ref_pos!r}",
            param_hint="--ref-pos",
        )
    return positions


def _parse_columns(columns: str | None) -> tuple[int | None, int | None]:
    """Parse the ``--columns`` value (``START`` or ``START-END``) into ints.

    Commas are removed before parsing. Returns ``(None, None)`` when the
    option was not given, and ``(START, None)`` when only a start is given.

    Raises:
        click.BadParameter: If a bound is not an integer.
    """
    if not columns:
        return None, None
    parts = columns.replace(",", "").split("-")
    try:
        start = int(parts[0])
        end = int(parts[1]) if len(parts) > 1 else None
    except ValueError:
        raise click.BadParameter(
            f"expected START or START-END (e.g. 1-120), got {columns!r}",
            param_hint="--columns",
        ) from None
    return start, end


@click.command(
    context_settings={"help_option_names": ["-h", "--help"]},
    epilog="Use '-' to read file paths from stdin, e.g.:\n\n"
    " find . -name '*.fasta' | tpixel --fasta - -o out.png",
)
@click.option(
    "--fasta",
    multiple=True,
    help="Aligned FASTA file(s) — each becomes a panel. Use '-' for stdin.",
)
@click.option(
    "--columns", help="Column range for FASTA, 1-based inclusive (e.g. 1-120)."
)
@click.option(
    "-o",
    "--output",
    default="pixel.png",
    show_default=True,
    help="Output image path.",
)
@click.option(
    "--dpi", type=int, default=300, show_default=True, help="Image resolution."
)
@click.option(
    "--cell", type=float, default=None, help="Cell size in inches (default: 0.03)."
)
@click.option(
    "--hiv/--no-hiv",
    default=None,
    help="Force HIV mode (HxB2 regions, PNGS, animal grouping). Auto-detected if omitted.",
)
@click.option(
    "--nt/--aa",
    default=None,
    help="Force nucleotide or amino-acid mode. Auto-detected if omitted.",
)
@click.option(
    "--ref-pos",
    default="1,2",
    show_default=True,
    help="Comma-separated 1-based positions of reference sequences. "
    "Last position is the primary reference; earlier ones are extra reference rows.",
)
@click.option(
    "--title",
    default=None,
    help="Title displayed above the plot.",
)
def main(fasta, columns, output, dpi, cell, hiv, nt, ref_pos, title):
    """Pixel-block alignment viewer for hundreds of sequences.

    Renders Roark-style PIXEL plots: grey=match, red=substitution, black=gap.
    Each sequence is a thin row of colored blocks — no text in cells.

    HIV mode is auto-detected when the alignment contains HxB2 and a *_ref
    sequence. Force with --hiv or --no-hiv.
    """
    fasta_paths = _expand_stdin(list(fasta))

    if not fasta_paths:
        raise click.UsageError("Provide --fasta")

    # Validate option syntax before any file is read, so typos are reported
    # as usage errors rather than tracebacks.
    ref_positions = _parse_ref_positions(ref_pos)
    col_start, col_end = _parse_columns(columns)

    # --nt/--aa is tri-state: True -> "NT", False -> "AA", None -> auto-detect.
    seq_type = None
    if nt is True:
        seq_type = "NT"
    elif nt is False:
        seq_type = "AA"

    panels = []
    for fasta_path in fasta_paths:
        # An explicit --hiv/--no-hiv wins; otherwise inspect each file.
        use_hiv = hiv if hiv is not None else _auto_detect_hiv(fasta_path)

        if use_hiv:
            # Imported lazily, only when a panel actually needs HIV mode.
            from tpixel.hiv import hiv_panel

            # The column range is not passed on in HIV mode.
            panel = hiv_panel(fasta_path, ref_positions=ref_positions, seq_type=seq_type)
        else:
            panel = fasta_panel(fasta_path, col_start, col_end, ref_positions=ref_positions)

        if title:
            panel.title = title
        panels.append(panel)

    render_panels(panels, output, dpi=dpi, cell=cell)
tpixel/fasta.py ADDED
@@ -0,0 +1,109 @@
1
+ """FASTA parsing and panel construction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from tpixel.models import Panel
8
+
9
+
10
def read_fasta(path: str | Path) -> list[tuple[str, str]]:
    """Parse a FASTA file into a list of (name, sequence) tuples.

    Args:
        path: Path to the FASTA file.

    Returns:
        List of (header_name, concatenated_sequence) tuples in file order.
        The header is the text after ``>`` with surrounding whitespace
        removed; sequence lines are stripped and joined without separators.
        Text appearing before the first header is ignored.
    """
    records: list[tuple[str, str]] = []
    header: str | None = None
    chunks: list[str] = []

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark. With plain "utf-8" the BOM stays glued to the first line, which
    # then fails the ">" test and the first record is silently lost.
    with open(path, encoding="utf-8-sig") as fh:
        for line in fh:
            if line.startswith(">"):
                if header is not None:
                    records.append((header, "".join(chunks)))
                header = line[1:].strip()
                chunks = []
            elif header is not None:
                chunks.append(line.strip())

    # Flush the final record, which has no following header to trigger it.
    if header is not None:
        records.append((header, "".join(chunks)))
    return records
34
+
35
+
36
def _aligned_row(seq: str, width: int) -> list[str]:
    """Upper-case *seq*, truncate it to *width*, and right-pad with ``'-'``."""
    row = list(seq.upper()[:width])
    row += ["-"] * (width - len(row))
    return row


def fasta_panel(
    path: str | Path,
    col_start: int | None = None,
    col_end: int | None = None,
    ref_positions: list[int] | None = None,
) -> Panel:
    """Build a Panel from an aligned FASTA.

    Args:
        path: Path to the aligned FASTA file.
        col_start: 1-based inclusive start column for slicing the alignment.
        col_end: 1-based inclusive end column for slicing the alignment.
        ref_positions: 1-based positions of reference sequences. Last is
            the primary reference; earlier ones become extra reference rows.
            Defaults to [1].

    Returns:
        A Panel with reference row, sequence rows, and column labels.

    Raises:
        ValueError: If the FASTA file contains no sequences, or if
            *ref_positions* is empty or names a position outside
            ``1..number_of_sequences``.
    """
    if ref_positions is None:
        ref_positions = [1]

    seqs = read_fasta(path)
    if not seqs:
        raise ValueError(f"No sequences in {path}")

    # Validate before indexing: position 0 would become index -1 and silently
    # pick the last sequence, and anything past the end would surface as a
    # bare IndexError.
    if not ref_positions:
        raise ValueError("ref_positions must contain at least one position")
    for pos in ref_positions:
        if not 1 <= pos <= len(seqs):
            raise ValueError(
                f"Reference position {pos} is out of range for {path} "
                f"({len(seqs)} sequence(s))"
            )

    # Primary reference is the last position in ref_positions
    _ref_name, ref_seq = seqs[ref_positions[-1] - 1]

    # Slice columns if requested (1-based inclusive)
    if col_start is not None or col_end is not None:
        cs = (col_start or 1) - 1
        ce = col_end or len(ref_seq)
        ref_seq = ref_seq[cs:ce]
        seqs = [(n, s[cs:ce]) for n, s in seqs]

    aln_len = len(ref_seq)
    ref_row = list(ref_seq.upper())

    # Extra reference rows (all ref positions except the last)
    extra_ref_rows: list[tuple[str, list[str]]] = [
        (seqs[pos - 1][0], _aligned_row(seqs[pos - 1][1], aln_len))
        for pos in ref_positions[:-1]
    ]

    # Sample sequences: everything not in ref_positions
    ref_indices = {pos - 1 for pos in ref_positions}
    seq_rows: list[tuple[str, list[str]]] = [
        (name, _aligned_row(seq, aln_len))
        for i, (name, seq) in enumerate(seqs)
        if i not in ref_indices
    ]

    # Column labels: 1-based position in the reference (skip gap columns).
    # NOTE(review): counting restarts at 1 inside a sliced window, so labels
    # are relative to col_start rather than to the full reference - confirm
    # this is the intended display.
    col_labels: list[tuple[int, str]] = []
    ref_pos = 0
    for i, base in enumerate(ref_row):
        if base != "-":
            ref_pos += 1
            if ref_pos == 1 or ref_pos % 10 == 0:
                col_labels.append((i, str(ref_pos)))

    label = Path(path).stem
    return Panel(label, ref_row, seq_rows, aln_len, col_labels,
                 extra_ref_rows=extra_ref_rows or None)
tpixel/hiv.py ADDED
@@ -0,0 +1,190 @@
1
+ """HIV-aware panel builder for PIXEL plots.
2
+
3
+ Handles HxB2 coordinate mapping, Env region annotations, PNGS markers,
4
+ and animal-based sequence grouping from SHIV/HIV aligned FASTA files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections import defaultdict
10
+ from pathlib import Path
11
+
12
+ from tpixel.fasta import read_fasta
13
+ from tpixel.hxb2 import _is_nucleotide, build_hxb2_map, hxb2_col_labels, hxb2_regions
14
+ from tpixel.models import Marker, Panel, SeqGroup
15
+ from tpixel.pngs import find_pngs_markers, find_pngs_markers_nt
16
+
17
+
18
def _find_ref_id(names: list[str]) -> str | None:
    """Locate the parental reference among the sequence IDs.

    Args:
        names: Sequence IDs from the alignment, in file order.

    Returns:
        The first ID whose text ends with ``'_ref'``, or ``None`` when no
        ID has that suffix.

    Examples:
        >>> _find_ref_id(["HxB2", "animal1_ref", "animal1_s1"])
        'animal1_ref'
        >>> _find_ref_id(["HxB2", "s1", "s2"]) is None
        True
    """
    candidates = (seq_id for seq_id in names if seq_id.endswith("_ref"))
    return next(candidates, None)
37
+
38
+
39
def _extract_animal(seq_id: str) -> str:
    """Return the animal name encoded at the start of a sequence ID.

    Args:
        seq_id: Full sequence identifier string.

    Returns:
        Everything before the first ``'_'`` in *seq_id*; the whole string
        when it contains no underscore.

    Examples:
        >>> _extract_animal("animal1_s1")
        'animal1'
        >>> _extract_animal("RM5695_env_s3")
        'RM5695'
        >>> _extract_animal("nounderscore")
        'nounderscore'
    """
    animal, _sep, _rest = seq_id.partition("_")
    return animal
58
+
59
+
60
def _sort_animal_groups(animal_names: list[str], lineage: str) -> list[str]:
    """Order group names for display: lineage, recombinants, everything else.

    Args:
        animal_names: Animal/group names to order.
        lineage: Name that must come first when present.

    Returns:
        Names equal to *lineage* (in input order), followed by names starting
        with ``"rec"`` (case-insensitive) sorted, followed by the remaining
        names sorted.

    Examples:
        >>> _sort_animal_groups(["B", "rec1", "A", "lin1"], "lin1")
        ['lin1', 'rec1', 'A', 'B']
        >>> _sort_animal_groups(["X", "Y"], "Z")
        ['X', 'Y']
    """
    def is_recombinant(label: str) -> bool:
        return label.lower().startswith("rec")

    lineage_hits = [label for label in animal_names if label == lineage]
    remainder = [label for label in animal_names if label != lineage]
    recombinants = sorted(label for label in remainder if is_recombinant(label))
    others = sorted(label for label in remainder if not is_recombinant(label))
    return lineage_hits + recombinants + others
87
+
88
+
89
def _check_ref_positions(ref_positions: list[int], n_seqs: int, path: str | Path) -> None:
    """Raise ValueError unless every 1-based position indexes an existing sequence."""
    if not ref_positions:
        raise ValueError("ref_positions must contain at least one position")
    for pos in ref_positions:
        if not 1 <= pos <= n_seqs:
            raise ValueError(
                f"Reference position {pos} is out of range for {path} "
                f"({n_seqs} sequence(s))"
            )


def _padded_row(seq: str, width: int) -> list[str]:
    """Upper-case *seq*, truncate it to *width*, and right-pad with ``'-'``."""
    row = list(seq.upper()[:width])
    row += ["-"] * (width - len(row))
    return row


def hiv_panel(
    path: str | Path,
    hxb2_id: str = "HxB2",
    ref_id: str | None = None,
    tick_step: int = 50,
    ref_positions: list[int] | None = None,
    seq_type: str | None = None,
) -> Panel:
    """Build a full Roark-style Panel from an HIV Env alignment.

    Args:
        path: Path to aligned FASTA containing HxB2 and a *_ref sequence.
            Accepts both amino-acid and nucleotide alignments.
        hxb2_id: ID of the HxB2 coordinate reference in the alignment.
        ref_id: Parental reference ID. Auto-detected (*_ref) if None.
            Ignored when ref_positions is provided.
        tick_step: HxB2 AA position interval for x-axis ticks.
        ref_positions: 1-based positions of reference sequences. Last is
            the primary reference; earlier ones become extra reference rows.
            When None, the primary reference is chosen by name (*ref_id* or
            the first ``*_ref`` header) and positions ``[1, 2]`` are used for
            the extra reference row and for excluding rows from the samples.
        seq_type: ``"NT"`` or ``"AA"``. Auto-detected from the reference
            sequence when *None*.

    Returns:
        Panel with regions, PNGS markers, grouped sequences, and HxB2 ticks.

    Raises:
        ValueError: If the file has no sequences, no reference can be
            resolved, or a reference position is outside the alignment.
    """
    seqs = read_fasta(path)
    if not seqs:
        raise ValueError(f"No sequences in {path}")

    names = [n for n, _ in seqs]

    if ref_positions is not None:
        # Position-based: last position is primary reference. Read the record
        # by index; a name-keyed dict would hand back a different sequence
        # whenever two headers are identical.
        _check_ref_positions(ref_positions, len(seqs), path)
        ref_id, ref_seq = seqs[ref_positions[-1] - 1]
    else:
        # Name-based auto-detection (original behavior)
        ref_positions = [1, 2]
        if ref_id is None:
            ref_id = _find_ref_id(names)
        if ref_id is None:
            raise ValueError("No *_ref sequence found. Specify ref_id explicitly.")
        seq_dict = dict(seqs)
        if ref_id not in seq_dict:
            raise ValueError(f"Reference '{ref_id}' not in alignment")
        ref_seq = seq_dict[ref_id]
        _check_ref_positions(ref_positions, len(seqs), path)
        # NOTE(review): in this mode rows 1 and 2 are excluded from the samples
        # even if the named reference sits elsewhere - confirm that is intended.

    aln_len = len(ref_seq)
    ref_row = list(ref_seq.upper())

    # Auto-detect sequence type from reference when not specified
    if seq_type is None:
        seq_type = "NT" if _is_nucleotide(ref_seq) else "AA"

    hxb2_map = build_hxb2_map(seqs, hxb2_id, seq_type=seq_type)
    regions = hxb2_regions(hxb2_map)
    col_labels = hxb2_col_labels(hxb2_map, step=tick_step)

    if seq_type == "NT":
        markers = find_pngs_markers_nt(ref_seq, hxb2_map)
    else:
        markers = find_pngs_markers(ref_seq, hxb2_map)

    # Strip only the trailing "_ref". str.replace would also remove interior
    # occurrences (e.g. "x_reference_ref" -> "xerence").
    lineage = ref_id.removesuffix("_ref")

    # Extra reference rows: all ref positions except the last
    extra_ref_rows: list[tuple[str, list[str]]] = [
        (seqs[pos - 1][0], _padded_row(seqs[pos - 1][1], aln_len))
        for pos in ref_positions[:-1]
    ]

    # Group sample sequences by animal, leaving out the reference rows.
    # Rows are excluded by index so a sample sharing a reference's header
    # text is still plotted.
    skip = {pos - 1 for pos in ref_positions}
    animal_seqs: dict[str, list[tuple[str, list[str]]]] = defaultdict(list)
    for idx, (name, seq) in enumerate(seqs):
        if idx in skip:
            continue
        animal_seqs[_extract_animal(name)].append((name, _padded_row(seq, aln_len)))

    sorted_animals = _sort_animal_groups(list(animal_seqs.keys()), lineage)
    groups = [SeqGroup(name=a, seqs=animal_seqs[a]) for a in sorted_animals]

    return Panel(
        label=ref_id,
        ref_row=ref_row,
        seq_rows=[],
        total_cols=aln_len,
        col_labels=col_labels,
        regions=regions,
        markers=markers,
        marker_color="#4CAF50",
        groups=groups,
        extra_ref_rows=extra_ref_rows,
    )
tpixel/hxb2.py ADDED
@@ -0,0 +1,187 @@
1
+ """HxB2 coordinate mapping for HIV Env gp160 protein alignments.
2
+
3
+ Maps alignment columns to HxB2 amino acid positions and Env structural
4
+ regions using the LANL convention boundaries.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+
11
+ from tpixel.models import Region
12
+
13
# Env region boundaries as (name, first, last) with both ends inclusive,
# expressed in HxB2 amino-acid positions.
ENV_REGIONS: list[tuple[str, int, int]] = [
    ("SP", 1, 30),
    ("C1", 31, 130),
    ("V1", 131, 157),
    ("V2", 158, 196),
    ("C2", 197, 295),
    ("V3", 296, 331),
    ("C3", 332, 384),
    ("V4", 385, 418),
    ("C4", 419, 459),
    ("V5", 460, 469),
    ("C5", 470, 511),
    ("gp41", 512, 856),
]

# Fill colour (hex string) for each region name.
REGION_COLORS: dict[str, str] = {
    "SP": "#FFF9C4",
    "C1": "#EEEEEE",
    "V1": "#BBDEFB",
    "V2": "#BBDEFB",
    "C2": "#EEEEEE",
    "V3": "#BBDEFB",
    "C3": "#EEEEEE",
    "V4": "#BBDEFB",
    "C4": "#EEEEEE",
    "V5": "#BBDEFB",
    "C5": "#EEEEEE",
    "gp41": "#F8BBD0",
}

# Per-position lookup table expanded from ENV_REGIONS: position -> region name.
_REGION_LOOKUP: dict[int, str] = {
    position: region_name
    for region_name, first, last in ENV_REGIONS
    for position in range(first, last + 1)
}
47
+
48
+
49
def get_env_region(hxb2_aa_pos: int) -> str | None:
    """Look up which Env region an HxB2 amino acid position falls in.

    Args:
        hxb2_aa_pos: 1-based HxB2 amino acid position.

    Returns:
        The region name (e.g. ``'V3'``), or ``None`` when the position is
        not covered by the lookup table.

    Examples:
        >>> get_env_region(1)
        'SP'
        >>> get_env_region(131)
        'V1'
        >>> get_env_region(296)
        'V3'
        >>> get_env_region(900) is None
        True
    """
    try:
        return _REGION_LOOKUP[hxb2_aa_pos]
    except KeyError:
        # Position outside every known region.
        return None
69
+
70
+
71
@dataclass
class HxB2Position:
    """One alignment column together with its HxB2 coordinate annotation.

    Attributes:
        alignment_col: 0-based alignment column index.
        hxb2_aa_pos: 1-based HxB2 amino acid position, or ``None`` for
            columns where HxB2 has a gap.
        region: Env region name (e.g. ``'V3'``), or ``None``.
        hxb2_residue: The character found at this column in the HxB2 row.
    """

    # Field order is also the positional constructor order.
    alignment_col: int
    hxb2_aa_pos: int | None
    region: str | None
    hxb2_residue: str
86
+
87
+
88
def _is_nucleotide(seq: str) -> bool:
    """Tell whether *seq* consists solely of nucleotide-alphabet characters.

    The accepted alphabet is ``A C G T U N`` in either case plus the gap
    characters ``-`` and ``.``. An empty string counts as nucleotide.

    Examples:
        >>> _is_nucleotide("ACGTACGT")
        True
        >>> _is_nucleotide("MWLK")
        False
        >>> _is_nucleotide("ACG-T.NU")
        True
        >>> _is_nucleotide("")
        True
    """
    allowed = frozenset("ACGTUNacgtun-.")
    # Subset test: true exactly when no character falls outside the alphabet.
    return set(seq) <= allowed
103
+
104
+
105
def build_hxb2_map(
    aligned_seqs: list[tuple[str, str]],
    hxb2_id: str = "HxB2",
    seq_type: str | None = None,
) -> list[HxB2Position]:
    """Walk the HxB2 row and map every alignment column to HxB2 coordinates.

    Args:
        aligned_seqs: List of (name, sequence) from read_fasta.
        hxb2_id: Sequence ID of HxB2 in the alignment. A record matches when
            its full name, or the first whitespace-delimited token of its
            name, equals this ID.
        seq_type: ``"NT"`` or ``"AA"``. Auto-detected from the HxB2
            sequence when *None*.

    Returns:
        One HxB2Position per alignment column. Columns where HxB2 has ``-``
        or ``.`` get ``None`` for both position and region. In NT mode three
        consecutive non-gap columns share one amino-acid position.

    Raises:
        ValueError: If no record matches *hxb2_id*.
    """
    hxb2_seq = None
    for name, seq in aligned_seqs:
        # Slice rather than index the token list: an empty header splits to
        # [] and "[0]" would raise IndexError before HxB2 is even reached.
        if name == hxb2_id or name.split()[:1] == [hxb2_id]:
            hxb2_seq = seq
            break

    if hxb2_seq is None:
        raise ValueError(f"HxB2 sequence '{hxb2_id}' not found in alignment")

    if seq_type is None:
        seq_type = "NT" if _is_nucleotide(hxb2_seq) else "AA"

    is_nt = seq_type == "NT"

    positions: list[HxB2Position] = []
    residue_count = 0  # non-gap HxB2 characters seen so far

    for col_idx, residue in enumerate(hxb2_seq):
        if residue in ("-", "."):
            # HxB2 has nothing here, so the column has no HxB2 coordinate.
            positions.append(HxB2Position(col_idx, None, None, residue))
            continue

        residue_count += 1
        # Nucleotides map to codons: 1-3 -> AA 1, 4-6 -> AA 2, and so on.
        aa_pos = (residue_count - 1) // 3 + 1 if is_nt else residue_count
        positions.append(HxB2Position(col_idx, aa_pos, get_env_region(aa_pos), residue))

    return positions
152
+
153
+
154
def hxb2_col_labels(hxb2_map: list[HxB2Position], step: int = 50) -> list[tuple[int, str]]:
    """Build x-axis tick labels at regular HxB2 AA intervals.

    Args:
        hxb2_map: Column-to-HxB2 mapping, one entry per alignment column.
        step: Spacing between ticks, in HxB2 amino-acid positions.

    Returns:
        ``(alignment_col, label)`` pairs for each multiple of *step* up to
        the largest mapped position. Each tick sits on the first column
        carrying that position; multiples that no column maps to are skipped.
    """
    # One pass records the first column of every position, instead of
    # rescanning the whole map once per tick.
    first_col: dict[int, int] = {}
    for entry in hxb2_map:
        if entry.hxb2_aa_pos is not None:
            first_col.setdefault(entry.hxb2_aa_pos, entry.alignment_col)

    max_pos = max(first_col, default=0)
    return [
        (first_col[target], str(target))
        for target in range(step, max_pos + 1, step)
        if target in first_col
    ]
164
+
165
+
166
def hxb2_regions(hxb2_map: list[HxB2Position]) -> list[Region]:
    """Build Region annotations from HxB2 position map.

    Consecutive columns with the same region name are merged into one span.
    Columns where HxB2 has a gap carry no region of their own; they are kept
    inside whichever span is open so that an insertion relative to HxB2 does
    not split a region into several same-named fragments.

    Args:
        hxb2_map: Column-to-HxB2 mapping, one entry per alignment column.

    Returns:
        One Region per contiguous span, built as
        ``Region(name, start_col, end_col, colour)`` with *end_col* exclusive.
        The colour comes from REGION_COLORS, falling back to ``"#EEEEEE"``.
    """
    region_spans: list[tuple[str, int, int]] = []
    current: str | None = None
    span_start = 0
    last_residue_col = -1  # last column where HxB2 has a residue

    for p in hxb2_map:
        if p.hxb2_aa_pos is None:
            # Gap in HxB2: neither closes the open span nor starts a new one.
            continue
        if p.region != current:
            if current is not None:
                region_spans.append((current, span_start, p.alignment_col))
            current = p.region
            span_start = p.alignment_col
        last_residue_col = p.alignment_col

    # Close the final span just after the last HxB2 residue, so trailing gap
    # columns are not absorbed into it.
    if current is not None:
        region_spans.append((current, span_start, last_residue_col + 1))

    return [
        Region(name, start, end, REGION_COLORS.get(name, "#EEEEEE"))
        for name, start, end in region_spans
    ]