zehramsa 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: zehramsa
3
+ Version: 1.0.0
4
+ Summary: Multiple Sequence Alignment via Dynamic Programming
5
+ Requires-Python: >=3.9
@@ -0,0 +1,13 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "zehramsa"
7
+ version = "1.0.0"
8
+ description = "Multiple Sequence Alignment via Dynamic Programming"
9
+ requires-python = ">=3.9"
10
+ dependencies = []
11
+
12
+ [tool.setuptools.packages.find]
13
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ __version__ = "1.0.0"
2
+ __author__ = "Zehra"
3
+
4
+ from .align import align
5
+ from .result import MSAResult, PairwiseResult
6
+ from .scoring import SimpleScoring, DEFAULT_SCORING
7
+ from .needleman_wunsch import needleman_wunsch, needleman_wunsch_score
8
+ from .center_star import center_star_align
9
+
10
+ __all__ = [
11
+ "align",
12
+ "MSAResult",
13
+ "PairwiseResult",
14
+ "SimpleScoring",
15
+ "DEFAULT_SCORING",
16
+ "needleman_wunsch",
17
+ "needleman_wunsch_score",
18
+ "center_star_align",
19
+ ]
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ from .needleman_wunsch import needleman_wunsch
4
+ from .center_star import center_star_align
5
+ from .result import MSAResult, PairwiseResult
6
+ from .scoring import DEFAULT_SCORING
7
+
8
+
9
def align(
    sequences: list[str],
    *,
    scoring=None,
    verbose: bool = False,
) -> MSAResult:
    """Align two or more sequences with the center-star heuristic.

    Every pair of upper-cased sequences is aligned with Needleman-Wunsch;
    the pairwise scores and alignments are then handed to
    ``center_star_align``, which selects the center sequence and merges the
    pairwise alignments into one multiple alignment.

    Args:
        sequences: Raw sequences; at least two strings are required. They
            are named ``Seq_0``, ``Seq_1``, ... in input order.
        scoring: Object exposing ``gap`` and ``score(a, b)``. Defaults to
            ``DEFAULT_SCORING`` when ``None``.
        verbose: When true, progress information is printed to stdout.

    Returns:
        An ``MSAResult`` with the aligned sequences keyed by name, the
        sum-of-pairs score, the center sequence name and every pairwise
        result that was computed.

    Raises:
        ValueError: If fewer than two sequences are supplied.
        TypeError: If any element of ``sequences`` is not a ``str``.
    """
    if not sequences:
        raise ValueError("sequences list is empty — provide at least 2 sequences.")
    if len(sequences) < 2:
        raise ValueError("At least 2 sequences are required for alignment.")
    for i, s in enumerate(sequences):
        if not isinstance(s, str):
            raise TypeError(
                f"sequences[{i}] must be a string, got {type(s).__name__!r}"
            )

    if scoring is None:
        scoring = DEFAULT_SCORING

    names = [f'Seq_{i}' for i in range(len(sequences))]
    seqs = [s.upper() for s in sequences]
    n = len(seqs)

    if verbose:
        print(f"[zehramsa] Aligning {n} sequences: {names}")

    # All-against-all pairwise alignments; the score matrix is symmetric.
    pw_results: list[PairwiseResult] = []
    score_matrix: list[list[float]] = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i + 1, n):
            pw = needleman_wunsch(
                seqs[i], seqs[j], scoring,
                seq1_name=names[i], seq2_name=names[j],
            )
            pw_results.append(pw)
            score_matrix[i][j] = pw.score
            score_matrix[j][i] = pw.score
            if verbose:
                print(f" NW {names[i]} vs {names[j]}: score={pw.score:.2f}")

    sequences_aligned, center_name = center_star_align(
        seqs, names, scoring,
        score_matrix=score_matrix,
        pairwise_results=pw_results,
    )

    if verbose:
        print(f" Center sequence: {center_name}")

    # Sum-of-pairs score over every pair of aligned rows.
    aligned_seqs = list(sequences_aligned.values())
    sp_score = 0.0
    for i, row_a in enumerate(aligned_seqs):
        for row_b in aligned_seqs[i + 1:]:
            for a, b in zip(row_a, row_b):
                # A column where both rows hold a gap disappears from the
                # pairwise alignment induced by the MSA, so the standard
                # sum-of-pairs definition scores it as 0, not as a gap.
                if a == '-' and b == '-':
                    continue
                sp_score += scoring.score(a, b)

    if verbose:
        print(f" SP-score: {sp_score:.2f}")

    return MSAResult(
        sequences=sequences_aligned,
        score=sp_score,
        center_sequence=center_name,
        pairwise_results=pw_results,
    )
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ from .needleman_wunsch import needleman_wunsch
4
+
5
+
6
def center_star_align(
    seqs: list[str],
    names: list[str],
    scoring,
    *,
    score_matrix: list[list[float]] | None = None,
    pairwise_results=None,
) -> tuple[dict[str, str], str]:
    """Merge pairwise alignments into a multiple alignment (center-star).

    The center is the sequence whose row in ``score_matrix`` has the largest
    sum (the first such index on ties). Each other sequence's pairwise
    alignment against the center is then merged column by column: wherever
    any pairwise alignment has a gap in the center, a gap column is opened
    in the center and in every sequence that has nothing to insert there.

    Args:
        seqs: The unaligned sequences.
        names: One name per sequence, in the same order as ``seqs``.
        scoring: Scoring object forwarded to ``needleman_wunsch`` whenever a
            pairwise alignment has to be computed here.
        score_matrix: Optional pairwise score matrix indexed like ``seqs``.
            Computed with ``needleman_wunsch`` when ``None``.
        pairwise_results: Optional iterable of objects with ``seq1_name``,
            ``seq2_name``, ``aligned_seq1`` and ``aligned_seq2``. When given,
            alignments against the center are looked up here by name (first
            match wins) instead of being recomputed.

    Returns:
        ``(aligned, center_name)`` where ``aligned`` maps each name to its
        aligned sequence, with the center sequence inserted first.

    Raises:
        ValueError: If ``seqs`` is empty, if ``pairwise_results`` lacks an
            alignment between the center and some other sequence, or if the
            pairwise alignments disagree about the center sequence.
    """
    n = len(seqs)
    if n == 0:
        raise ValueError("center_star_align requires at least one sequence.")
    if n == 1:
        # Nothing to merge: the single sequence is its own alignment.
        return {names[0]: seqs[0]}, names[0]

    # Alignments computed while filling the score matrix, keyed by (i, j)
    # with i < j, so they are not recomputed for the center below.
    _cached_pw: dict[tuple[int, int], object] = {}

    if score_matrix is None:
        score_matrix = [[0.0] * n for _ in range(n)]
        for i in range(n):
            for j in range(i + 1, n):
                pw = needleman_wunsch(
                    seqs[i], seqs[j], scoring,
                    seq1_name=names[i], seq2_name=names[j],
                )
                score_matrix[i][j] = pw.score
                score_matrix[j][i] = pw.score
                _cached_pw[(i, j)] = pw

    center_idx = max(range(n), key=lambda i: sum(score_matrix[i]))
    center_name = names[center_idx]
    other_indices = [k for k in range(n) if k != center_idx]

    # For every non-center index: (center row, other row) of its alignment.
    pairwise_alignments: dict[int, tuple[str, str]] = {}
    if pairwise_results is not None:
        # Index the supplied results once by the center's partner name,
        # always storing the center row first. setdefault keeps the first
        # match, mirroring a front-to-back scan of pairwise_results.
        by_partner: dict[str, tuple[str, str]] = {}
        for pw in pairwise_results:
            if pw.seq1_name == center_name:
                by_partner.setdefault(
                    pw.seq2_name, (pw.aligned_seq1, pw.aligned_seq2)
                )
            elif pw.seq2_name == center_name:
                by_partner.setdefault(
                    pw.seq1_name, (pw.aligned_seq2, pw.aligned_seq1)
                )
        for k in other_indices:
            if names[k] not in by_partner:
                raise ValueError(
                    f"No pairwise alignment found for '{names[k]}' against center '{center_name}'. "
                    "This is likely a name mismatch in pairwise_results."
                )
            pairwise_alignments[k] = by_partner[names[k]]
    else:
        for k in other_indices:
            key = (min(center_idx, k), max(center_idx, k))
            pw = _cached_pw.get(key)
            if pw is None:
                pw = needleman_wunsch(
                    seqs[center_idx], seqs[k], scoring,
                    seq1_name=center_name, seq2_name=names[k],
                )
                pairwise_alignments[k] = (pw.aligned_seq1, pw.aligned_seq2)
            elif center_idx == key[0]:
                pairwise_alignments[k] = (pw.aligned_seq1, pw.aligned_seq2)
            else:
                # Cached with the center as the second sequence: swap rows.
                pairwise_alignments[k] = (pw.aligned_seq2, pw.aligned_seq1)

    # Per sequence: list of (center_char, other_char) alignment columns.
    col_lists: dict[int, list[tuple[str, str]]] = {
        k: list(zip(ca, oa))
        for k, (ca, oa) in pairwise_alignments.items()
    }
    lengths = {k: len(col_lists[k]) for k in other_indices}
    ptrs: dict[int, int] = {k: 0 for k in other_indices}

    out_center: list[str] = []
    out_others: dict[int, list[str]] = {k: [] for k in other_indices}

    def leading_center_gaps(k: int) -> int:
        """Count consecutive columns at k's cursor where the center has a gap."""
        p = ptrs[k]
        while p < lengths[k] and col_lists[k][p][0] == '-':
            p += 1
        return p - ptrs[k]

    while True:
        # Phase 1: emit insertion columns (gap in the center). Sequences
        # with fewer pending insertions than the widest one are gap-padded.
        ins_counts = {k: leading_center_gaps(k) for k in other_indices}
        max_ins = max(ins_counts.values())
        for step in range(max_ins):
            out_center.append('-')
            for k in other_indices:
                if step < ins_counts[k]:
                    out_others[k].append(col_lists[k][ptrs[k]][1])
                    ptrs[k] += 1
                else:
                    out_others[k].append('-')

        if all(ptrs[k] >= lengths[k] for k in other_indices):
            break

        # Phase 2: emit the next real center character. Every unfinished
        # pairwise alignment must agree on what that character is.
        center_chars = {
            col_lists[k][ptrs[k]][0]
            for k in other_indices
            if ptrs[k] < lengths[k]
        }
        if len(center_chars) != 1:
            # Raised explicitly rather than asserted so that the check
            # still guards caller-supplied pairwise_results under -O.
            raise ValueError(
                f"Expected exactly one center character, got {center_chars!r}"
            )
        c_char = next(iter(center_chars))
        out_center.append(c_char)
        for k in other_indices:
            if ptrs[k] < lengths[k] and col_lists[k][ptrs[k]][0] == c_char:
                out_others[k].append(col_lists[k][ptrs[k]][1])
                ptrs[k] += 1
            else:
                out_others[k].append('-')

    result: dict[str, str] = {center_name: ''.join(out_center)}
    for k in other_indices:
        result[names[k]] = ''.join(out_others[k])
    return result, center_name
@@ -0,0 +1,84 @@
1
+ from __future__ import annotations
2
+ from .result import PairwiseResult
3
+ from .scoring import DEFAULT_SCORING
4
+
5
# Traceback codes stored per DP cell: the neighbour the best score came from.
_DIAG = 0  # from (i-1, j-1): seq1[i-1] aligned with seq2[j-1]
_UP = 1  # from (i-1, j): seq1[i-1] aligned with a gap
_LEFT = 2  # from (i, j-1): a gap aligned with seq2[j-1]


def needleman_wunsch(
    seq1: str,
    seq2: str,
    scoring=None,
    *,
    seq1_name: str = 'Seq1',
    seq2_name: str = 'Seq2',
) -> PairwiseResult:
    """Globally align two sequences with a linear gap penalty.

    Fills a score table and a parallel traceback table, then walks the
    traceback from the bottom-right cell to the origin. On ties the diagonal
    move is preferred, then ``_UP`` (gap in ``seq2``), then ``_LEFT``
    (gap in ``seq1``).

    Args:
        seq1: First sequence (rows of the table).
        seq2: Second sequence (columns of the table).
        scoring: Object exposing ``gap`` and ``score(a, b)``; falls back to
            ``DEFAULT_SCORING`` when ``None``.
        seq1_name: Name recorded for ``seq1`` in the result.
        seq2_name: Name recorded for ``seq2`` in the result.

    Returns:
        A ``PairwiseResult`` with both gapped rows and the bottom-right
        table value as the score.
    """
    scheme = DEFAULT_SCORING if scoring is None else scoring
    gap_penalty = scheme.gap
    rows, cols = len(seq1), len(seq2)

    table: list[list[float]] = [[0.0] * (cols + 1) for _ in range(rows + 1)]
    moves: list[list[int]] = [[_DIAG] * (cols + 1) for _ in range(rows + 1)]

    # Borders: a prefix aligned against nothing is all gaps.
    for r in range(1, rows + 1):
        table[r][0] = r * gap_penalty
        moves[r][0] = _UP
    for c in range(1, cols + 1):
        table[0][c] = c * gap_penalty
        moves[0][c] = _LEFT

    for r, ch1 in enumerate(seq1, start=1):
        above = table[r - 1]
        current = table[r]
        for c, ch2 in enumerate(seq2, start=1):
            via_diag = above[c - 1] + scheme.score(ch1, ch2)
            via_up = above[c] + gap_penalty
            via_left = current[c - 1] + gap_penalty

            # Start from the lowest-priority move and let higher-priority
            # moves override it when they are at least as good.
            best, move = via_left, _LEFT
            if via_up >= via_left:
                best, move = via_up, _UP
            if via_diag >= via_up and via_diag >= via_left:
                best, move = via_diag, _DIAG

            current[c] = best
            moves[r][c] = move

    # Traceback collects columns back-to-front.
    columns: list[tuple[str, str]] = []
    r, c = rows, cols
    while r > 0 or c > 0:
        move = moves[r][c]
        if move == _DIAG:
            r -= 1
            c -= 1
            columns.append((seq1[r], seq2[c]))
        elif move == _UP:
            r -= 1
            columns.append((seq1[r], '-'))
        else:
            c -= 1
            columns.append(('-', seq2[c]))
    columns.reverse()

    top_row = ''.join(top for top, _ in columns)
    bottom_row = ''.join(bottom for _, bottom in columns)

    return PairwiseResult(
        seq1_name=seq1_name,
        seq2_name=seq2_name,
        aligned_seq1=top_row,
        aligned_seq2=bottom_row,
        score=table[rows][cols],
    )
81
+
82
+
83
def needleman_wunsch_score(seq1: str, seq2: str, scoring=None) -> float:
    """Return only the global alignment score of *seq1* against *seq2*.

    Runs the full ``needleman_wunsch`` alignment with its default sequence
    names and keeps nothing but the resulting ``score``.
    """
    alignment = needleman_wunsch(seq1, seq2, scoring)
    return alignment.score
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass, field
3
+
4
+
5
@dataclass
class PairwiseResult:
    """Outcome of aligning one pair of sequences."""

    seq1_name: str
    seq2_name: str
    aligned_seq1: str
    aligned_seq2: str
    score: float

    @property
    def alignment_length(self) -> int:
        """Number of columns, measured on the first aligned row."""
        first_row = self.aligned_seq1
        return len(first_row)

    @property
    def identity(self) -> float:
        """Fraction of columns with the same non-gap character in both rows.

        The denominator is the full alignment length, so gap columns lower
        the identity. An empty alignment yields 0.0.
        """
        total = self.alignment_length
        if not total:
            return 0.0
        same = 0
        for top, bottom in zip(self.aligned_seq1, self.aligned_seq2):
            if top != '-' and top == bottom:
                same += 1
        return same / total

    def __str__(self) -> str:
        """Render both rows followed by a score/identity summary line."""
        first = f"{self.seq1_name}: {self.aligned_seq1}\n"
        second = f"{self.seq2_name}: {self.aligned_seq2}\n"
        summary = f"Score: {self.score} Identity: {self.identity:.1%}"
        return first + second + summary
33
+
34
+
35
@dataclass
class MSAResult:
    """Container for a finished multiple sequence alignment."""

    sequences: dict[str, str]
    score: float
    center_sequence: str
    pairwise_results: list[PairwiseResult]
    column_count: int = field(init=False)

    def __post_init__(self) -> None:
        """Set ``column_count`` to the longest aligned row (0 when empty)."""
        self.column_count = max(
            (len(row) for row in self.sequences.values()),
            default=0,
        )

    @property
    def names(self) -> list[str]:
        """Sequence names in insertion order."""
        return [name for name in self.sequences]

    @property
    def aligned_sequences(self) -> list[str]:
        """Aligned rows in the same order as ``names``."""
        return [*self.sequences.values()]

    def __str__(self) -> str:
        """Render a header, the score and one ``name: row`` line per sequence."""
        header = ["=== MSA Result ===", f"Score: {self.score}"]
        rows = [f"{name}: {seq}" for name, seq in self.sequences.items()]
        return "\n".join(header + rows)
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+
4
class SimpleScoring:
    """Scoring scheme with one fixed value each for match, mismatch and gap."""

    def __init__(self, match: float = 1.0, mismatch: float = -1.0, gap: float = -2.0) -> None:
        """Store the three score values as attributes."""
        self.match, self.mismatch, self.gap = match, mismatch, gap

    def score(self, a: str, b: str) -> float:
        """Score one column made of the characters *a* and *b*.

        Returns ``gap`` when either character is ``'-'``, ``match`` when the
        characters are equal, and ``mismatch`` otherwise.

        Raises:
            ValueError: If *a* or *b* is not exactly one character long.
        """
        if not (len(a) == 1 and len(b) == 1):
            raise ValueError(
                f"score() expects single characters, got {a!r} and {b!r}"
            )
        if '-' in (a, b):
            return self.gap
        if a == b:
            return self.match
        return self.mismatch


# Shared default scheme: match 1.0, mismatch -1.0, gap -2.0.
DEFAULT_SCORING = SimpleScoring()
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: zehramsa
3
+ Version: 1.0.0
4
+ Summary: Multiple Sequence Alignment via Dynamic Programming
5
+ Requires-Python: >=3.9
@@ -0,0 +1,11 @@
1
+ pyproject.toml
2
+ src/zehramsa/__init__.py
3
+ src/zehramsa/align.py
4
+ src/zehramsa/center_star.py
5
+ src/zehramsa/needleman_wunsch.py
6
+ src/zehramsa/result.py
7
+ src/zehramsa/scoring.py
8
+ src/zehramsa.egg-info/PKG-INFO
9
+ src/zehramsa.egg-info/SOURCES.txt
10
+ src/zehramsa.egg-info/dependency_links.txt
11
+ src/zehramsa.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ zehramsa