PyPI - zehramsa - Versions diffs - 1.0.0__tar.gz - Mend

zehramsa 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

zehramsa-1.0.0/PKG-INFO +5 -0
zehramsa-1.0.0/pyproject.toml +13 -0
zehramsa-1.0.0/setup.cfg +4 -0
zehramsa-1.0.0/src/zehramsa/__init__.py +19 -0
zehramsa-1.0.0/src/zehramsa/align.py +74 -0
zehramsa-1.0.0/src/zehramsa/center_star.py +131 -0
zehramsa-1.0.0/src/zehramsa/needleman_wunsch.py +84 -0
zehramsa-1.0.0/src/zehramsa/result.py +64 -0
zehramsa-1.0.0/src/zehramsa/scoring.py +20 -0
zehramsa-1.0.0/src/zehramsa.egg-info/PKG-INFO +5 -0
zehramsa-1.0.0/src/zehramsa.egg-info/SOURCES.txt +11 -0
zehramsa-1.0.0/src/zehramsa.egg-info/dependency_links.txt +1 -0
zehramsa-1.0.0/src/zehramsa.egg-info/top_level.txt +1 -0

zehramsa-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: zehramsa
+Version: 1.0.0
+Summary: Multiple Sequence Alignment via Dynamic Programming
+Requires-Python: >=3.9

zehramsa-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,13 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "zehramsa"
+version = "1.0.0"
+description = "Multiple Sequence Alignment via Dynamic Programming"
+requires-python = ">=3.9"
+dependencies = []
+[tool.setuptools.packages.find]
+where = ["src"]

zehramsa-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

zehramsa-1.0.0/src/zehramsa/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+# Package metadata.
+__version__ = "1.0.0"
+__author__ = "Zehra"
+# Re-export the public API so callers can import everything from the package root.
+from .align import align
+from .result import MSAResult, PairwiseResult
+from .scoring import SimpleScoring, DEFAULT_SCORING
+from .needleman_wunsch import needleman_wunsch, needleman_wunsch_score
+from .center_star import center_star_align
+# Names exported by a star-import of this package.
+__all__ = [
+    "align",
+    "MSAResult",
+    "PairwiseResult",
+    "SimpleScoring",
+    "DEFAULT_SCORING",
+    "needleman_wunsch",
+    "needleman_wunsch_score",
+    "center_star_align",
+]

zehramsa-1.0.0/src/zehramsa/align.py ADDED Viewed

@@ -0,0 +1,74 @@
+from __future__ import annotations
+from .needleman_wunsch import needleman_wunsch
+from .center_star import center_star_align
+from .result import MSAResult, PairwiseResult
+from .scoring import DEFAULT_SCORING
+def align(
+    sequences: list[str],
+    *,
+    scoring=None,
+    verbose: bool = False,
+) -> MSAResult:
+    if not sequences:
+        raise ValueError("sequences list is empty — provide at least 2 sequences.")
+    if len(sequences) < 2:
+        raise ValueError("At least 2 sequences are required for alignment.")
+    for i, s in enumerate(sequences):
+        if not isinstance(s, str):
+            raise TypeError(
+                f"sequences[{i}] must be a string, got {type(s).__name__!r}"
+            )
+    if scoring is None:
+        scoring = DEFAULT_SCORING
+    names = [f'Seq_{i}' for i in range(len(sequences))]
+    seqs  = [s.upper() for s in sequences]
+    n = len(seqs)
+    if verbose:
+        print(f"[zehramsa] Aligning {n} sequences: {names}")
+    pw_results: list[PairwiseResult] = []
+    score_matrix: list[list[float]] = [[0.0] * n for _ in range(n)]
+    for i in range(n):
+        for j in range(i + 1, n):
+            pw = needleman_wunsch(
+                seqs[i], seqs[j], scoring,
+                seq1_name=names[i], seq2_name=names[j],
+            )
+            pw_results.append(pw)
+            score_matrix[i][j] = pw.score
+            score_matrix[j][i] = pw.score
+            if verbose:
+                print(f"  NW {names[i]} vs {names[j]}: score={pw.score:.2f}")
+    sequences_aligned, center_name = center_star_align(
+        seqs, names, scoring,
+        score_matrix=score_matrix,
+        pairwise_results=pw_results,
+    )
+    if verbose:
+        print(f"  Center sequence: {center_name}")
+    aligned_seqs = list(sequences_aligned.values())
+    length = len(aligned_seqs[0]) if aligned_seqs else 0
+    sp_score = 0.0
+    for i in range(n):
+        for j in range(i + 1, n):
+            for col in range(length):
+                sp_score += scoring.score(aligned_seqs[i][col], aligned_seqs[j][col])
+    if verbose:
+        print(f"  SP-score: {sp_score:.2f}")
+    return MSAResult(
+        sequences=sequences_aligned,
+        score=sp_score,
+        center_sequence=center_name,
+        pairwise_results=pw_results,
+    )

zehramsa-1.0.0/src/zehramsa/center_star.py ADDED Viewed

@@ -0,0 +1,131 @@
+from __future__ import annotations
+from .needleman_wunsch import needleman_wunsch
+def center_star_align(
+    seqs: list[str],
+    names: list[str],
+    scoring,
+    *,
+    score_matrix: list[list[float]] | None = None,
+    pairwise_results=None,
+) -> tuple[dict[str, str], str]:
+    n = len(seqs)
+    _cached_pw: dict[tuple[int, int], object] = {}
+    if score_matrix is None:
+        score_matrix = [[0.0] * n for _ in range(n)]
+        for i in range(n):
+            for j in range(i + 1, n):
+                pw = needleman_wunsch(
+                    seqs[i], seqs[j], scoring,
+                    seq1_name=names[i], seq2_name=names[j],
+                )
+                score_matrix[i][j] = pw.score
+                score_matrix[j][i] = pw.score
+                _cached_pw[(i, j)] = pw
+    center_idx = max(range(n), key=lambda i: sum(score_matrix[i]))
+    center_name = names[center_idx]
+    pairwise_alignments: dict[int, tuple[str, str]] = {}
+    for k in range(n):
+        if k == center_idx:
+            continue
+        if pairwise_results is not None:
+            for pw in pairwise_results:
+                if pw.seq1_name == center_name and pw.seq2_name == names[k]:
+                    pairwise_alignments[k] = (pw.aligned_seq1, pw.aligned_seq2)
+                    break
+                if pw.seq2_name == center_name and pw.seq1_name == names[k]:
+                    pairwise_alignments[k] = (pw.aligned_seq2, pw.aligned_seq1)
+                    break
+            if k not in pairwise_alignments:
+                raise ValueError(
+                    f"No pairwise alignment found for '{names[k]}' against center '{center_name}'. "
+                    "This is likely a name mismatch in pairwise_results."
+                )
+        else:
+            i_lo = min(center_idx, k)
+            i_hi = max(center_idx, k)
+            if (i_lo, i_hi) in _cached_pw:
+                pw = _cached_pw[(i_lo, i_hi)]
+                if center_idx == i_lo:
+                    pairwise_alignments[k] = (pw.aligned_seq1, pw.aligned_seq2)
+                else:
+                    pairwise_alignments[k] = (pw.aligned_seq2, pw.aligned_seq1)
+            else:
+                pw = needleman_wunsch(
+                    seqs[center_idx], seqs[k], scoring,
+                    seq1_name=center_name, seq2_name=names[k],
+                )
+                pairwise_alignments[k] = (pw.aligned_seq1, pw.aligned_seq2)
+    if n == 1:
+        return {names[0]: seqs[0]}, names[0]
+    col_lists: dict[int, list[tuple[str, str]]] = {
+        k: list(zip(ca, oa))
+        for k, (ca, oa) in pairwise_alignments.items()
+    }
+    other_indices = [k for k in range(n) if k != center_idx]
+    ptrs: dict[int, int] = {k: 0 for k in other_indices}
+    out_center: list[str] = []
+    out_others: dict[int, list[str]] = {k: [] for k in other_indices}
+    lengths = {k: len(col_lists[k]) for k in other_indices}
+    def leading_center_gaps(k: int) -> int:
+        count = 0
+        p = ptrs[k]
+        while p < lengths[k] and col_lists[k][p][0] == '-':
+            count += 1
+            p += 1
+        return count
+    while True:
+        ins_counts = {k: leading_center_gaps(k) for k in other_indices}
+        max_ins = max(ins_counts.values()) if ins_counts else 0
+        ins_ptrs = {k: 0 for k in other_indices}
+        for _ in range(max_ins):
+            out_center.append('-')
+            for k in other_indices:
+                if ins_ptrs[k] < ins_counts[k]:
+                    _, oc = col_lists[k][ptrs[k]]
+                    out_others[k].append(oc)
+                    ptrs[k] += 1
+                    ins_ptrs[k] += 1
+                else:
+                    out_others[k].append('-')
+        if all(ptrs[k] >= lengths[k] for k in other_indices):
+            break
+        center_chars = set()
+        for k in other_indices:
+            if ptrs[k] < lengths[k]:
+                center_chars.add(col_lists[k][ptrs[k]][0])
+        assert len(center_chars) == 1, (
+            f"Expected exactly one center character, got {center_chars!r}"
+        )
+        c_char = next(iter(center_chars))
+        out_center.append(c_char)
+        for k in other_indices:
+            if ptrs[k] < lengths[k] and col_lists[k][ptrs[k]][0] == c_char:
+                _, oc = col_lists[k][ptrs[k]]
+                out_others[k].append(oc)
+                ptrs[k] += 1
+            else:
+                out_others[k].append('-')
+    center_aligned = ''.join(out_center)
+    result: dict[str, str] = {names[center_idx]: center_aligned}
+    for k in other_indices:
+        result[names[k]] = ''.join(out_others[k])
+    return result, center_name

zehramsa-1.0.0/src/zehramsa/needleman_wunsch.py ADDED Viewed

@@ -0,0 +1,84 @@
+from __future__ import annotations
+from .result import PairwiseResult
+from .scoring import DEFAULT_SCORING
+# Traceback codes stored per matrix cell: which neighbouring cell the best
+# score came from (diagonal, the row above, or the column to the left).
+_DIAG = 0
+_UP = 1
+_LEFT = 2
+def needleman_wunsch(
+    seq1: str,
+    seq2: str,
+    scoring=None,
+    *,
+    seq1_name: str = 'Seq1',
+    seq2_name: str = 'Seq2',
+) -> PairwiseResult:
+    if scoring is None:
+        scoring = DEFAULT_SCORING
+    gap = scoring.gap
+    n = len(seq1)
+    m = len(seq2)
+    F: list[list[float]] = [[0.0] * (m + 1) for _ in range(n + 1)]
+    D: list[list[int]] = [[_DIAG] * (m + 1) for _ in range(n + 1)]
+    for i in range(1, n + 1):
+        F[i][0] = i * gap
+        D[i][0] = _UP
+    for j in range(1, m + 1):
+        F[0][j] = j * gap
+        D[0][j] = _LEFT
+    for i in range(1, n + 1):
+        for j in range(1, m + 1):
+            diag = F[i - 1][j - 1] + scoring.score(seq1[i - 1], seq2[j - 1])
+            up   = F[i - 1][j]     + gap
+            left = F[i][j - 1]     + gap
+            if diag >= up and diag >= left:
+                F[i][j] = diag
+                D[i][j] = _DIAG
+            elif up >= left:
+                F[i][j] = up
+                D[i][j] = _UP
+            else:
+                F[i][j] = left
+                D[i][j] = _LEFT
+    aligned1: list[str] = []
+    aligned2: list[str] = []
+    i, j = n, m
+    while i > 0 or j > 0:
+        d = D[i][j]
+        if d == _DIAG:
+            aligned1.append(seq1[i - 1])
+            aligned2.append(seq2[j - 1])
+            i -= 1
+            j -= 1
+        elif d == _UP:
+            aligned1.append(seq1[i - 1])
+            aligned2.append('-')
+            i -= 1
+        else:
+            aligned1.append('-')
+            aligned2.append(seq2[j - 1])
+            j -= 1
+    aligned1.reverse()
+    aligned2.reverse()
+    return PairwiseResult(
+        seq1_name=seq1_name,
+        seq2_name=seq2_name,
+        aligned_seq1=''.join(aligned1),
+        aligned_seq2=''.join(aligned2),
+        score=F[n][m],
+    )
+def needleman_wunsch_score(seq1: str, seq2: str, scoring=None) -> float:
+    return needleman_wunsch(seq1, seq2, scoring).score

zehramsa-1.0.0/src/zehramsa/result.py ADDED Viewed

@@ -0,0 +1,64 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+@dataclass
+class PairwiseResult:
+    seq1_name: str
+    seq2_name: str
+    aligned_seq1: str
+    aligned_seq2: str
+    score: float
+    @property
+    def alignment_length(self) -> int:
+        return len(self.aligned_seq1)
+    @property
+    def identity(self) -> float:
+        """Identity = matches / alignment_length (gaps included in denominator)."""
+        if self.alignment_length == 0:
+            return 0.0
+        matches = sum(
+            1 for a, b in zip(self.aligned_seq1, self.aligned_seq2) if a == b and a != '-'
+        )
+        return matches / self.alignment_length
+    def __str__(self) -> str:
+        return (
+            f"{self.seq1_name}: {self.aligned_seq1}\n"
+            f"{self.seq2_name}: {self.aligned_seq2}\n"
+            f"Score: {self.score}  Identity: {self.identity:.1%}"
+        )
+@dataclass
+class MSAResult:
+    sequences: dict[str, str]
+    score: float
+    center_sequence: str
+    pairwise_results: list[PairwiseResult]
+    column_count: int = field(init=False)
+    def __post_init__(self) -> None:
+        lengths = {len(s) for s in self.sequences.values()}
+        if len(lengths) == 1:
+            self.column_count = next(iter(lengths))
+        elif lengths:
+            self.column_count = max(lengths)
+        else:
+            self.column_count = 0
+    @property
+    def names(self) -> list[str]:
+        return list(self.sequences.keys())
+    @property
+    def aligned_sequences(self) -> list[str]:
+        return list(self.sequences.values())
+    def __str__(self) -> str:
+        lines = ["=== MSA Result ===", f"Score: {self.score}"]
+        for name, seq in self.sequences.items():
+            lines.append(f"{name}: {seq}")
+        return "\n".join(lines)

zehramsa-1.0.0/src/zehramsa/scoring.py ADDED Viewed

@@ -0,0 +1,20 @@
+from __future__ import annotations
+class SimpleScoring:
+    def __init__(self, match: float = 1.0, mismatch: float = -1.0, gap: float = -2.0) -> None:
+        self.match = match
+        self.mismatch = mismatch
+        self.gap = gap
+    def score(self, a: str, b: str) -> float:
+        if len(a) != 1 or len(b) != 1:
+            raise ValueError(
+                f"score() expects single characters, got {a!r} and {b!r}"
+            )
+        if a == '-' or b == '-':
+            return self.gap
+        return self.match if a == b else self.mismatch
+# Shared default scheme (match=1.0, mismatch=-1.0, gap=-2.0), used when
+# callers pass scoring=None.
+DEFAULT_SCORING = SimpleScoring()

zehramsa-1.0.0/src/zehramsa.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: zehramsa
+Version: 1.0.0
+Summary: Multiple Sequence Alignment via Dynamic Programming
+Requires-Python: >=3.9

zehramsa-1.0.0/src/zehramsa.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,11 @@
+pyproject.toml
+src/zehramsa/__init__.py
+src/zehramsa/align.py
+src/zehramsa/center_star.py
+src/zehramsa/needleman_wunsch.py
+src/zehramsa/result.py
+src/zehramsa/scoring.py
+src/zehramsa.egg-info/PKG-INFO
+src/zehramsa.egg-info/SOURCES.txt
+src/zehramsa.egg-info/dependency_links.txt
+src/zehramsa.egg-info/top_level.txt

zehramsa-1.0.0/src/zehramsa.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

zehramsa-1.0.0/src/zehramsa.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ zehramsa