PyPI - uht-tooling - Versions diffs - 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

uht-tooling 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

uht_tooling/workflows/design_slim.py CHANGED Viewed

@@ -16,6 +16,41 @@ TARGET_TM = 60.0
 MAX_TM = 70.0
 UPSTREAM_15 = 12
+# IUPAC ambiguity codes mapping
+IUPAC_AMBIGUITY = {
+    'A': ['A'], 'C': ['C'], 'G': ['G'], 'T': ['T'],
+    'R': ['A', 'G'],      # puRine
+    'Y': ['C', 'T'],      # pYrimidine
+    'S': ['G', 'C'],      # Strong
+    'W': ['A', 'T'],      # Weak
+    'K': ['G', 'T'],      # Keto
+    'M': ['A', 'C'],      # aMino
+    'B': ['C', 'G', 'T'], # not A
+    'D': ['A', 'G', 'T'], # not C
+    'H': ['A', 'C', 'T'], # not G
+    'V': ['A', 'C', 'G'], # not T
+    'N': ['A', 'C', 'G', 'T'],
+}
+VALID_DEGENERATE_BASES = set(IUPAC_AMBIGUITY.keys())
+def is_valid_degenerate_codon(codon: str) -> bool:
+    """Check if a codon contains only valid IUPAC nucleotide codes."""
+    return len(codon) == 3 and all(b.upper() in VALID_DEGENERATE_BASES for b in codon)
+def contains_degenerate_bases(seq: str) -> bool:
+    """Return True if sequence contains non-standard (degenerate) bases."""
+    return any(b.upper() not in {'A', 'C', 'G', 'T'} for b in seq)
+def expand_degenerate_sequence(seq: str) -> list[str]:
+    """Expand a degenerate sequence to all possible standard sequences."""
+    from itertools import product
+    possibilities = [IUPAC_AMBIGUITY.get(b.upper(), [b]) for b in seq]
+    return [''.join(combo) for combo in product(*possibilities)]
 def codon_table():
     return {
@@ -103,7 +138,12 @@ def pick_mutant_codon(wt_codon, target_aa):
     return best_list[0][0]
-def calc_tm(seq):
+def calc_tm(seq: str) -> float:
+    """Calculate Tm, using average across expansions for degenerate sequences."""
+    if contains_degenerate_bases(seq):
+        expanded = expand_degenerate_sequence(seq)
+        tms = [mt.Tm_NN(s) for s in expanded]
+        return sum(tms) / len(tms)
     return mt.Tm_NN(seq)
@@ -266,6 +306,7 @@ def run_design_slim(
                     m_indel = re.match(r"^([A-Z])(\d+)InDel([A-Z])(\d+)([A-Z]+)$", m)
                     m_sub = re.match(r"^([A-Z])(\d+)([A-Z])$", m)
                     m_ins = re.match(r"^([A-Z])(\d+)([A-Z]{2,})$", m)
+                    m_lib = re.match(r"^([A-Z])(\d+):([A-Za-z]{3})$", m)
                     if m_del:
                         wt_aa, pos1 = m_del.group(1), int(m_del.group(2))
@@ -328,6 +369,39 @@ def run_design_slim(
                         if not new_seq:
                             logger.error("No minimal-change codon for %s->%s", wt_aa, mut_aa)
                             raise ValueError(f"No minimal-change codon for {wt_aa}->{mut_aa}")
+                    elif m_lib:
+                        wt_aa, pos_str, degenerate_codon = m_lib.groups()
+                        pos = int(pos_str)
+                        degenerate_codon = degenerate_codon.upper()
+                        # Validate the degenerate codon
+                        if not is_valid_degenerate_codon(degenerate_codon):
+                            raise ValueError(f"Invalid degenerate codon: {degenerate_codon}")
+                        region_start = gene_offset + (pos - 1) * 3
+                        old_len = 3
+                        # Validate WT amino acid (same as substitution validation)
+                        wt_codon = full_seq[region_start : region_start + 3]
+                        translated = translate_codon(wt_codon)
+                        if translated != wt_aa:
+                            logger.error(
+                                "Expected %s but found %s at codon %s for mutation %s",
+                                wt_aa, translated, wt_codon, mutation,
+                            )
+                            raise ValueError(
+                                f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
+                            )
+                        new_seq = degenerate_codon
+                        # Log library coverage info
+                        expanded_codons = expand_degenerate_sequence(degenerate_codon)
+                        unique_aas = set(translate_codon(c) for c in expanded_codons if translate_codon(c) != '?')
+                        logger.info(
+                            "Library mutation %s: %d possible codons, %d amino acids",
+                            mutation, len(expanded_codons), len(unique_aas)
+                        )
                     else:
                         logger.error("Unknown mutation format: %s", mutation)
                         raise ValueError(f"Unknown mutation format: {mutation}")

uht_tooling/workflows/gui.py CHANGED Viewed

@@ -566,8 +566,8 @@ def create_gui() -> gr.Blocks:
         with gr.Tab("Nextera XT"):  # --- Nextera ---
             gr.Markdown(
                 textwrap.dedent(
-                    """
-                    ### Illumina-Compatible Primer Design
+                """
+                ### Illumina-Compatible Primer Design
                     Generates Nextera XT-ready primers from forward/reverse binding regions. The workflow preloads 12 i5 and 12 i7 indices (144 combinations) and mirrors the “One-PCR-to-flowcell” process described in the README.
                     **Inputs**
@@ -577,7 +577,7 @@ def create_gui() -> gr.Blocks:
                     **Outputs**
                     - CSV with i5/i7 indices, primer sequences, and ordering-ready metadata.
                     - Run log noting index selection and any validation warnings.
-                    """
+                """
                 )
             )
             forward = gr.Textbox(label="Forward primer (5'→3')")
@@ -599,13 +599,13 @@ def create_gui() -> gr.Blocks:
                         - Confirm primer depletion via electrophoresis (e.g., BioAnalyzer) before sequencing prep.
                         """
                     )
-                )
+            )
         with gr.Tab("SLIM"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
-                    ### Sequence-Ligation Independent Mutagenesis
+                """
+                ### Sequence-Ligation Independent Mutagenesis
                     Designs paired short/long primers to introduce targeted mutations by SLIM cloning, matching the workflow outlined in the README.
                     **Inputs**
@@ -616,7 +616,7 @@ def create_gui() -> gr.Blocks:
                     **Outputs**
                     - `SLIM_primers.csv` with primer sequences and annealing temperatures.
                     - Log file capturing primer QC and any design warnings.
-                    """
+                """
                 )
             )
             slim_gene = gr.Textbox(label="Gene sequence", lines=4)
@@ -640,13 +640,13 @@ def create_gui() -> gr.Blocks:
                         4. Transform directly into NEB 5-alpha or BL21 (DE3); the method scales to dozens of mutants simultaneously.
                         """
                     )
-                )
+            )
         with gr.Tab("Gibson"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
-                    ### Gibson Assembly Primer Design
+                """
+                ### Gibson Assembly Primer Design
                     Plans primer sets and assembly steps for Gibson mutagenesis, supporting multi-mutation constructs using the `+` syntax (e.g. `A123G+T150A`).
                     **Inputs**
@@ -658,7 +658,7 @@ def create_gui() -> gr.Blocks:
                     - Primer CSV with overlap sequences and melting temperatures.
                     - Assembly plan CSV detailing fragment combinations.
                     - Log summarising design decisions and any warnings about overlapping regions.
-                    """
+                """
                 )
             )
             gibson_gene = gr.Textbox(label="Gene sequence", lines=4)
@@ -681,13 +681,13 @@ def create_gui() -> gr.Blocks:
                         - When replacing entire codons (e.g. `L46GP`), ensure the plasmid context covers both flanks to maintain overlap.
                         """
                     )
-                )
+            )
         with gr.Tab("Mutation Caller"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
-                    ### Long-read Mutation Analysis
+                """
+                ### Long-read Mutation Analysis
                     Extracts coding regions bounded by user-defined flanks, aligns them to the template, and reports amino-acid substitutions alongside co-occurrence summaries.
                     **Required inputs**
@@ -695,8 +695,8 @@ def create_gui() -> gr.Blocks:
                     - Template FASTA: coding sequence used as the reference for alignment.
                     - Flank sequences: short 8–12 bp motifs immediately upstream and downstream of the gene.
                     - Gene length bounds: acceptable size window (in nucleotides) for the extracted gene segment.
-                    """
-                )
+                """
+            )
             )
             with gr.Row():
                 mc_fastq = gr.File(
@@ -753,12 +753,12 @@ def create_gui() -> gr.Blocks:
                         - Outputs mirror the CLI version: per-sample directories with CSV summaries, JSON co-occurrence graphs, QC plots, and a detailed `run.log`.
                         """
                     )
-                )
+            )
         with gr.Tab("UMI Hunter"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
+                """
                     ### UMI–Gene Pair Clustering
                     Detects UMI barcodes, extracts paired gene inserts, clusters reads by UMI identity, and emits consensus sequences with abundance tables.
@@ -768,8 +768,8 @@ def create_gui() -> gr.Blocks:
                     - UMI and gene flank sequences marking the barcode and insert boundaries.
                     - UMI length bounds plus clustering thresholds.
                     - Minimum reads per cluster to keep (clusters below the threshold are reported but no consensus is generated).
-                    """
-                )
+                """
+            )
             )
             with gr.Row():
                 umi_fastq = gr.File(
@@ -862,19 +862,19 @@ def create_gui() -> gr.Blocks:
                         - Outputs include per-sample summaries, consensus FASTA files, cluster membership tables, QC plots, and logs mirroring the CLI workflow.
                         """
                     )
-                )
+            )
         with gr.Tab("Profile Inserts"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
+                """
                     ### Probe-Guided Insert Profiling
                     Characterises inserts demarcated by user-supplied upstream/downstream probes, extracts sequences, and produces QC plots plus summary tables.
                     **Required inputs**
                     - FASTQ reads containing the inserts of interest.
                     - One or more probe pairs: 5'→3' sequences for the upstream and downstream anchors (reverse complements are matched automatically).
-                    """
+                """
                 )
             )
             probes_table = gr.Dataframe(
@@ -916,13 +916,13 @@ def create_gui() -> gr.Blocks:
                         - Logs are stored alongside the results so runs remain fully reproducible.
                         """
                     )
-                )
+            )
         with gr.Tab("EP Library Profile"):
             gr.Markdown(
                 textwrap.dedent(
-                    """
-                    ### Library Profiling Without UMIs
+                """
+                ### Library Profiling Without UMIs
                     Estimates background and target mutation rates for enzyme evolution libraries without UMI barcodes.
                     **Inputs**
@@ -934,7 +934,7 @@ def create_gui() -> gr.Blocks:
                     - Per-sample directories with coverage tables, mutation rate statistics, and QC plots.
                     - `master_summary.txt` aggregating condition-level metrics.
                     - Verbose logs recording alignment commands and rate calculations.
-                    """
+                """
                 )
             )
             ep_fastq = gr.File(
@@ -963,7 +963,7 @@ def create_gui() -> gr.Blocks:
                         - Download the archive to inspect per-sample plots, TSV summaries, the consensus summary, and logs for troubleshooting.
                         """
                     )
-                )
+            )
         gr.Markdown(
             textwrap.dedent(

uht_tooling/workflows/mut_rate.py CHANGED Viewed

@@ -539,7 +539,7 @@ def run_qc_analysis(fastq_path, results_dir, ref_hit_fasta, plasmid_fasta):
             f.write(
                 "Q-score\tMean_AA\tStd_AA\tCI_Lower\tCI_Upper\tMappable_Bases\tSegments\n"
             )
-            for result in qc_results:
+                for result in qc_results:
                 f.write(
                     f"{result['quality_threshold']}\t"
                     f"{result['mean_aa_mutations']:.6f}\t"
@@ -566,10 +566,10 @@ def compute_consensus_aa_mutation(
 ) -> Tuple[Optional[dict], List[dict]]:
     """
     Derive a consensus amino-acid mutation estimate across Q-score thresholds.
     Each threshold must meet a minimum coverage requirement. The consensus is a
     precision-weighted average (weights = 1 / std_aa_mutations).
     Returns:
         consensus_info (dict or None)
             {
@@ -648,7 +648,7 @@ def compute_consensus_aa_mutation(
         consensus_std,
         thresholds,
     )
     return consensus_info, valid_results
 def create_simple_qc_plots(quality_thresholds, qc_results, results_dir, consensus_info=None):
@@ -2170,12 +2170,12 @@ def run_main_analysis_for_qscore(fastq_path, qscore, qscore_desc, sample_name, w
             color="gray",
             transform=ax3.transAxes,
         )
-    ax3.set_title("AA Mutation Distribution", fontsize=14, fontweight='bold')
-    ax3.set_xlabel("Number of AA Mutations", fontsize=12)
-    ax3.set_ylabel("Density", fontsize=12)
-    ax3.spines['top'].set_visible(False)
-    ax3.spines['right'].set_visible(False)
+        ax3.set_title("AA Mutation Distribution", fontsize=14, fontweight='bold')
+        ax3.set_xlabel("Number of AA Mutations", fontsize=12)
+        ax3.set_ylabel("Density", fontsize=12)
+        ax3.spines['top'].set_visible(False)
+        ax3.spines['right'].set_visible(False)
     # Save the combined figure as both PNG and PDF
     panel_path_png = os.path.join(qscore_results_dir, "summary_panels.png")
@@ -2380,7 +2380,7 @@ def run_ep_library_profile(
     output_dir.mkdir(parents=True, exist_ok=True)
     work_dir.mkdir(parents=True, exist_ok=True)
-    master_summary_path = output_dir / "master_summary.txt"
+        master_summary_path = output_dir / "master_summary.txt"
     header = "\t".join(
         [
             "Sample",

{uht_tooling-0.1.7.dist-info → uht_tooling-0.1.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: uht-tooling
-Version: 0.1.7
+Version: 0.1.8
 Summary: Tooling for ultra-high throughput screening workflows.
 Author: Matt115A
 License: MIT
@@ -141,6 +141,46 @@ Mutation nomenclature examples:
 - `T241Del` (deletion)
 - `T241TS` (insert Ser after Thr241)
 - `L46GP` (replace Leu46 with Gly-Pro)
+- `A123:NNK` (library mutation with degenerate codon)
+#### Library mutations with degenerate codons
+For saturation mutagenesis and library generation, SLIM supports degenerate (IUPAC ambiguity) codons using the format `<WT_AA><position>:<codon>`. The codon must be exactly 3 characters using valid IUPAC nucleotide codes:
+| Code | Bases | Mnemonic |
+|------|-------|----------|
+| A, C, G, T | Single base | Standard |
+| R | A, G | puRine |
+| Y | C, T | pYrimidine |
+| S | G, C | Strong |
+| W | A, T | Weak |
+| K | G, T | Keto |
+| M | A, C | aMino |
+| B | C, G, T | not A |
+| D | A, G, T | not C |
+| H | A, C, T | not G |
+| V | A, C, G | not T |
+| N | A, C, G, T | aNy |
+Common degenerate codon schemes for library construction:
+| Scheme | Codons | Amino acids | Stop codons | Notes |
+|--------|--------|-------------|-------------|-------|
+| NNK | 32 | 20 | 1 (TAG) | Reduced stop codon frequency |
+| NNS | 32 | 20 | 1 (TAG) | Equivalent to NNK |
+| NNN | 64 | 20 | 3 | All codons, higher stop frequency |
+| NDT | 12 | 12 | 0 | F, L, I, V, Y, H, N, D, C, R, S, G only |
+Example CSV with mixed mutation types:
+```csv
+mutations
+A123G
+T50:NNK
+S100:NNS
+T241Del
+```
+The workflow validates that the wild-type amino acid matches the template sequence and logs library coverage information (number of possible codons and amino acids) for each degenerate mutation. Primers are generated with the degenerate bases embedded; reverse primers contain the correct IUPAC reverse complements (e.g., K↔M, R↔Y, S↔S).
 #### Experimental blueprint

{uht_tooling-0.1.7.dist-info → uht_tooling-0.1.8.dist-info}/RECORD RENAMED Viewed

@@ -3,15 +3,15 @@ uht_tooling/cli.py,sha256=XnpJbMiuB3g5GL-d2bLf4TsDsd9eWDG-tjaAaMnAPTk,13008
 uht_tooling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 uht_tooling/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 uht_tooling/workflows/design_gibson.py,sha256=SQEThq6dxPMPCsUrwqMUaG5I-diE9jUXPRii9Y7O_7U,13617
-uht_tooling/workflows/design_slim.py,sha256=Qeh8N32kmVFZvohmTlBudJsLzOqLy4XcY3aXbkP-sFQ,14421
-uht_tooling/workflows/gui.py,sha256=P4FdZWsS0NLX5VmOZZ-WO-biVEhbfa6M1gY6DFcgR7k,43153
-uht_tooling/workflows/mut_rate.py,sha256=j8QzYe9QrT_yyhSYUbH3MHyvUp61U_h0w1bEd8b3aFI,109038
+uht_tooling/workflows/design_slim.py,sha256=wGXnmaJCzlAZTjf2SRupwt_3MBl5cgZr1O9nnMQyoGo,17767
+uht_tooling/workflows/gui.py,sha256=2TctLdsoqA9sx37erWWkUGjnQerPl1tPf2ShEfdL76k,43041
+uht_tooling/workflows/mut_rate.py,sha256=jyqZbUE7617jF_gOF4m7gX-Rgc6-WV4fWS9oVxhnAUU,109082
 uht_tooling/workflows/mutation_caller.py,sha256=BczuNATOSUcmlw-x6qTzEQfW8MBbvGclEyqiQiBX0cg,16222
 uht_tooling/workflows/nextera_designer.py,sha256=8MZ_DyQ0JwPojXH5mZ6bAGAkqki_0qQGac45T_Ll8FQ,6170
 uht_tooling/workflows/profile_inserts.py,sha256=C-SZ10YefiV_4QZbo1oEkI4qYipwaYqPP5jF-MC5O58,16947
 uht_tooling/workflows/umi_hunter.py,sha256=baycWycqVzUfMp5u2WZdHRl0sNuykTjy-iqtj5ahucU,15075
-uht_tooling-0.1.7.dist-info/METADATA,sha256=YuHkyuvRdznGgVH111anZaqsOBt9k-szz1vJGF-eWy0,12925
-uht_tooling-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-uht_tooling-0.1.7.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
-uht_tooling-0.1.7.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
-uht_tooling-0.1.7.dist-info/RECORD,,
+uht_tooling-0.1.8.dist-info/METADATA,sha256=dQ8u8XSyBvbujsLyWIKAZqcDxqIkYb8BU1fFddAjxDs,14436
+uht_tooling-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+uht_tooling-0.1.8.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
+uht_tooling-0.1.8.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
+uht_tooling-0.1.8.dist-info/RECORD,,

{uht_tooling-0.1.7.dist-info → uht_tooling-0.1.8.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{uht_tooling-0.1.7.dist-info → uht_tooling-0.1.8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{uht_tooling-0.1.7.dist-info → uht_tooling-0.1.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

uht-tooling 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

uht-tooling 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl