uht-tooling 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,41 @@ TARGET_TM = 60.0
16
16
  MAX_TM = 70.0
17
17
  UPSTREAM_15 = 12
18
18
 
19
# IUPAC nucleotide codes, each mapped to the list of standard bases it can
# represent. Built from compact (code, bases) pairs; key order and the order of
# bases within each entry are significant for sequence expansion order.
IUPAC_AMBIGUITY = {
    code: list(bases)
    for code, bases in (
        ("A", "A"),
        ("C", "C"),
        ("G", "G"),
        ("T", "T"),
        ("R", "AG"),    # puRine
        ("Y", "CT"),    # pYrimidine
        ("S", "GC"),    # Strong
        ("W", "AT"),    # Weak
        ("K", "GT"),    # Keto
        ("M", "AC"),    # aMino
        ("B", "CGT"),   # not A
        ("D", "AGT"),   # not C
        ("H", "ACT"),   # not G
        ("V", "ACG"),   # not T
        ("N", "ACGT"),  # aNy
    )
}

# Every single-letter code accepted inside a degenerate codon.
VALID_DEGENERATE_BASES = set(IUPAC_AMBIGUITY)
36
+
37
+
38
def is_valid_degenerate_codon(codon: str) -> bool:
    """Return True when ``codon`` is exactly three IUPAC nucleotide codes.

    Matching is case-insensitive: each character is upper-cased on its own
    before being looked up in ``VALID_DEGENERATE_BASES``.
    """
    if len(codon) != 3:
        return False
    for base in codon:
        # Upper-case per character (not the whole string) so a character whose
        # upper-case form is multi-letter cannot slip through.
        if base.upper() not in VALID_DEGENERATE_BASES:
            return False
    return True
41
+
42
+
43
def contains_degenerate_bases(seq: str) -> bool:
    """Report whether ``seq`` has any character other than A, C, G or T.

    The comparison is case-insensitive. Note that *any* non-ACGT character
    counts, not only recognised IUPAC ambiguity codes. An empty sequence
    yields False.
    """
    standard_bases = ("A", "C", "G", "T")
    for base in seq:
        if base.upper() not in standard_bases:
            return True
    return False
46
+
47
+
48
def expand_degenerate_sequence(seq: str) -> list[str]:
    """Enumerate every concrete sequence that ``seq`` can stand for.

    Each character is looked up (case-insensitively) in ``IUPAC_AMBIGUITY``;
    characters with no entry are carried through unchanged. Results are ordered
    with the rightmost position varying fastest, and an empty input yields a
    single empty string.
    """
    expansions = [""]
    for base in seq:
        choices = IUPAC_AMBIGUITY.get(base.upper(), [base])
        # Extend every prefix built so far with each base allowed here.
        expansions = [prefix + choice for prefix in expansions for choice in choices]
    return expansions
53
+
19
54
 
20
55
  def codon_table():
21
56
  return {
@@ -103,7 +138,12 @@ def pick_mutant_codon(wt_codon, target_aa):
103
138
  return best_list[0][0]
104
139
 
105
140
 
106
- def calc_tm(seq):
141
def calc_tm(seq: str) -> float:
    """Return the melting temperature of ``seq`` as computed by ``mt.Tm_NN``.

    A sequence made only of A/C/G/T is scored directly. Otherwise the sequence
    is expanded into all of its concrete variants and the unweighted mean of
    their individual Tm values is returned.
    """
    if not contains_degenerate_bases(seq):
        return mt.Tm_NN(seq)

    # Score each concrete variant, then average across the whole expansion.
    variant_tms = [mt.Tm_NN(variant) for variant in expand_degenerate_sequence(seq)]
    return sum(variant_tms) / len(variant_tms)
108
148
 
109
149
 
@@ -266,6 +306,7 @@ def run_design_slim(
266
306
  m_indel = re.match(r"^([A-Z])(\d+)InDel([A-Z])(\d+)([A-Z]+)$", m)
267
307
  m_sub = re.match(r"^([A-Z])(\d+)([A-Z])$", m)
268
308
  m_ins = re.match(r"^([A-Z])(\d+)([A-Z]{2,})$", m)
309
+ m_lib = re.match(r"^([A-Z])(\d+):([A-Za-z]{3})$", m)
269
310
 
270
311
  if m_del:
271
312
  wt_aa, pos1 = m_del.group(1), int(m_del.group(2))
@@ -328,6 +369,39 @@ def run_design_slim(
328
369
  if not new_seq:
329
370
  logger.error("No minimal-change codon for %s->%s", wt_aa, mut_aa)
330
371
  raise ValueError(f"No minimal-change codon for {wt_aa}->{mut_aa}")
372
+ elif m_lib:
373
+ wt_aa, pos_str, degenerate_codon = m_lib.groups()
374
+ pos = int(pos_str)
375
+ degenerate_codon = degenerate_codon.upper()
376
+
377
+ # Validate the degenerate codon
378
+ if not is_valid_degenerate_codon(degenerate_codon):
379
+ raise ValueError(f"Invalid degenerate codon: {degenerate_codon}")
380
+
381
+ region_start = gene_offset + (pos - 1) * 3
382
+ old_len = 3
383
+
384
+ # Validate WT amino acid (same as substitution validation)
385
+ wt_codon = full_seq[region_start : region_start + 3]
386
+ translated = translate_codon(wt_codon)
387
+ if translated != wt_aa:
388
+ logger.error(
389
+ "Expected %s but found %s at codon %s for mutation %s",
390
+ wt_aa, translated, wt_codon, mutation,
391
+ )
392
+ raise ValueError(
393
+ f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
394
+ )
395
+
396
+ new_seq = degenerate_codon
397
+
398
+ # Log library coverage info
399
+ expanded_codons = expand_degenerate_sequence(degenerate_codon)
400
+ unique_aas = set(translate_codon(c) for c in expanded_codons if translate_codon(c) != '?')
401
+ logger.info(
402
+ "Library mutation %s: %d possible codons, %d amino acids",
403
+ mutation, len(expanded_codons), len(unique_aas)
404
+ )
331
405
  else:
332
406
  logger.error("Unknown mutation format: %s", mutation)
333
407
  raise ValueError(f"Unknown mutation format: {mutation}")
@@ -566,8 +566,8 @@ def create_gui() -> gr.Blocks:
566
566
  with gr.Tab("Nextera XT"): # --- Nextera ---
567
567
  gr.Markdown(
568
568
  textwrap.dedent(
569
- """
570
- ### Illumina-Compatible Primer Design
569
+ """
570
+ ### Illumina-Compatible Primer Design
571
571
  Generates Nextera XT-ready primers from forward/reverse binding regions. The workflow preloads 12 i5 and 12 i7 indices (144 combinations) and mirrors the “One-PCR-to-flowcell” process described in the README.
572
572
 
573
573
  **Inputs**
@@ -577,7 +577,7 @@ def create_gui() -> gr.Blocks:
577
577
  **Outputs**
578
578
  - CSV with i5/i7 indices, primer sequences, and ordering-ready metadata.
579
579
  - Run log noting index selection and any validation warnings.
580
- """
580
+ """
581
581
  )
582
582
  )
583
583
  forward = gr.Textbox(label="Forward primer (5'→3')")
@@ -599,13 +599,13 @@ def create_gui() -> gr.Blocks:
599
599
  - Confirm primer depletion via electrophoresis (e.g., BioAnalyzer) before sequencing prep.
600
600
  """
601
601
  )
602
- )
602
+ )
603
603
 
604
604
  with gr.Tab("SLIM"):
605
605
  gr.Markdown(
606
606
  textwrap.dedent(
607
- """
608
- ### Sequence-Ligation Independent Mutagenesis
607
+ """
608
+ ### Sequence-Ligation Independent Mutagenesis
609
609
  Designs paired short/long primers to introduce targeted mutations by SLIM cloning, matching the workflow outlined in the README.
610
610
 
611
611
  **Inputs**
@@ -616,7 +616,7 @@ def create_gui() -> gr.Blocks:
616
616
  **Outputs**
617
617
  - `SLIM_primers.csv` with primer sequences and annealing temperatures.
618
618
  - Log file capturing primer QC and any design warnings.
619
- """
619
+ """
620
620
  )
621
621
  )
622
622
  slim_gene = gr.Textbox(label="Gene sequence", lines=4)
@@ -640,13 +640,13 @@ def create_gui() -> gr.Blocks:
640
640
  4. Transform directly into NEB 5-alpha or BL21 (DE3); the method scales to dozens of mutants simultaneously.
641
641
  """
642
642
  )
643
- )
643
+ )
644
644
 
645
645
  with gr.Tab("Gibson"):
646
646
  gr.Markdown(
647
647
  textwrap.dedent(
648
- """
649
- ### Gibson Assembly Primer Design
648
+ """
649
+ ### Gibson Assembly Primer Design
650
650
  Plans primer sets and assembly steps for Gibson mutagenesis, supporting multi-mutation constructs using the `+` syntax (e.g. `A123G+T150A`).
651
651
 
652
652
  **Inputs**
@@ -658,7 +658,7 @@ def create_gui() -> gr.Blocks:
658
658
  - Primer CSV with overlap sequences and melting temperatures.
659
659
  - Assembly plan CSV detailing fragment combinations.
660
660
  - Log summarising design decisions and any warnings about overlapping regions.
661
- """
661
+ """
662
662
  )
663
663
  )
664
664
  gibson_gene = gr.Textbox(label="Gene sequence", lines=4)
@@ -681,13 +681,13 @@ def create_gui() -> gr.Blocks:
681
681
  - When replacing entire codons (e.g. `L46GP`), ensure the plasmid context covers both flanks to maintain overlap.
682
682
  """
683
683
  )
684
- )
684
+ )
685
685
 
686
686
  with gr.Tab("Mutation Caller"):
687
687
  gr.Markdown(
688
688
  textwrap.dedent(
689
- """
690
- ### Long-read Mutation Analysis
689
+ """
690
+ ### Long-read Mutation Analysis
691
691
  Extracts coding regions bounded by user-defined flanks, aligns them to the template, and reports amino-acid substitutions alongside co-occurrence summaries.
692
692
 
693
693
  **Required inputs**
@@ -695,8 +695,8 @@ def create_gui() -> gr.Blocks:
695
695
  - Template FASTA: coding sequence used as the reference for alignment.
696
696
  - Flank sequences: short 8–12 bp motifs immediately upstream and downstream of the gene.
697
697
  - Gene length bounds: acceptable size window (in nucleotides) for the extracted gene segment.
698
- """
699
- )
698
+ """
699
+ )
700
700
  )
701
701
  with gr.Row():
702
702
  mc_fastq = gr.File(
@@ -753,12 +753,12 @@ def create_gui() -> gr.Blocks:
753
753
  - Outputs mirror the CLI version: per-sample directories with CSV summaries, JSON co-occurrence graphs, QC plots, and a detailed `run.log`.
754
754
  """
755
755
  )
756
- )
756
+ )
757
757
 
758
758
  with gr.Tab("UMI Hunter"):
759
759
  gr.Markdown(
760
760
  textwrap.dedent(
761
- """
761
+ """
762
762
  ### UMI–Gene Pair Clustering
763
763
  Detects UMI barcodes, extracts paired gene inserts, clusters reads by UMI identity, and emits consensus sequences with abundance tables.
764
764
 
@@ -768,8 +768,8 @@ def create_gui() -> gr.Blocks:
768
768
  - UMI and gene flank sequences marking the barcode and insert boundaries.
769
769
  - UMI length bounds plus clustering thresholds.
770
770
  - Minimum reads per cluster to keep (clusters below the threshold are reported but no consensus is generated).
771
- """
772
- )
771
+ """
772
+ )
773
773
  )
774
774
  with gr.Row():
775
775
  umi_fastq = gr.File(
@@ -862,19 +862,19 @@ def create_gui() -> gr.Blocks:
862
862
  - Outputs include per-sample summaries, consensus FASTA files, cluster membership tables, QC plots, and logs mirroring the CLI workflow.
863
863
  """
864
864
  )
865
- )
865
+ )
866
866
 
867
867
  with gr.Tab("Profile Inserts"):
868
868
  gr.Markdown(
869
869
  textwrap.dedent(
870
- """
870
+ """
871
871
  ### Probe-Guided Insert Profiling
872
872
  Characterises inserts demarcated by user-supplied upstream/downstream probes, extracts sequences, and produces QC plots plus summary tables.
873
873
 
874
874
  **Required inputs**
875
875
  - FASTQ reads containing the inserts of interest.
876
876
  - One or more probe pairs: 5'→3' sequences for the upstream and downstream anchors (reverse complements are matched automatically).
877
- """
877
+ """
878
878
  )
879
879
  )
880
880
  probes_table = gr.Dataframe(
@@ -916,13 +916,13 @@ def create_gui() -> gr.Blocks:
916
916
  - Logs are stored alongside the results so runs remain fully reproducible.
917
917
  """
918
918
  )
919
- )
919
+ )
920
920
 
921
921
  with gr.Tab("EP Library Profile"):
922
922
  gr.Markdown(
923
923
  textwrap.dedent(
924
- """
925
- ### Library Profiling Without UMIs
924
+ """
925
+ ### Library Profiling Without UMIs
926
926
  Estimates background and target mutation rates for enzyme evolution libraries without UMI barcodes.
927
927
 
928
928
  **Inputs**
@@ -934,7 +934,7 @@ def create_gui() -> gr.Blocks:
934
934
  - Per-sample directories with coverage tables, mutation rate statistics, and QC plots.
935
935
  - `master_summary.txt` aggregating condition-level metrics.
936
936
  - Verbose logs recording alignment commands and rate calculations.
937
- """
937
+ """
938
938
  )
939
939
  )
940
940
  ep_fastq = gr.File(
@@ -963,7 +963,7 @@ def create_gui() -> gr.Blocks:
963
963
  - Download the archive to inspect per-sample plots, TSV summaries, the consensus summary, and logs for troubleshooting.
964
964
  """
965
965
  )
966
- )
966
+ )
967
967
 
968
968
  gr.Markdown(
969
969
  textwrap.dedent(
@@ -539,7 +539,7 @@ def run_qc_analysis(fastq_path, results_dir, ref_hit_fasta, plasmid_fasta):
539
539
  f.write(
540
540
  "Q-score\tMean_AA\tStd_AA\tCI_Lower\tCI_Upper\tMappable_Bases\tSegments\n"
541
541
  )
542
- for result in qc_results:
542
+ for result in qc_results:
543
543
  f.write(
544
544
  f"{result['quality_threshold']}\t"
545
545
  f"{result['mean_aa_mutations']:.6f}\t"
@@ -566,10 +566,10 @@ def compute_consensus_aa_mutation(
566
566
  ) -> Tuple[Optional[dict], List[dict]]:
567
567
  """
568
568
  Derive a consensus amino-acid mutation estimate across Q-score thresholds.
569
-
569
+
570
570
  Each threshold must meet a minimum coverage requirement. The consensus is a
571
571
  precision-weighted average (weights = 1 / std_aa_mutations).
572
-
572
+
573
573
  Returns:
574
574
  consensus_info (dict or None)
575
575
  {
@@ -648,7 +648,7 @@ def compute_consensus_aa_mutation(
648
648
  consensus_std,
649
649
  thresholds,
650
650
  )
651
-
651
+
652
652
  return consensus_info, valid_results
653
653
 
654
654
  def create_simple_qc_plots(quality_thresholds, qc_results, results_dir, consensus_info=None):
@@ -2170,12 +2170,12 @@ def run_main_analysis_for_qscore(fastq_path, qscore, qscore_desc, sample_name, w
2170
2170
  color="gray",
2171
2171
  transform=ax3.transAxes,
2172
2172
  )
2173
-
2174
- ax3.set_title("AA Mutation Distribution", fontsize=14, fontweight='bold')
2175
- ax3.set_xlabel("Number of AA Mutations", fontsize=12)
2176
- ax3.set_ylabel("Density", fontsize=12)
2177
- ax3.spines['top'].set_visible(False)
2178
- ax3.spines['right'].set_visible(False)
2173
+
2174
+ ax3.set_title("AA Mutation Distribution", fontsize=14, fontweight='bold')
2175
+ ax3.set_xlabel("Number of AA Mutations", fontsize=12)
2176
+ ax3.set_ylabel("Density", fontsize=12)
2177
+ ax3.spines['top'].set_visible(False)
2178
+ ax3.spines['right'].set_visible(False)
2179
2179
 
2180
2180
  # Save the combined figure as both PNG and PDF
2181
2181
  panel_path_png = os.path.join(qscore_results_dir, "summary_panels.png")
@@ -2380,7 +2380,7 @@ def run_ep_library_profile(
2380
2380
  output_dir.mkdir(parents=True, exist_ok=True)
2381
2381
  work_dir.mkdir(parents=True, exist_ok=True)
2382
2382
 
2383
- master_summary_path = output_dir / "master_summary.txt"
2383
+ master_summary_path = output_dir / "master_summary.txt"
2384
2384
  header = "\t".join(
2385
2385
  [
2386
2386
  "Sample",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
6
  License: MIT
@@ -141,6 +141,46 @@ Mutation nomenclature examples:
141
141
  - `T241Del` (deletion)
142
142
  - `T241TS` (insert Ser after Thr241)
143
143
  - `L46GP` (replace Leu46 with Gly-Pro)
144
+ - `A123:NNK` (library mutation with degenerate codon)
145
+
146
+ #### Library mutations with degenerate codons
147
+
148
+ For saturation mutagenesis and library generation, SLIM supports degenerate (IUPAC ambiguity) codons using the format `<WT_AA><position>:<codon>`. The codon must be exactly 3 characters using valid IUPAC nucleotide codes:
149
+
150
+ | Code | Bases | Mnemonic |
151
+ |------|-------|----------|
152
+ | A, C, G, T | Single base | Standard |
153
+ | R | A, G | puRine |
154
+ | Y | C, T | pYrimidine |
155
+ | S | G, C | Strong |
156
+ | W | A, T | Weak |
157
+ | K | G, T | Keto |
158
+ | M | A, C | aMino |
159
+ | B | C, G, T | not A |
160
+ | D | A, G, T | not C |
161
+ | H | A, C, T | not G |
162
+ | V | A, C, G | not T |
163
+ | N | A, C, G, T | aNy |
164
+
165
+ Common degenerate codon schemes for library construction:
166
+
167
+ | Scheme | Codons | Amino acids | Stop codons | Notes |
168
+ |--------|--------|-------------|-------------|-------|
169
+ | NNK | 32 | 20 | 1 (TAG) | Reduced stop codon frequency |
170
+ | NNS | 32 | 20 | 1 (TAG) | Same amino-acid and stop coverage as NNK; third base is G/C instead of G/T |
171
+ | NNN | 64 | 20 | 3 | All codons, higher stop frequency |
172
+ | NDT | 12 | 12 | 0 | F, L, I, V, Y, H, N, D, C, R, S, G only |
173
+
174
+ Example CSV with mixed mutation types:
175
+ ```csv
176
+ mutations
177
+ A123G
178
+ T50:NNK
179
+ S100:NNS
180
+ T241Del
181
+ ```
182
+
183
+ The workflow validates that the wild-type amino acid matches the template sequence and logs library coverage information (number of possible codons and amino acids) for each degenerate mutation. Primers are generated with the degenerate bases embedded; reverse primers contain the correct IUPAC reverse complements (e.g., K↔M, R↔Y, S↔S).
144
184
 
145
185
  #### Experimental blueprint
146
186
 
@@ -3,15 +3,15 @@ uht_tooling/cli.py,sha256=XnpJbMiuB3g5GL-d2bLf4TsDsd9eWDG-tjaAaMnAPTk,13008
3
3
  uht_tooling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  uht_tooling/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  uht_tooling/workflows/design_gibson.py,sha256=SQEThq6dxPMPCsUrwqMUaG5I-diE9jUXPRii9Y7O_7U,13617
6
- uht_tooling/workflows/design_slim.py,sha256=Qeh8N32kmVFZvohmTlBudJsLzOqLy4XcY3aXbkP-sFQ,14421
7
- uht_tooling/workflows/gui.py,sha256=P4FdZWsS0NLX5VmOZZ-WO-biVEhbfa6M1gY6DFcgR7k,43153
8
- uht_tooling/workflows/mut_rate.py,sha256=j8QzYe9QrT_yyhSYUbH3MHyvUp61U_h0w1bEd8b3aFI,109038
6
+ uht_tooling/workflows/design_slim.py,sha256=wGXnmaJCzlAZTjf2SRupwt_3MBl5cgZr1O9nnMQyoGo,17767
7
+ uht_tooling/workflows/gui.py,sha256=2TctLdsoqA9sx37erWWkUGjnQerPl1tPf2ShEfdL76k,43041
8
+ uht_tooling/workflows/mut_rate.py,sha256=jyqZbUE7617jF_gOF4m7gX-Rgc6-WV4fWS9oVxhnAUU,109082
9
9
  uht_tooling/workflows/mutation_caller.py,sha256=BczuNATOSUcmlw-x6qTzEQfW8MBbvGclEyqiQiBX0cg,16222
10
10
  uht_tooling/workflows/nextera_designer.py,sha256=8MZ_DyQ0JwPojXH5mZ6bAGAkqki_0qQGac45T_Ll8FQ,6170
11
11
  uht_tooling/workflows/profile_inserts.py,sha256=C-SZ10YefiV_4QZbo1oEkI4qYipwaYqPP5jF-MC5O58,16947
12
12
  uht_tooling/workflows/umi_hunter.py,sha256=baycWycqVzUfMp5u2WZdHRl0sNuykTjy-iqtj5ahucU,15075
13
- uht_tooling-0.1.7.dist-info/METADATA,sha256=YuHkyuvRdznGgVH111anZaqsOBt9k-szz1vJGF-eWy0,12925
14
- uht_tooling-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- uht_tooling-0.1.7.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
16
- uht_tooling-0.1.7.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
17
- uht_tooling-0.1.7.dist-info/RECORD,,
13
+ uht_tooling-0.1.8.dist-info/METADATA,sha256=dQ8u8XSyBvbujsLyWIKAZqcDxqIkYb8BU1fFddAjxDs,14436
14
+ uht_tooling-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
+ uht_tooling-0.1.8.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
16
+ uht_tooling-0.1.8.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
17
+ uht_tooling-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5