uht-tooling 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uht_tooling/cli.py +40 -0
- uht_tooling/workflows/design_kld.py +687 -0
- uht_tooling/workflows/design_slim.py +75 -1
- uht_tooling/workflows/gui.py +101 -27
- uht_tooling/workflows/mut_rate.py +9 -9
- {uht_tooling-0.1.7.dist-info → uht_tooling-0.1.9.dist-info}/METADATA +96 -8
- {uht_tooling-0.1.7.dist-info → uht_tooling-0.1.9.dist-info}/RECORD +10 -9
- {uht_tooling-0.1.7.dist-info → uht_tooling-0.1.9.dist-info}/WHEEL +1 -1
- {uht_tooling-0.1.7.dist-info → uht_tooling-0.1.9.dist-info}/entry_points.txt +0 -0
- {uht_tooling-0.1.7.dist-info → uht_tooling-0.1.9.dist-info}/top_level.txt +0 -0
uht_tooling/cli.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import Optional
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
6
|
from uht_tooling.workflows.design_gibson import run_design_gibson
|
|
7
|
+
from uht_tooling.workflows.design_kld import run_design_kld
|
|
7
8
|
from uht_tooling.workflows.design_slim import run_design_slim
|
|
8
9
|
from uht_tooling.workflows.mutation_caller import (
|
|
9
10
|
expand_fastq_inputs as expand_fastq_inputs_mutation,
|
|
@@ -66,6 +67,45 @@ def design_slim_command(
|
|
|
66
67
|
typer.echo(f"SLIM primers written to {output_dir / 'SLIM_primers.csv'}")
|
|
67
68
|
|
|
68
69
|
|
|
70
|
+
@app.command("design-kld", help="Design KLD (inverse PCR) primers from user-specified FASTA/CSV inputs.")
def design_kld_command(
    gene_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Path to the gene FASTA file.",
    ),
    context_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Path to the context FASTA file containing the plasmid or genomic sequence.",
    ),
    mutations_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file containing a 'mutations' column with the desired edits.",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where results will be written.",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file for this run.",
    ),
):
    """Design KLD (inverse PCR) primers from user-provided inputs.

    Forwards the validated command-line options unchanged to
    ``run_design_kld`` and echoes the path it returns.
    """
    # All option validation (existence, readability, writability) is delegated
    # to Typer; this wrapper only forwards the values and reports the result.
    primers_csv = run_design_kld(
        gene_fasta=gene_fasta,
        context_fasta=context_fasta,
        mutations_csv=mutations_csv,
        output_dir=output_dir,
        log_path=log_path,
    )
    typer.echo(f"KLD primers written to {primers_csv}")
|
|
107
|
+
|
|
108
|
+
|
|
69
109
|
@app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
|
|
70
110
|
def nextera_primers_command(
|
|
71
111
|
binding_csv: Path = typer.Option(
|
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
"""KLD (Kinase-Ligation-DpnI) primer design for inverse PCR mutagenesis."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import csv
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from Bio import SeqIO
|
|
12
|
+
from Bio.Seq import Seq
|
|
13
|
+
from Bio.SeqUtils import MeltingTemp as mt
|
|
14
|
+
|
|
15
|
+
# Thresholds used when searching for KLD primer binding regions.
MIN_TM = 50.0             # lowest accepted binding-region Tm (reported in °C)
MAX_TM = 65.0             # highest accepted binding-region Tm (reported in °C)
TM_DIFF_THRESHOLD = 5.0   # largest tolerated Tm gap between forward and reverse primers
MIN_LENGTH = 18           # shortest template-binding region tried
MAX_LENGTH = 24           # longest template-binding region tried
MIN_GC = 40.0             # lowest accepted GC percentage
MAX_GC = 60.0             # highest accepted GC percentage
MIN_BINDING = 10          # floor for the binding region when trimming to balance Tm

# IUPAC nucleotide codes mapped to the standard bases each one represents.
# The order of every value list is significant: it determines the order in
# which degenerate sequences are expanded.
IUPAC_AMBIGUITY = {
    # Unambiguous bases stand for themselves.
    'A': ['A'],
    'C': ['C'],
    'G': ['G'],
    'T': ['T'],
    # Two-base codes.
    'R': ['A', 'G'],  # puRine
    'Y': ['C', 'T'],  # pYrimidine
    'S': ['G', 'C'],  # Strong
    'W': ['A', 'T'],  # Weak
    'K': ['G', 'T'],  # Keto
    'M': ['A', 'C'],  # aMino
    # Three-base codes.
    'B': ['C', 'G', 'T'],  # not A
    'D': ['A', 'G', 'T'],  # not C
    'H': ['A', 'C', 'T'],  # not G
    'V': ['A', 'C', 'G'],  # not T
    # Any base.
    'N': ['A', 'C', 'G', 'T'],
}

# Every single-letter code accepted inside a degenerate codon.
VALID_DEGENERATE_BASES = set(IUPAC_AMBIGUITY)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def is_valid_degenerate_codon(codon: str) -> bool:
    """Return True when ``codon`` is exactly three IUPAC nucleotide codes.

    Matching is case-insensitive; any character outside
    ``VALID_DEGENERATE_BASES`` or any length other than 3 gives False.
    """
    if len(codon) != 3:
        return False
    for base in codon:
        if base.upper() not in VALID_DEGENERATE_BASES:
            return False
    return True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def contains_degenerate_bases(seq: str) -> bool:
    """Report whether ``seq`` holds any character other than A, C, G or T.

    The comparison ignores case. An empty sequence contains no such
    character, so the result is False.
    """
    standard_bases = ('A', 'C', 'G', 'T')
    for base in seq:
        if base.upper() not in standard_bases:
            return True
    return False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def expand_degenerate_sequence(seq: str) -> List[str]:
    """List every concrete sequence encoded by a degenerate sequence.

    Each position is replaced by the bases that ``IUPAC_AMBIGUITY`` lists for
    it (looked up case-insensitively); a character with no entry is kept
    as-is. Results are ordered with the last position varying fastest, and an
    empty input yields a single empty string.
    """
    expansions = ['']
    for base in seq:
        options = IUPAC_AMBIGUITY.get(base.upper(), [base])
        # Extending every prefix with every option keeps the same ordering as
        # a cartesian product over the per-position option lists.
        expansions = [prefix + option for prefix in expansions for option in options]
    return expansions
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def codon_table():
    """Return a fresh dict mapping each of the 64 DNA codons to an amino acid.

    Amino acids are one-letter codes and stop codons map to ``"*"``. Keys are
    inserted in T, C, A, G order for the first, then second, then third base,
    which is the order callers see when iterating the table.
    """
    bases = "TCAG"
    # One residue per codon, in the same T/C/A/G-nested order as the keys.
    residues = (
        "FFLLSSSSYY**CC*W"   # T..
        "LLLLPPPPHHQQRRRR"   # C..
        "IIIMTTTTNNKKSSRR"   # A..
        "VVVVAAAADDEEGGGG"   # G..
    )
    codons = (first + second + third for first in bases for second in bases for third in bases)
    return dict(zip(codons, residues))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def translate_codon(cd: str) -> str:
    """Translate a codon into its one-letter amino acid, case-insensitively.

    Returns ``"?"`` for anything that is not a key of ``codon_table()``.
    """
    table = codon_table()
    key = cd.upper()
    if key in table:
        return table[key]
    return "?"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def pick_mutant_codon(wt_codon: str, target_aa: str) -> Optional[str]:
    """Choose the codon for ``target_aa`` with the fewest changes from ``wt_codon``.

    Candidates are compared position by position against the upper-cased
    wild-type codon. When several codons tie, the one appearing first in
    ``codon_table()`` is returned. Returns None when no codon encodes
    ``target_aa``.
    """
    reference = wt_codon.upper()
    candidates = [codon.upper() for codon, aa in codon_table().items() if aa == target_aa]
    if not candidates:
        return None

    def mismatches(candidate: str) -> int:
        # zip stops at the shorter string, so only overlapping positions count.
        return sum(left != right for left, right in zip(candidate, reference))

    # min() keeps the first of equally good candidates, i.e. table order.
    return min(candidates, key=mismatches)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def calc_tm_binding_region(seq: str) -> float:
    """Return the melting temperature of a binding region via ``mt.Tm_NN``.

    A sequence made only of A/C/G/T is passed straight to ``mt.Tm_NN``. A
    sequence with degenerate bases is expanded to every concrete sequence and
    the arithmetic mean of their Tm values is returned.
    """
    if not contains_degenerate_bases(seq):
        return mt.Tm_NN(seq)
    melting_temps = [mt.Tm_NN(variant) for variant in expand_degenerate_sequence(seq)]
    return sum(melting_temps) / len(melting_temps)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def calc_gc_content(seq: str) -> float:
    """Return the percentage of G and C characters in ``seq`` (0.0 if empty).

    Counting is case-insensitive. Only literal G and C are counted, so
    degenerate codes contribute to the length but not to the GC count.
    """
    normalized = seq.upper()
    if not normalized:
        return 0.0
    gc_total = normalized.count('G') + normalized.count('C')
    return (gc_total / len(normalized)) * 100
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def design_forward_primer(
    full_seq: str,
    mutation_nt_pos: int,
    new_codon: str,
    min_tm: float = MIN_TM,
    max_tm: float = MAX_TM,
    min_len: int = MIN_LENGTH,
    max_len: int = MAX_LENGTH,
) -> Tuple[str, str, float, float, int]:
    """
    Build the forward KLD primer: mutated sequence plus downstream WT binding region.

    The primer is ``new_codon`` followed by a slice of ``full_seq`` that starts
    three nucleotides after ``mutation_nt_pos`` (one codon is always skipped,
    whatever the length of ``new_codon``).

    Selection happens in two stages over every binding length from
    ``min_len`` to ``max_len`` that still fits inside ``full_seq``:

    1. Among candidates whose Tm lies in ``[min_tm, max_tm]`` and whose GC
       percentage lies in ``[MIN_GC, MAX_GC]``, the one with Tm closest to the
       midpoint of the Tm window is chosen (the shortest wins a tie).
    2. If none qualifies, the shortest candidate whose Tm lies within 5 of the
       Tm window is used; GC is not checked in this fallback.

    Args:
        full_seq: Full plasmid/context sequence.
        mutation_nt_pos: 0-based index of the first nucleotide of the codon
            being replaced.
        new_codon: Sequence placed at the primer's 5' end (may be degenerate).
        min_tm: Lower Tm bound for the binding region.
        max_tm: Upper Tm bound for the binding region.
        min_len: Shortest binding region to try.
        max_len: Longest binding region to try.

    Returns:
        Tuple ``(full_primer, binding_region, tm, gc, total_length)`` where
        ``tm`` and ``gc`` describe the binding region only and
        ``total_length`` is ``len(full_primer)``.

    Raises:
        ValueError: If neither stage yields a binding region.
    """
    anchor = mutation_nt_pos + 3  # first nucleotide after the replaced codon
    midpoint = (min_tm + max_tm) / 2

    # Score every binding window that fits; lengths increase, so the first
    # overflow means all longer windows overflow too.
    candidates = []
    for size in range(min_len, max_len + 1):
        stop = anchor + size
        if stop > len(full_seq):
            break
        window = full_seq[anchor:stop]
        candidates.append(
            (window, calc_tm_binding_region(window), calc_gc_content(window), size)
        )

    in_spec = [
        entry for entry in candidates
        if min_tm <= entry[1] <= max_tm and MIN_GC <= entry[2] <= MAX_GC
    ]
    if in_spec:
        # min() returns the first of equal keys, so the shortest window wins ties.
        chosen = min(in_spec, key=lambda entry: abs(entry[1] - midpoint))
    else:
        # Relaxed pass: widen the Tm window by 5 on each side and ignore GC.
        chosen = next(
            (entry for entry in candidates if min_tm - 5 <= entry[1] <= max_tm + 5),
            None,
        )

    if chosen is None:
        raise ValueError(
            f"Cannot find forward primer binding region meeting constraints "
            f"(pos={mutation_nt_pos}, Tm={min_tm}-{max_tm}°C, len={min_len}-{max_len}bp)"
        )

    window, tm, gc, _ = chosen
    primer = new_codon + window
    return primer, window, tm, gc, len(primer)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def design_reverse_primer(
    full_seq: str,
    mutation_nt_pos: int,
    min_tm: float = MIN_TM,
    max_tm: float = MAX_TM,
    min_len: int = MIN_LENGTH,
    max_len: int = MAX_LENGTH,
) -> Tuple[str, float, float, int]:
    """
    Build the reverse KLD primer from the WT sequence just upstream of the mutation.

    The primer is the reverse complement of the slice of ``full_seq`` that
    ends immediately before ``mutation_nt_pos``, so its 5' end sits at
    ``mutation_nt_pos - 1``.

    Selection happens in two stages over every length from ``min_len`` to
    ``max_len`` for which the slice does not run past the start of
    ``full_seq``:

    1. Among candidates whose Tm lies in ``[min_tm, max_tm]`` and whose GC
       percentage lies in ``[MIN_GC, MAX_GC]``, the one with Tm closest to the
       midpoint of the Tm window is chosen (the shortest wins a tie).
    2. If none qualifies, the shortest candidate whose Tm lies within 5 of the
       Tm window is used; GC is not checked in this fallback.

    Args:
        full_seq: Full plasmid/context sequence.
        mutation_nt_pos: 0-based index of the first mutated nucleotide; the
            upstream slice ends just before it.
        min_tm: Lower Tm bound.
        max_tm: Upper Tm bound.
        min_len: Shortest primer to try.
        max_len: Longest primer to try.

    Returns:
        Tuple ``(primer_seq, tm, gc, length)``; ``tm`` and ``gc`` are computed
        on the upstream template slice.

    Raises:
        ValueError: If neither stage yields a primer.
    """
    boundary = mutation_nt_pos  # exclusive end of the upstream slice
    midpoint = (min_tm + max_tm) / 2

    # Score every upstream window that fits; once one would start before
    # index 0, all longer ones would too.
    candidates = []
    for size in range(min_len, max_len + 1):
        begin = boundary - size
        if begin < 0:
            break
        template = full_seq[begin:boundary]
        candidates.append(
            (template, calc_tm_binding_region(template), calc_gc_content(template), size)
        )

    in_spec = [
        entry for entry in candidates
        if min_tm <= entry[1] <= max_tm and MIN_GC <= entry[2] <= MAX_GC
    ]
    if in_spec:
        # min() returns the first of equal keys, so the shortest window wins ties.
        chosen = min(in_spec, key=lambda entry: abs(entry[1] - midpoint))
    else:
        # Relaxed pass: widen the Tm window by 5 on each side and ignore GC.
        chosen = next(
            (entry for entry in candidates if min_tm - 5 <= entry[1] <= max_tm + 5),
            None,
        )

    if chosen is None:
        raise ValueError(
            f"Cannot find reverse primer meeting constraints "
            f"(pos={mutation_nt_pos}, Tm={min_tm}-{max_tm}°C, len={min_len}-{max_len}bp)"
        )

    template, tm, gc, size = chosen
    return str(Seq(template).reverse_complement()), tm, gc, size
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def balance_primer_tms(
    fwd_result: Tuple[str, str, float, float, int],
    rev_result: Tuple[str, float, float, int],
    full_seq: str,
    mutation_nt_pos: int,
    new_codon: str,
    tm_threshold: float = TM_DIFF_THRESHOLD,
) -> Tuple[Tuple[str, str, float, float, int], Tuple[str, float, float, int]]:
    """
    Narrow the Tm gap between a forward/reverse primer pair by trimming.

    If the two Tm values differ by more than ``tm_threshold``, the hotter
    primer loses nucleotides from its 3' end, one at a time, until the gap is
    within the threshold or its binding region is down to ``MIN_BINDING``
    nucleotides. The result may therefore still exceed the threshold, and the
    trimmed primer's Tm is not re-checked against any minimum.

    A primer that is not trimmed is returned unchanged. For a trimmed forward
    primer the reported GC is that of the binding region, matching what
    ``design_forward_primer`` reports.

    Args:
        fwd_result: Forward primer tuple
            ``(full_primer, binding_region, tm, gc, total_length)``.
        rev_result: Reverse primer tuple ``(primer_seq, tm, gc, length)``.
        full_seq: Full plasmid/context sequence the reverse primer was cut from.
        mutation_nt_pos: 0-based position where the reverse primer's upstream
            template ends (exclusive).
        new_codon: Mutated sequence at the 5' end of the forward primer.
        tm_threshold: Largest accepted Tm difference.

    Returns:
        The (possibly adjusted) ``(fwd_result, rev_result)`` pair.
    """
    fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = fwd_result
    rev_seq, rev_tm, rev_gc, rev_len = rev_result

    if abs(fwd_tm - rev_tm) <= tm_threshold:
        return fwd_result, rev_result

    if fwd_tm > rev_tm:
        # Forward primer is hotter: drop bases from the 3' end of its
        # template-binding region (the mutated 5' part is never touched).
        trimmed = False
        while len(fwd_binding) > MIN_BINDING and fwd_tm - rev_tm > tm_threshold:
            fwd_binding = fwd_binding[:-1]
            fwd_tm = calc_tm_binding_region(fwd_binding)
            trimmed = True

        if trimmed:
            fwd_seq = new_codon + fwd_binding
            # GC is reported for the binding region only, so the value keeps
            # the same meaning whether or not balancing trimmed the primer.
            fwd_gc = calc_gc_content(fwd_binding)
            fwd_result = (fwd_seq, fwd_binding, fwd_tm, fwd_gc, len(fwd_seq))
    else:
        # Reverse primer is hotter. It is the reverse complement of
        # full_seq[start:upstream_end], so its 3' end corresponds to `start`;
        # trimming means moving `start` towards `upstream_end`.
        upstream_end = mutation_nt_pos
        current_len = rev_len
        trimmed_template = None  # stays None when no trimming step runs

        while current_len > MIN_BINDING and rev_tm - fwd_tm > tm_threshold:
            upstream_start = upstream_end - (current_len - 1)
            if upstream_start < 0:
                break
            current_len -= 1
            trimmed_template = full_seq[upstream_start:upstream_end]
            rev_tm = calc_tm_binding_region(trimmed_template)

        if trimmed_template is not None:
            rev_seq = str(Seq(trimmed_template).reverse_complement())
            rev_gc = calc_gc_content(rev_seq)
            rev_result = (rev_seq, rev_tm, rev_gc, len(rev_seq))

    return fwd_result, rev_result
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _read_first_fasta_sequence(fasta_path: Path, label: str) -> str:
    """Return the first record of a FASTA file as an upper-case string."""
    record = next(SeqIO.parse(str(fasta_path), "fasta"), None)
    if record is None:
        # Report an empty/unparseable FASTA as the documented ValueError
        # instead of leaking StopIteration from next().
        raise ValueError(f"No FASTA records found in {label} file: {fasta_path}")
    return str(record.seq).upper()


def _require_wt_residue(wt_codon: str, wt_aa: str, mutation: str, logger: logging.Logger) -> None:
    """Raise ValueError unless ``wt_codon`` translates to the stated residue."""
    translated = translate_codon(wt_codon)
    if translated != wt_aa:
        logger.error(
            "Expected %s but found %s at codon %s for mutation %s",
            wt_aa, translated, wt_codon, mutation,
        )
        raise ValueError(
            f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
        )


def _resolve_mutation(
    mutation: str,
    full_seq: str,
    gene_offset: int,
    logger: logging.Logger,
) -> Tuple[int, int, str, str, bool]:
    """Turn a mutation string into ``(region_start, replaced_len, new_seq, notes, is_deletion)``.

    ``region_start`` is the 0-based index in ``full_seq`` of the first
    nucleotide removed (or of the insertion point), ``replaced_len`` is the
    number of wild-type nucleotides removed, and ``new_seq`` is what is
    written in their place.
    """
    m_del = re.match(r"^([A-Z])(\d+)Del$", mutation)
    m_indel = re.match(r"^([A-Z])(\d+)InDel([A-Z])(\d+)([A-Z]+)$", mutation)
    m_sub = re.match(r"^([A-Z])(\d+)([A-Z])$", mutation)
    m_ins = re.match(r"^([A-Z])(\d+)([A-Z]{2,})$", mutation)
    m_lib = re.match(r"^([A-Z])(\d+):([A-Za-z]{3})$", mutation)

    if m_del:
        # Deletion: one codon is removed and nothing is written in its place.
        pos1 = int(m_del.group(2))
        region_start = gene_offset + (pos1 - 1) * 3
        return region_start, 3, "", "Deletion", True

    if m_indel:
        # InDel: residues pos1..pos2 (inclusive) are replaced by new residues.
        wt1, pos1_s, _wt2, pos2_s, ins_aa = m_indel.groups()
        pos1, pos2 = int(pos1_s), int(pos2_s)
        if pos2 < pos1:
            raise ValueError(
                f"InDel range end {pos2} precedes start {pos1} in {mutation}"
            )
        region_start = gene_offset + (pos1 - 1) * 3
        wt_codon = full_seq[region_start : region_start + 3]
        codons = []
        for aa in ins_aa:
            codon = pick_mutant_codon(wt_codon, aa)
            if not codon:
                logger.error("No codon found for %s->%s", wt1, ins_aa)
                raise ValueError(f"No codon found for {wt1}->{ins_aa}")
            codons.append(codon)
        span = pos2 - pos1 + 1
        notes = f"InDel: {span} AA -> {len(ins_aa)} AA"
        # The whole pos1..pos2 range is removed, not just its first codon.
        return region_start, span * 3, "".join(codons), notes, False

    if m_ins:
        wt_aa, pos1_s, ins_str = m_ins.groups()
        pos1 = int(pos1_s)
        codon_start_old = gene_offset + (pos1 - 1) * 3
        wt_codon = full_seq[codon_start_old : codon_start_old + 3]
        if ins_str[0] == wt_aa:
            # First letter repeats the WT residue: keep the WT codon and
            # insert the remaining residues directly after it, removing
            # nothing from the template.
            inserted_aas = ins_str[1:]
            region_start = codon_start_old + 3
            replaced_len = 0
        else:
            # Otherwise the WT codon itself is replaced by all listed residues.
            inserted_aas = ins_str
            region_start = codon_start_old
            replaced_len = 3
        codons = []
        for aa in inserted_aas:
            codon = pick_mutant_codon(wt_codon, aa)
            if not codon:
                logger.error("No codon for insertion amino acid %s", aa)
                raise ValueError(f"No codon for insertion amino acid {aa}")
            codons.append(codon)
        notes = f"Insertion: +{len(inserted_aas)} AA"
        return region_start, replaced_len, "".join(codons), notes, False

    if m_sub:
        # Substitution: a single codon is swapped for a minimally changed one.
        wt_aa, pos1, mut_aa = m_sub.group(1), int(m_sub.group(2)), m_sub.group(3)
        region_start = gene_offset + (pos1 - 1) * 3
        wt_codon = full_seq[region_start : region_start + 3]
        _require_wt_residue(wt_codon, wt_aa, mutation, logger)
        new_seq = pick_mutant_codon(wt_codon, mut_aa)
        if not new_seq:
            logger.error("No minimal-change codon for %s->%s", wt_aa, mut_aa)
            raise ValueError(f"No minimal-change codon for {wt_aa}->{mut_aa}")
        return region_start, 3, new_seq, f"Substitution: {wt_aa}->{mut_aa}", False

    if m_lib:
        # Library: a single codon is swapped for a user-supplied degenerate codon.
        wt_aa, pos_str, degenerate_codon = m_lib.groups()
        pos = int(pos_str)
        degenerate_codon = degenerate_codon.upper()
        if not is_valid_degenerate_codon(degenerate_codon):
            raise ValueError(f"Invalid degenerate codon: {degenerate_codon}")

        region_start = gene_offset + (pos - 1) * 3
        wt_codon = full_seq[region_start : region_start + 3]
        _require_wt_residue(wt_codon, wt_aa, mutation, logger)

        expanded_codons = expand_degenerate_sequence(degenerate_codon)
        unique_aas = {
            aa for aa in map(translate_codon, expanded_codons) if aa != '?'
        }
        logger.info(
            "Library mutation %s: %d possible codons, %d amino acids",
            mutation, len(expanded_codons), len(unique_aas)
        )
        notes = f"Library: {len(expanded_codons)} codons, {len(unique_aas)} AAs"
        return region_start, 3, degenerate_codon, notes, False

    logger.error("Unknown mutation format: %s", mutation)
    raise ValueError(f"Unknown mutation format: {mutation}")


def _design_deletion_forward_primer(
    full_seq: str,
    binding_start: int,
    mutation: str,
) -> Tuple[str, str, float, float, int]:
    """Pick the forward primer for a deletion: pure WT sequence from ``binding_start``.

    Only candidates inside the default Tm and GC windows are accepted; the one
    with Tm closest to the middle of the Tm window wins.
    """
    best_fwd = None
    best_tm_diff = float('inf')
    target_tm = (MIN_TM + MAX_TM) / 2

    for length in range(MIN_LENGTH, MAX_LENGTH + 1):
        binding_end = binding_start + length
        if binding_end > len(full_seq):
            break
        binding_seq = full_seq[binding_start:binding_end]
        tm = calc_tm_binding_region(binding_seq)
        gc = calc_gc_content(binding_seq)
        if MIN_TM <= tm <= MAX_TM and MIN_GC <= gc <= MAX_GC:
            tm_diff = abs(tm - target_tm)
            if tm_diff < best_tm_diff:
                best_tm_diff = tm_diff
                # With nothing inserted, the primer is its own binding region.
                best_fwd = (binding_seq, binding_seq, tm, gc, length)

    if not best_fwd:
        raise ValueError(f"Cannot find forward primer for deletion {mutation}")
    return best_fwd


def run_design_kld(
    gene_fasta: Path,
    context_fasta: Path,
    mutations_csv: Path,
    output_dir: Path,
    log_path: Optional[Path] = None,
    logger: Optional[logging.Logger] = None,
) -> Path:
    """
    Design KLD (inverse PCR) primers for a list of mutations.

    For every entry of the CSV's ``mutations`` column a forward and a reverse
    primer are written to ``<output_dir>/KLD_primers.csv``. The forward primer
    carries the new sequence at its 5' end followed by wild-type sequence
    downstream of the edited region; the reverse primer is the reverse
    complement of the wild-type sequence ending right before the edited
    region.

    Recognised mutation formats (``A``/``B`` are one-letter residues):

    * ``A12Del`` - delete residue 12.
    * ``A12InDelB15XYZ`` - replace residues 12..15 with ``XYZ``.
    * ``A12B`` - substitute residue 12.
    * ``A12AXY`` - insert ``XY`` after residue 12 (first letter repeats the
      wild-type residue); ``A12XY`` with a different first letter replaces
      residue 12 with ``XY``.
    * ``A12:NNK`` - replace residue 12 with a degenerate codon.

    Processing stops at the first mutation that fails; rows already written
    remain in the output file.

    Args:
        gene_fasta: FASTA file whose first record is the gene sequence.
        context_fasta: FASTA file whose first record is the plasmid/context
            sequence; it must contain the gene as an exact substring.
        mutations_csv: CSV file with a ``mutations`` column.
        output_dir: Directory for the output CSV (created if missing).
        log_path: Optional log file; used only when ``logger`` is None.
        logger: Optional logger. When omitted, a logger is configured for this
            call and its handlers are removed again before returning.

    Returns:
        Path to the output CSV file.

    Raises:
        ValueError: If a FASTA file has no record, the ``mutations`` column is
            missing, the gene is not found in the context, or a mutation
            cannot be parsed or designed.
    """
    gene_fasta = Path(gene_fasta)
    context_fasta = Path(context_fasta)
    mutations_csv = Path(mutations_csv)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    managed_logger = logger is None
    if logger is None:
        logger = logging.getLogger("uht_tooling.design_kld")
        logger.setLevel(logging.INFO)
        handler: logging.Handler
        if log_path:
            handler = logging.FileHandler(log_path, mode="w")
        else:
            handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s: %(message)s"))
        logger.handlers = []
        logger.addHandler(handler)
        logger.propagate = False

    try:
        # Load sequences
        gene = _read_first_fasta_sequence(gene_fasta, "gene")
        context = _read_first_fasta_sequence(context_fasta, "context")
        logger.info("Loaded gene (%s nt) and context (%s nt).", len(gene), len(context))

        # Load mutations
        df = pd.read_csv(mutations_csv)
        if "mutations" not in df.columns:
            raise ValueError("Mutations CSV must contain a 'mutations' column.")
        mutations = df["mutations"].dropna().tolist()
        logger.info("Loaded %s mutation entries.", len(mutations))

        # Align gene within context
        try:
            gene_offset = context.index(gene)
            logger.info("Gene aligned at offset %s within context.", gene_offset)
        except ValueError as exc:
            message = "Could not align gene within context. No perfect substring match found."
            logger.error(message)
            raise ValueError(message) from exc

        full_seq = context

        # Output file
        results_path = output_dir / "KLD_primers.csv"
        with results_path.open("w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                "Primer Name", "Sequence", "Tm (binding)", "GC%", "Length", "Notes"
            ])

            for raw_mutation in mutations:
                # Cells may carry stray whitespace or be parsed as non-strings.
                mutation = str(raw_mutation).strip()
                try:
                    region_start, replaced_len, new_seq, notes, is_deletion = _resolve_mutation(
                        mutation, full_seq, gene_offset, logger
                    )
                    # Forward binding must begin right after everything that
                    # is removed from the template.
                    fwd_binding_start = region_start + replaced_len

                    if is_deletion:
                        fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = (
                            _design_deletion_forward_primer(
                                full_seq, fwd_binding_start, mutation
                            )
                        )
                        rev_seq, rev_tm, rev_gc, rev_len = design_reverse_primer(
                            full_seq, region_start
                        )
                    else:
                        # design_forward_primer starts its binding region three
                        # nucleotides after the position it is given, so hand
                        # it the position three before the intended start.
                        fwd_result = design_forward_primer(
                            full_seq, fwd_binding_start - 3, new_seq
                        )
                        rev_result = design_reverse_primer(full_seq, region_start)

                        # Balance Tms (the reverse primer is re-cut relative
                        # to region_start, where its template ends).
                        fwd_result, rev_result = balance_primer_tms(
                            fwd_result, rev_result, full_seq, region_start, new_seq
                        )

                        fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = fwd_result
                        rev_seq, rev_tm, rev_gc, rev_len = rev_result

                    # Write forward primer
                    writer.writerow([
                        f"{mutation}_F",
                        fwd_seq,
                        f"{fwd_tm:.1f}",
                        f"{fwd_gc:.1f}",
                        fwd_len,
                        notes,
                    ])

                    # Write reverse primer
                    writer.writerow([
                        f"{mutation}_R",
                        rev_seq,
                        f"{rev_tm:.1f}",
                        f"{rev_gc:.1f}",
                        rev_len,
                        "",
                    ])

                    logger.info(
                        "Designed KLD primers for %s: F_Tm=%.1f°C, R_Tm=%.1f°C, diff=%.1f°C",
                        mutation, fwd_tm, rev_tm, abs(fwd_tm - rev_tm)
                    )

                except Exception as exc:
                    # Log which entry failed, then let the error propagate.
                    logger.error("Error processing mutation %s: %s", mutation, exc)
                    raise

        logger.info("KLD primer design completed successfully. Output written to %s", results_path)
        return results_path

    finally:
        # Only tear down logging that this call set up itself.
        if managed_logger and logger:
            for handler in list(logger.handlers):
                handler.close()
                logger.removeHandler(handler)
            logger.propagate = True
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argparse parser for running this module as a script.

    Four required path options (gene FASTA, context FASTA, mutations CSV,
    output directory) and one optional ``--log-path`` are registered, each
    converted to ``pathlib.Path``.
    """
    cli = argparse.ArgumentParser(
        description="Design KLD (inverse PCR) primers from user-provided inputs."
    )

    # Required options share the same shape, so register them from a table;
    # the order here is the order shown in --help.
    required_paths = (
        ("--gene-fasta", "Path to FASTA file containing the gene sequence."),
        ("--context-fasta", "Path to FASTA file containing the plasmid or genomic context."),
        ("--mutations-csv", "CSV file containing a 'mutations' column with each mutation specification."),
        ("--output-dir", "Directory where results and logs will be written."),
    )
    for flag, description in required_paths:
        cli.add_argument(flag, required=True, type=Path, help=description)

    cli.add_argument(
        "--log-path",
        default=None,
        type=Path,
        help="Optional path for the run log (defaults to console logging).",
    )
    return cli
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def main(argv: Optional[List[str]] = None):
    """Parse command-line arguments and run the KLD primer design.

    Args:
        argv: Argument list handed to the parser; None makes argparse read
            the process arguments.
    """
    options = build_parser().parse_args(argv)
    run_design_kld(
        gene_fasta=options.gene_fasta,
        context_fasta=options.context_fasta,
        mutations_csv=options.mutations_csv,
        output_dir=options.output_dir,
        log_path=options.log_path,
    )


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -16,6 +16,41 @@ TARGET_TM = 60.0
|
|
|
16
16
|
MAX_TM = 70.0
|
|
17
17
|
UPSTREAM_15 = 12
|
|
18
18
|
|
|
19
|
+
# IUPAC nucleotide codes mapped to the list of standard bases each one covers.
# Mnemonics: R = puRine, Y = pYrimidine, S = Strong, W = Weak, K = Keto,
# M = aMino; B / D / H / V = "not A" / "not C" / "not G" / "not T"; N = any.
IUPAC_AMBIGUITY = {
    code: list(bases)
    for code, bases in (
        ("A", "A"),
        ("C", "C"),
        ("G", "G"),
        ("T", "T"),
        ("R", "AG"),
        ("Y", "CT"),
        ("S", "GC"),
        ("W", "AT"),
        ("K", "GT"),
        ("M", "AC"),
        ("B", "CGT"),
        ("D", "AGT"),
        ("H", "ACT"),
        ("V", "ACG"),
        ("N", "ACGT"),
    )
}

# Every single-letter code that has an entry in the table above.
VALID_DEGENERATE_BASES = set(IUPAC_AMBIGUITY)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def is_valid_degenerate_codon(codon: str) -> bool:
    """Return True when *codon* is exactly three recognised IUPAC codes.

    Each character is upper-cased before it is looked up in
    ``VALID_DEGENERATE_BASES``, so lower-case input is accepted.
    """
    if len(codon) != 3:
        return False
    for base in codon:
        if base.upper() not in VALID_DEGENERATE_BASES:
            return False
    return True
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def contains_degenerate_bases(seq: str) -> bool:
    """Report whether *seq* has any character other than A, C, G or T.

    The comparison is case-insensitive. Any other character counts,
    including symbols that are not IUPAC codes at all. An empty sequence
    yields ``False``.
    """
    standard_bases = ("A", "C", "G", "T")
    for base in seq:
        if base.upper() not in standard_bases:
            return True
    return False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def expand_degenerate_sequence(seq: str) -> list[str]:
    """List every concrete sequence that a degenerate sequence can represent.

    Each character is upper-cased and looked up in ``IUPAC_AMBIGUITY``.
    A character with no entry there is kept as-is, in its original case,
    as the only option for that position. Results follow the order of
    ``itertools.product`` over the per-position choices, so the result
    length is the product of the per-position option counts.
    """
    from itertools import product

    per_position_choices = []
    for base in seq:
        # Unknown symbols fall back to a single-option list holding themselves.
        per_position_choices.append(IUPAC_AMBIGUITY.get(base.upper(), [base]))

    return list(map("".join, product(*per_position_choices)))
|
|
53
|
+
|
|
19
54
|
|
|
20
55
|
def codon_table():
|
|
21
56
|
return {
|
|
@@ -103,7 +138,12 @@ def pick_mutant_codon(wt_codon, target_aa):
|
|
|
103
138
|
return best_list[0][0]
|
|
104
139
|
|
|
105
140
|
|
|
106
|
-
def calc_tm(seq):
|
|
141
|
+
def calc_tm(seq: str) -> float:
    """Return the melting temperature of *seq* as computed by ``mt.Tm_NN``.

    A sequence made only of A/C/G/T is passed to ``mt.Tm_NN`` directly.
    A sequence with degenerate bases is expanded into every concrete
    variant and the arithmetic mean of the per-variant values is returned.
    """
    if not contains_degenerate_bases(seq):
        return mt.Tm_NN(seq)

    # Average over all expansions; the count grows multiplicatively with
    # each degenerate position.
    variant_tms = [mt.Tm_NN(variant) for variant in expand_degenerate_sequence(seq)]
    return sum(variant_tms) / len(variant_tms)
|
|
108
148
|
|
|
109
149
|
|
|
@@ -266,6 +306,7 @@ def run_design_slim(
|
|
|
266
306
|
m_indel = re.match(r"^([A-Z])(\d+)InDel([A-Z])(\d+)([A-Z]+)$", m)
|
|
267
307
|
m_sub = re.match(r"^([A-Z])(\d+)([A-Z])$", m)
|
|
268
308
|
m_ins = re.match(r"^([A-Z])(\d+)([A-Z]{2,})$", m)
|
|
309
|
+
m_lib = re.match(r"^([A-Z])(\d+):([A-Za-z]{3})$", m)
|
|
269
310
|
|
|
270
311
|
if m_del:
|
|
271
312
|
wt_aa, pos1 = m_del.group(1), int(m_del.group(2))
|
|
@@ -328,6 +369,39 @@ def run_design_slim(
|
|
|
328
369
|
if not new_seq:
|
|
329
370
|
logger.error("No minimal-change codon for %s->%s", wt_aa, mut_aa)
|
|
330
371
|
raise ValueError(f"No minimal-change codon for {wt_aa}->{mut_aa}")
|
|
372
|
+
elif m_lib:
|
|
373
|
+
wt_aa, pos_str, degenerate_codon = m_lib.groups()
|
|
374
|
+
pos = int(pos_str)
|
|
375
|
+
degenerate_codon = degenerate_codon.upper()
|
|
376
|
+
|
|
377
|
+
# Validate the degenerate codon
|
|
378
|
+
if not is_valid_degenerate_codon(degenerate_codon):
|
|
379
|
+
raise ValueError(f"Invalid degenerate codon: {degenerate_codon}")
|
|
380
|
+
|
|
381
|
+
region_start = gene_offset + (pos - 1) * 3
|
|
382
|
+
old_len = 3
|
|
383
|
+
|
|
384
|
+
# Validate WT amino acid (same as substitution validation)
|
|
385
|
+
wt_codon = full_seq[region_start : region_start + 3]
|
|
386
|
+
translated = translate_codon(wt_codon)
|
|
387
|
+
if translated != wt_aa:
|
|
388
|
+
logger.error(
|
|
389
|
+
"Expected %s but found %s at codon %s for mutation %s",
|
|
390
|
+
wt_aa, translated, wt_codon, mutation,
|
|
391
|
+
)
|
|
392
|
+
raise ValueError(
|
|
393
|
+
f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
new_seq = degenerate_codon
|
|
397
|
+
|
|
398
|
+
# Log library coverage info
|
|
399
|
+
expanded_codons = expand_degenerate_sequence(degenerate_codon)
|
|
400
|
+
unique_aas = set(translate_codon(c) for c in expanded_codons if translate_codon(c) != '?')
|
|
401
|
+
logger.info(
|
|
402
|
+
"Library mutation %s: %d possible codons, %d amino acids",
|
|
403
|
+
mutation, len(expanded_codons), len(unique_aas)
|
|
404
|
+
)
|
|
331
405
|
else:
|
|
332
406
|
logger.error("Unknown mutation format: %s", mutation)
|
|
333
407
|
raise ValueError(f"Unknown mutation format: {mutation}")
|
uht_tooling/workflows/gui.py
CHANGED
|
@@ -22,6 +22,7 @@ except ImportError as exc: # pragma: no cover - handled at runtime
|
|
|
22
22
|
) from exc
|
|
23
23
|
|
|
24
24
|
from uht_tooling.workflows.design_gibson import run_design_gibson
|
|
25
|
+
from uht_tooling.workflows.design_kld import run_design_kld
|
|
25
26
|
from uht_tooling.workflows.design_slim import run_design_slim
|
|
26
27
|
from uht_tooling.workflows.mut_rate import run_ep_library_profile
|
|
27
28
|
from uht_tooling.workflows.mutation_caller import run_mutation_caller
|
|
@@ -187,6 +188,47 @@ def run_gui_design_slim(
|
|
|
187
188
|
_clean_temp_path(locals().get("output_dir", Path()))
|
|
188
189
|
|
|
189
190
|
|
|
191
|
+
def run_gui_design_kld(
    template_gene_content: str,
    context_content: str,
    mutations_text: str,
) -> Tuple[str, Optional[str]]:
    """Run the KLD primer-design workflow from raw GUI text inputs.

    The three text inputs are written to temporary FASTA/CSV files,
    ``run_design_kld`` is invoked on them, and the output directory is
    zipped for download.

    Args:
        template_gene_content: Gene sequence text; passed through
            ``_ensure_text`` before use.
        context_content: Plasmid/genomic context text; passed through
            ``_ensure_text`` before use.
        mutations_text: One mutation per line. Blank lines are ignored and
            surrounding whitespace is stripped.

    Returns:
        ``(summary, archive_path)`` on success, or
        ``("⚠️ Error: ...", None)`` on any failure. Exceptions are logged
        and reported in the summary instead of propagating.
    """
    # Track the temp directories explicitly so cleanup only touches
    # directories this call created. The previous
    # ``locals().get(name, Path())`` fallback handed ``Path()`` (the current
    # working directory) to ``_clean_temp_path`` whenever validation failed
    # before the directories existed.
    work_dir: Optional[Path] = None
    output_dir: Optional[Path] = None
    try:
        gene_seq = _ensure_text(template_gene_content, "Template gene sequence")
        context_seq = _ensure_text(context_content, "Context sequence")
        # ``or ""`` turns a missing value into the friendly "no mutation"
        # error below rather than an AttributeError message.
        mutation_lines = [
            line.strip() for line in (mutations_text or "").splitlines() if line.strip()
        ]
        if not mutation_lines:
            raise ValueError("Provide at least one mutation (e.g., A123G).")

        work_dir = Path(tempfile.mkdtemp(prefix="uht_gui_kld_work_"))
        output_dir = Path(tempfile.mkdtemp(prefix="uht_gui_kld_out_"))

        gene_fasta = work_dir / "template_gene.fasta"
        context_fasta = work_dir / "context.fasta"
        mutations_csv = work_dir / "mutations.csv"

        gene_fasta.write_text(f">template\n{gene_seq}\n")
        context_fasta.write_text(f">context\n{context_seq}\n")
        # Single-column CSV with a "mutations" header row.
        mutations_csv.write_text("mutations\n" + "\n".join(mutation_lines) + "\n")

        result_csv = run_design_kld(
            gene_fasta=gene_fasta,
            context_fasta=context_fasta,
            mutations_csv=mutations_csv,
            output_dir=output_dir,
        )

        summary = _format_header("KLD Primer Design") + _preview_csv(result_csv)
        # NOTE(review): output_dir is removed in ``finally``, so the archive
        # presumably lives outside it - confirm against ``_zip_paths``.
        archive = _zip_paths([output_dir], "kld")
        return summary, str(archive)
    except Exception as exc:  # pragma: no cover
        _LOGGER.exception("KLD GUI failure")
        return f"⚠️ Error: {exc}", None
    finally:
        for temp_dir in (work_dir, output_dir):
            if temp_dir is not None:
                _clean_temp_path(temp_dir)
|
|
230
|
+
|
|
231
|
+
|
|
190
232
|
def run_gui_design_gibson(
|
|
191
233
|
template_gene_content: str,
|
|
192
234
|
context_content: str,
|
|
@@ -566,8 +608,8 @@ def create_gui() -> gr.Blocks:
|
|
|
566
608
|
with gr.Tab("Nextera XT"): # --- Nextera ---
|
|
567
609
|
gr.Markdown(
|
|
568
610
|
textwrap.dedent(
|
|
569
|
-
|
|
570
|
-
|
|
611
|
+
"""
|
|
612
|
+
### Illumina-Compatible Primer Design
|
|
571
613
|
Generates Nextera XT-ready primers from forward/reverse binding regions. The workflow preloads 12 i5 and 12 i7 indices (144 combinations) and mirrors the “One-PCR-to-flowcell” process described in the README.
|
|
572
614
|
|
|
573
615
|
**Inputs**
|
|
@@ -577,7 +619,7 @@ def create_gui() -> gr.Blocks:
|
|
|
577
619
|
**Outputs**
|
|
578
620
|
- CSV with i5/i7 indices, primer sequences, and ordering-ready metadata.
|
|
579
621
|
- Run log noting index selection and any validation warnings.
|
|
580
|
-
|
|
622
|
+
"""
|
|
581
623
|
)
|
|
582
624
|
)
|
|
583
625
|
forward = gr.Textbox(label="Forward primer (5'→3')")
|
|
@@ -599,24 +641,25 @@ def create_gui() -> gr.Blocks:
|
|
|
599
641
|
- Confirm primer depletion via electrophoresis (e.g., BioAnalyzer) before sequencing prep.
|
|
600
642
|
"""
|
|
601
643
|
)
|
|
602
|
-
|
|
644
|
+
)
|
|
603
645
|
|
|
604
646
|
with gr.Tab("SLIM"):
|
|
605
647
|
gr.Markdown(
|
|
606
648
|
textwrap.dedent(
|
|
607
|
-
|
|
608
|
-
|
|
649
|
+
"""
|
|
650
|
+
### Sequence-Ligation Independent Mutagenesis
|
|
609
651
|
Designs paired short/long primers to introduce targeted mutations by SLIM cloning, matching the workflow outlined in the README.
|
|
610
652
|
|
|
611
653
|
**Inputs**
|
|
612
654
|
- Target gene coding sequence (FASTA content).
|
|
613
655
|
- Plasmid or genomic context containing the gene.
|
|
614
656
|
- Mutations (one per line, e.g. substitution `A123G`, deletion `T241Del`, insertion `T241TS`).
|
|
657
|
+
- Library codons are supported via `AApos:COD` syntax (e.g. `R57:NNK`).
|
|
615
658
|
|
|
616
659
|
**Outputs**
|
|
617
660
|
- `SLIM_primers.csv` with primer sequences and annealing temperatures.
|
|
618
661
|
- Log file capturing primer QC and any design warnings.
|
|
619
|
-
|
|
662
|
+
"""
|
|
620
663
|
)
|
|
621
664
|
)
|
|
622
665
|
slim_gene = gr.Textbox(label="Gene sequence", lines=4)
|
|
@@ -640,13 +683,44 @@ def create_gui() -> gr.Blocks:
|
|
|
640
683
|
4. Transform directly into NEB 5-alpha or BL21 (DE3); the method scales to dozens of mutants simultaneously.
|
|
641
684
|
"""
|
|
642
685
|
)
|
|
686
|
+
)
|
|
687
|
+
|
|
688
|
+
with gr.Tab("KLD"):
|
|
689
|
+
gr.Markdown(
|
|
690
|
+
textwrap.dedent(
|
|
691
|
+
"""
|
|
692
|
+
### KLD (Inverse PCR) Primer Design
|
|
693
|
+
Designs inverse-PCR primers for KLD cloning. Forward primers carry the mutation at the 5' end, and reverse primers bind upstream to re-amplify the full plasmid.
|
|
694
|
+
|
|
695
|
+
**Inputs**
|
|
696
|
+
- Target gene coding sequence (FASTA content).
|
|
697
|
+
- Plasmid or genomic context containing the gene.
|
|
698
|
+
- Mutations (one per line, e.g. substitution `A123G`, deletion `T241Del`, insertion `T241TS`).
|
|
699
|
+
- Library codons are supported via `AApos:COD` syntax (e.g. `R57:NNK`).
|
|
700
|
+
|
|
701
|
+
**Outputs**
|
|
702
|
+
- `KLD_primers.csv` with primer sequences and annealing temperatures.
|
|
703
|
+
- Log file capturing primer QC and any design warnings.
|
|
704
|
+
"""
|
|
643
705
|
)
|
|
706
|
+
)
|
|
707
|
+
kld_gene = gr.Textbox(label="Gene sequence", lines=4)
|
|
708
|
+
kld_context = gr.Textbox(label="Plasmid context", lines=4)
|
|
709
|
+
kld_mutations = gr.Textbox(label="Mutations (one per line)", lines=6)
|
|
710
|
+
kld_btn = gr.Button("Design KLD primers", variant="primary")
|
|
711
|
+
kld_summary = gr.Markdown(label="Summary")
|
|
712
|
+
kld_download = gr.File(label="Download primers", file_count="single")
|
|
713
|
+
kld_btn.click(
|
|
714
|
+
fn=run_gui_design_kld,
|
|
715
|
+
inputs=[kld_gene, kld_context, kld_mutations],
|
|
716
|
+
outputs=[kld_summary, kld_download],
|
|
717
|
+
)
|
|
644
718
|
|
|
645
719
|
with gr.Tab("Gibson"):
|
|
646
720
|
gr.Markdown(
|
|
647
721
|
textwrap.dedent(
|
|
648
|
-
|
|
649
|
-
|
|
722
|
+
"""
|
|
723
|
+
### Gibson Assembly Primer Design
|
|
650
724
|
Plans primer sets and assembly steps for Gibson mutagenesis, supporting multi-mutation constructs using the `+` syntax (e.g. `A123G+T150A`).
|
|
651
725
|
|
|
652
726
|
**Inputs**
|
|
@@ -658,7 +732,7 @@ def create_gui() -> gr.Blocks:
|
|
|
658
732
|
- Primer CSV with overlap sequences and melting temperatures.
|
|
659
733
|
- Assembly plan CSV detailing fragment combinations.
|
|
660
734
|
- Log summarising design decisions and any warnings about overlapping regions.
|
|
661
|
-
|
|
735
|
+
"""
|
|
662
736
|
)
|
|
663
737
|
)
|
|
664
738
|
gibson_gene = gr.Textbox(label="Gene sequence", lines=4)
|
|
@@ -681,13 +755,13 @@ def create_gui() -> gr.Blocks:
|
|
|
681
755
|
- When replacing entire codons (e.g. `L46GP`), ensure the plasmid context covers both flanks to maintain overlap.
|
|
682
756
|
"""
|
|
683
757
|
)
|
|
684
|
-
|
|
758
|
+
)
|
|
685
759
|
|
|
686
760
|
with gr.Tab("Mutation Caller"):
|
|
687
761
|
gr.Markdown(
|
|
688
762
|
textwrap.dedent(
|
|
689
|
-
|
|
690
|
-
|
|
763
|
+
"""
|
|
764
|
+
### Long-read Mutation Analysis
|
|
691
765
|
Extracts coding regions bounded by user-defined flanks, aligns them to the template, and reports amino-acid substitutions alongside co-occurrence summaries.
|
|
692
766
|
|
|
693
767
|
**Required inputs**
|
|
@@ -695,8 +769,8 @@ def create_gui() -> gr.Blocks:
|
|
|
695
769
|
- Template FASTA: coding sequence used as the reference for alignment.
|
|
696
770
|
- Flank sequences: short 8–12 bp motifs immediately upstream and downstream of the gene.
|
|
697
771
|
- Gene length bounds: acceptable size window (in nucleotides) for the extracted gene segment.
|
|
698
|
-
|
|
699
|
-
|
|
772
|
+
"""
|
|
773
|
+
)
|
|
700
774
|
)
|
|
701
775
|
with gr.Row():
|
|
702
776
|
mc_fastq = gr.File(
|
|
@@ -753,12 +827,12 @@ def create_gui() -> gr.Blocks:
|
|
|
753
827
|
- Outputs mirror the CLI version: per-sample directories with CSV summaries, JSON co-occurrence graphs, QC plots, and a detailed `run.log`.
|
|
754
828
|
"""
|
|
755
829
|
)
|
|
756
|
-
|
|
830
|
+
)
|
|
757
831
|
|
|
758
832
|
with gr.Tab("UMI Hunter"):
|
|
759
833
|
gr.Markdown(
|
|
760
834
|
textwrap.dedent(
|
|
761
|
-
|
|
835
|
+
"""
|
|
762
836
|
### UMI–Gene Pair Clustering
|
|
763
837
|
Detects UMI barcodes, extracts paired gene inserts, clusters reads by UMI identity, and emits consensus sequences with abundance tables.
|
|
764
838
|
|
|
@@ -768,8 +842,8 @@ def create_gui() -> gr.Blocks:
|
|
|
768
842
|
- UMI and gene flank sequences marking the barcode and insert boundaries.
|
|
769
843
|
- UMI length bounds plus clustering thresholds.
|
|
770
844
|
- Minimum reads per cluster to keep (clusters below the threshold are reported but no consensus is generated).
|
|
771
|
-
|
|
772
|
-
|
|
845
|
+
"""
|
|
846
|
+
)
|
|
773
847
|
)
|
|
774
848
|
with gr.Row():
|
|
775
849
|
umi_fastq = gr.File(
|
|
@@ -862,19 +936,19 @@ def create_gui() -> gr.Blocks:
|
|
|
862
936
|
- Outputs include per-sample summaries, consensus FASTA files, cluster membership tables, QC plots, and logs mirroring the CLI workflow.
|
|
863
937
|
"""
|
|
864
938
|
)
|
|
865
|
-
|
|
939
|
+
)
|
|
866
940
|
|
|
867
941
|
with gr.Tab("Profile Inserts"):
|
|
868
942
|
gr.Markdown(
|
|
869
943
|
textwrap.dedent(
|
|
870
|
-
|
|
944
|
+
"""
|
|
871
945
|
### Probe-Guided Insert Profiling
|
|
872
946
|
Characterises inserts demarcated by user-supplied upstream/downstream probes, extracts sequences, and produces QC plots plus summary tables.
|
|
873
947
|
|
|
874
948
|
**Required inputs**
|
|
875
949
|
- FASTQ reads containing the inserts of interest.
|
|
876
950
|
- One or more probe pairs: 5'→3' sequences for the upstream and downstream anchors (reverse complements are matched automatically).
|
|
877
|
-
|
|
951
|
+
"""
|
|
878
952
|
)
|
|
879
953
|
)
|
|
880
954
|
probes_table = gr.Dataframe(
|
|
@@ -916,13 +990,13 @@ def create_gui() -> gr.Blocks:
|
|
|
916
990
|
- Logs are stored alongside the results so runs remain fully reproducible.
|
|
917
991
|
"""
|
|
918
992
|
)
|
|
919
|
-
|
|
993
|
+
)
|
|
920
994
|
|
|
921
995
|
with gr.Tab("EP Library Profile"):
|
|
922
996
|
gr.Markdown(
|
|
923
997
|
textwrap.dedent(
|
|
924
|
-
|
|
925
|
-
|
|
998
|
+
"""
|
|
999
|
+
### Library Profiling Without UMIs
|
|
926
1000
|
Estimates background and target mutation rates for enzyme evolution libraries without UMI barcodes.
|
|
927
1001
|
|
|
928
1002
|
**Inputs**
|
|
@@ -934,7 +1008,7 @@ def create_gui() -> gr.Blocks:
|
|
|
934
1008
|
- Per-sample directories with coverage tables, mutation rate statistics, and QC plots.
|
|
935
1009
|
- `master_summary.txt` aggregating condition-level metrics.
|
|
936
1010
|
- Verbose logs recording alignment commands and rate calculations.
|
|
937
|
-
|
|
1011
|
+
"""
|
|
938
1012
|
)
|
|
939
1013
|
)
|
|
940
1014
|
ep_fastq = gr.File(
|
|
@@ -963,7 +1037,7 @@ def create_gui() -> gr.Blocks:
|
|
|
963
1037
|
- Download the archive to inspect per-sample plots, TSV summaries, the consensus summary, and logs for troubleshooting.
|
|
964
1038
|
"""
|
|
965
1039
|
)
|
|
966
|
-
|
|
1040
|
+
)
|
|
967
1041
|
|
|
968
1042
|
gr.Markdown(
|
|
969
1043
|
textwrap.dedent(
|
|
@@ -566,10 +566,10 @@ def compute_consensus_aa_mutation(
|
|
|
566
566
|
) -> Tuple[Optional[dict], List[dict]]:
|
|
567
567
|
"""
|
|
568
568
|
Derive a consensus amino-acid mutation estimate across Q-score thresholds.
|
|
569
|
-
|
|
569
|
+
|
|
570
570
|
Each threshold must meet a minimum coverage requirement. The consensus is a
|
|
571
571
|
precision-weighted average (weights = 1 / std_aa_mutations).
|
|
572
|
-
|
|
572
|
+
|
|
573
573
|
Returns:
|
|
574
574
|
consensus_info (dict or None)
|
|
575
575
|
{
|
|
@@ -648,7 +648,7 @@ def compute_consensus_aa_mutation(
|
|
|
648
648
|
consensus_std,
|
|
649
649
|
thresholds,
|
|
650
650
|
)
|
|
651
|
-
|
|
651
|
+
|
|
652
652
|
return consensus_info, valid_results
|
|
653
653
|
|
|
654
654
|
def create_simple_qc_plots(quality_thresholds, qc_results, results_dir, consensus_info=None):
|
|
@@ -2170,12 +2170,12 @@ def run_main_analysis_for_qscore(fastq_path, qscore, qscore_desc, sample_name, w
|
|
|
2170
2170
|
color="gray",
|
|
2171
2171
|
transform=ax3.transAxes,
|
|
2172
2172
|
)
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2173
|
+
|
|
2174
|
+
ax3.set_title("AA Mutation Distribution", fontsize=14, fontweight='bold')
|
|
2175
|
+
ax3.set_xlabel("Number of AA Mutations", fontsize=12)
|
|
2176
|
+
ax3.set_ylabel("Density", fontsize=12)
|
|
2177
|
+
ax3.spines['top'].set_visible(False)
|
|
2178
|
+
ax3.spines['right'].set_visible(False)
|
|
2179
2179
|
|
|
2180
2180
|
# Save the combined figure as both PNG and PDF
|
|
2181
2181
|
panel_path_png = os.path.join(qscore_results_dir, "summary_panels.png")
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: uht-tooling
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.9
|
|
4
4
|
Summary: Tooling for ultra-high throughput screening workflows.
|
|
5
5
|
Author: Matt115A
|
|
6
|
-
License: MIT
|
|
6
|
+
License-Expression: MIT
|
|
7
7
|
Requires-Python: >=3.8
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: biopython==1.85
|
|
@@ -82,6 +82,7 @@ Each command mirrors a workflow module. Common entry points:
|
|
|
82
82
|
| --- | --- |
|
|
83
83
|
| `uht-tooling nextera-primers` | Generate Nextera XT primer pairs from a binding-region CSV. |
|
|
84
84
|
| `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
|
|
85
|
+
| `uht-tooling design-kld` | Design KLD (inverse PCR) mutagenesis primers. |
|
|
85
86
|
| `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
|
|
86
87
|
| `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
|
|
87
88
|
| `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
|
|
@@ -141,6 +142,46 @@ Mutation nomenclature examples:
|
|
|
141
142
|
- `T241Del` (deletion)
|
|
142
143
|
- `T241TS` (insert Ser after Thr241)
|
|
143
144
|
- `L46GP` (replace Leu46 with Gly-Pro)
|
|
145
|
+
- `A123:NNK` (library mutation with degenerate codon)
|
|
146
|
+
|
|
147
|
+
#### Library mutations with degenerate codons
|
|
148
|
+
|
|
149
|
+
For saturation mutagenesis and library generation, SLIM supports degenerate (IUPAC ambiguity) codons using the format `<WT_AA><position>:<codon>`. The codon must be exactly 3 characters using valid IUPAC nucleotide codes:
|
|
150
|
+
|
|
151
|
+
| Code | Bases | Mnemonic |
|
|
152
|
+
|------|-------|----------|
|
|
153
|
+
| A, C, G, T | Single base | Standard |
|
|
154
|
+
| R | A, G | puRine |
|
|
155
|
+
| Y | C, T | pYrimidine |
|
|
156
|
+
| S | G, C | Strong |
|
|
157
|
+
| W | A, T | Weak |
|
|
158
|
+
| K | G, T | Keto |
|
|
159
|
+
| M | A, C | aMino |
|
|
160
|
+
| B | C, G, T | not A |
|
|
161
|
+
| D | A, G, T | not C |
|
|
162
|
+
| H | A, C, T | not G |
|
|
163
|
+
| V | A, C, G | not T |
|
|
164
|
+
| N | A, C, G, T | aNy |
|
|
165
|
+
|
|
166
|
+
Common degenerate codon schemes for library construction:
|
|
167
|
+
|
|
168
|
+
| Scheme | Codons | Amino acids | Stop codons | Notes |
|
|
169
|
+
|--------|--------|-------------|-------------|-------|
|
|
170
|
+
| NNK | 32 | 20 | 1 (TAG) | Reduced stop codon frequency |
|
|
171
|
+
| NNS | 32 | 20 | 1 (TAG) | Equivalent to NNK |
|
|
172
|
+
| NNN | 64 | 20 | 3 | All codons, higher stop frequency |
|
|
173
|
+
| NDT | 12 | 12 | 0 | F, L, I, V, Y, H, N, D, C, R, S, G only |
|
|
174
|
+
|
|
175
|
+
Example CSV with mixed mutation types:
|
|
176
|
+
```csv
|
|
177
|
+
mutations
|
|
178
|
+
A123G
|
|
179
|
+
T50:NNK
|
|
180
|
+
S100:NNS
|
|
181
|
+
T241Del
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The workflow validates that the wild-type amino acid matches the template sequence and logs library coverage information (number of possible codons and amino acids) for each degenerate mutation. Primers are generated with the degenerate bases embedded; reverse primers contain the correct IUPAC reverse complements (e.g., K↔M, R↔Y, S↔S).
|
|
144
185
|
|
|
145
186
|
#### Experimental blueprint
|
|
146
187
|
|
|
@@ -149,6 +190,52 @@ Mutation nomenclature examples:
|
|
|
149
190
|
- Combine 10 µL from each PCR with 10 µL H-buffer (150 mM Tris pH 8, 400 mM NaCl, 60 mM EDTA) for a 30 µL annealing reaction: 99 °C for 3 min, then two cycles of 65 °C for 5 min followed by 30 °C for 15 min, hold at 4 °C.
|
|
150
191
|
- Transform directly into NEB 5-alpha or BL21 (DE3) cells without additional cleanup. The protocol has been validated for simultaneous introduction of dozens of mutations.
|
|
151
192
|
|
|
193
|
+
### KLD primer design
|
|
194
|
+
|
|
195
|
+
KLD (Kinase, Ligase, DpnI) is an alternative mutagenesis method that uses inverse PCR to amplify the entire plasmid, with the mutation incorporated at the primer junction.
|
|
196
|
+
|
|
197
|
+
- Inputs: Same as SLIM design
|
|
198
|
+
- `data/design_kld/kld_template_gene.fasta`
|
|
199
|
+
- `data/design_kld/kld_context.fasta`
|
|
200
|
+
- `data/design_kld/kld_target_mutations.csv` (single `mutations` column)
|
|
201
|
+
- Run:
|
|
202
|
+
```bash
|
|
203
|
+
uht-tooling design-kld \
|
|
204
|
+
--gene-fasta data/design_kld/kld_template_gene.fasta \
|
|
205
|
+
--context-fasta data/design_kld/kld_context.fasta \
|
|
206
|
+
--mutations-csv data/design_kld/kld_target_mutations.csv \
|
|
207
|
+
--output-dir results/design_kld/
|
|
208
|
+
```
|
|
209
|
+
- Output: `results/design_kld/KLD_primers.csv` plus logs.
|
|
210
|
+
|
|
211
|
+
Mutation nomenclature: Same as SLIM (substitution, deletion, insertion, indel, library).
|
|
212
|
+
|
|
213
|
+
#### KLD vs SLIM
|
|
214
|
+
|
|
215
|
+
| Method | Primers | Mechanism | Best for |
|
|
216
|
+
|--------|---------|-----------|----------|
|
|
217
|
+
| SLIM | 4 per mutation | Overlap assembly | Multiple simultaneous mutations |
|
|
218
|
+
| KLD | 2 per mutation | Inverse PCR + ligation | Single mutations, simpler workflow |
|
|
219
|
+
|
|
220
|
+
#### KLD primer design rules
|
|
221
|
+
|
|
222
|
+
- Forward primer: Mutation codon at 5' end + downstream template-binding region
|
|
223
|
+
- Reverse primer: Reverse complement of the upstream region, with its 5' end directly adjacent to the 5' end of the forward primer
|
|
224
|
+
- Tm calculated on template-binding regions only (50-65°C target)
|
|
225
|
+
- Tm difference between primers kept within 5°C
|
|
226
|
+
- GC content 40-60%
|
|
227
|
+
- Binding region 18-24 bp
|
|
228
|
+
|
|
229
|
+
#### Experimental workflow
|
|
230
|
+
|
|
231
|
+
1. PCR amplify entire plasmid with KLD primer pair
|
|
232
|
+
2. DpnI digest to remove methylated template
|
|
233
|
+
3. T4 PNK phosphorylation of 5' ends
|
|
234
|
+
4. T4 DNA ligase to circularize
|
|
235
|
+
5. Transform into competent cells
|
|
236
|
+
|
|
237
|
+
NEB sells a KLD Enzyme Mix (M0554) that combines steps 2–4 (DpnI digestion, phosphorylation, and ligation) in a single reaction.
|
|
238
|
+
|
|
152
239
|
### Gibson assembly primers
|
|
153
240
|
|
|
154
241
|
- Inputs mirror the SLIM workflow but use `data/design_gibson/`.
|
|
@@ -253,12 +340,13 @@ Key points:
|
|
|
253
340
|
### Tabs and capabilities
|
|
254
341
|
|
|
255
342
|
1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
|
|
256
|
-
2. **SLIM** – template/context FASTA text areas plus mutation list.
|
|
257
|
-
3. **
|
|
258
|
-
4. **
|
|
259
|
-
5. **
|
|
260
|
-
6. **
|
|
261
|
-
7. **
|
|
343
|
+
2. **SLIM** – template/context FASTA text areas plus mutation list (supports library codons like `R57:NNK`).
|
|
344
|
+
3. **KLD** – inverse-PCR primer design using the same mutation list format (including library codons like `R57:NNK`).
|
|
345
|
+
4. **Gibson** – multi-mutation support using `+` syntax.
|
|
346
|
+
5. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
|
|
347
|
+
6. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
|
|
348
|
+
7. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
|
|
349
|
+
8. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
|
|
262
350
|
|
|
263
351
|
### Workflow tips
|
|
264
352
|
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
uht_tooling/__init__.py,sha256=hf0tJaa4_9y9aYb8OB1FtJh1FOuX08dQ6_MCveWFNAc,242
|
|
2
|
-
uht_tooling/cli.py,sha256=
|
|
2
|
+
uht_tooling/cli.py,sha256=3QUxYBFqhQyeZ9xM_JTlqhr_UJhb_PRj7Y_UMH5Tslc,14366
|
|
3
3
|
uht_tooling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
uht_tooling/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
uht_tooling/workflows/design_gibson.py,sha256=SQEThq6dxPMPCsUrwqMUaG5I-diE9jUXPRii9Y7O_7U,13617
|
|
6
|
-
uht_tooling/workflows/
|
|
7
|
-
uht_tooling/workflows/
|
|
8
|
-
uht_tooling/workflows/
|
|
6
|
+
uht_tooling/workflows/design_kld.py,sha256=SWbKVfi1JgJ7cN9TU3dLEiYmZT7LQiGL_mUZ-n3PdzE,27368
|
|
7
|
+
uht_tooling/workflows/design_slim.py,sha256=wGXnmaJCzlAZTjf2SRupwt_3MBl5cgZr1O9nnMQyoGo,17767
|
|
8
|
+
uht_tooling/workflows/gui.py,sha256=FpzxgjOo8SQCPJRM7ltVLk3bcwZ_AxjQzZxwz7J_c1M,46436
|
|
9
|
+
uht_tooling/workflows/mut_rate.py,sha256=Sv4OU68RNTOOsKV0QSbJ7FOgxh3vQeUeib_5mrXqyHg,109074
|
|
9
10
|
uht_tooling/workflows/mutation_caller.py,sha256=BczuNATOSUcmlw-x6qTzEQfW8MBbvGclEyqiQiBX0cg,16222
|
|
10
11
|
uht_tooling/workflows/nextera_designer.py,sha256=8MZ_DyQ0JwPojXH5mZ6bAGAkqki_0qQGac45T_Ll8FQ,6170
|
|
11
12
|
uht_tooling/workflows/profile_inserts.py,sha256=C-SZ10YefiV_4QZbo1oEkI4qYipwaYqPP5jF-MC5O58,16947
|
|
12
13
|
uht_tooling/workflows/umi_hunter.py,sha256=baycWycqVzUfMp5u2WZdHRl0sNuykTjy-iqtj5ahucU,15075
|
|
13
|
-
uht_tooling-0.1.
|
|
14
|
-
uht_tooling-0.1.
|
|
15
|
-
uht_tooling-0.1.
|
|
16
|
-
uht_tooling-0.1.
|
|
17
|
-
uht_tooling-0.1.
|
|
14
|
+
uht_tooling-0.1.9.dist-info/METADATA,sha256=mMC92ln1dMYhDQFlKRfBsMMsMVvy0p0LDY8s6aX-4Ig,16399
|
|
15
|
+
uht_tooling-0.1.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
+
uht_tooling-0.1.9.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
|
|
17
|
+
uht_tooling-0.1.9.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
|
|
18
|
+
uht_tooling-0.1.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|