uht-tooling 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
uht_tooling/cli.py CHANGED
@@ -4,6 +4,7 @@ from typing import Optional
4
4
  import typer
5
5
 
6
6
  from uht_tooling.workflows.design_gibson import run_design_gibson
7
+ from uht_tooling.workflows.design_kld import run_design_kld
7
8
  from uht_tooling.workflows.design_slim import run_design_slim
8
9
  from uht_tooling.workflows.mutation_caller import (
9
10
  expand_fastq_inputs as expand_fastq_inputs_mutation,
@@ -66,6 +67,45 @@ def design_slim_command(
66
67
  typer.echo(f"SLIM primers written to {output_dir / 'SLIM_primers.csv'}")
67
68
 
68
69
 
70
# CLI entry point for the KLD workflow: validates the paths via typer options and
# delegates all of the work to run_design_kld.
@app.command("design-kld", help="Design KLD (inverse PCR) primers from user-specified FASTA/CSV inputs.")
def design_kld_command(
    # Required input: must already exist and be readable.
    gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
    # Required input: must already exist and be readable.
    context_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Path to the context FASTA file containing the plasmid or genomic sequence.",
    ),
    # Required input: must already exist and be readable.
    mutations_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file containing a 'mutations' column with the desired edits.",
    ),
    # Required output location; no `exists=True`, so it may be absent at call time.
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where results will be written.",
    ),
    # Optional log file; when omitted (None) it is forwarded as None to run_design_kld.
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file for this run.",
    ),
):
    """Design KLD (inverse PCR) primers from user-provided inputs."""
    # run_design_kld returns the path that is echoed to the user below.
    result_path = run_design_kld(
        gene_fasta=gene_fasta,
        context_fasta=context_fasta,
        mutations_csv=mutations_csv,
        output_dir=output_dir,
        log_path=log_path,
    )
    typer.echo(f"KLD primers written to {result_path}")
107
+
108
+
69
109
  @app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
70
110
  def nextera_primers_command(
71
111
  binding_csv: Path = typer.Option(
@@ -0,0 +1,687 @@
1
+ """KLD (Kinase-Ligation-DpnI) primer design for inverse PCR mutagenesis."""
2
+
3
+ import argparse
4
+ import csv
5
+ import logging
6
+ import re
7
+ from pathlib import Path
8
+ from typing import List, Optional, Tuple
9
+
10
+ import pandas as pd
11
+ from Bio import SeqIO
12
+ from Bio.Seq import Seq
13
+ from Bio.SeqUtils import MeltingTemp as mt
14
+
15
# KLD primer design parameters.
# The Tm/length values are the defaults of design_forward_primer and
# design_reverse_primer; the GC window is applied only in their strict pass.
MIN_TM = 50.0  # Minimum melting temperature (°C) of a binding region
MAX_TM = 65.0  # Maximum melting temperature (°C) of a binding region
TM_DIFF_THRESHOLD = 5.0  # Max Tm difference between F & R before balance_primer_tms trims
MIN_LENGTH = 18  # Minimum primer length (template-binding region) tried during design
MAX_LENGTH = 24  # Maximum primer length (template-binding region) tried during design
MIN_GC = 40.0  # Minimum GC content %
MAX_GC = 60.0  # Maximum GC content %
MIN_BINDING = 10  # Shortest binding region balance_primer_tms will trim a primer down to

# IUPAC ambiguity codes mapping: each (upper-case) code -> the standard bases it stands for
IUPAC_AMBIGUITY = {
    'A': ['A'], 'C': ['C'], 'G': ['G'], 'T': ['T'],
    'R': ['A', 'G'],  # puRine
    'Y': ['C', 'T'],  # pYrimidine
    'S': ['G', 'C'],  # Strong
    'W': ['A', 'T'],  # Weak
    'K': ['G', 'T'],  # Keto
    'M': ['A', 'C'],  # aMino
    'B': ['C', 'G', 'T'],  # not A
    'D': ['A', 'G', 'T'],  # not C
    'H': ['A', 'C', 'T'],  # not G
    'V': ['A', 'C', 'G'],  # not T
    'N': ['A', 'C', 'G', 'T'],
}

# Every single-letter code accepted inside a degenerate codon (standard bases included)
VALID_DEGENERATE_BASES = set(IUPAC_AMBIGUITY.keys())
42
+
43
+
44
def is_valid_degenerate_codon(codon: str) -> bool:
    """Return True when ``codon`` is exactly three IUPAC nucleotide codes (case-insensitive)."""
    if len(codon) != 3:
        return False
    for base in codon:
        if base.upper() not in VALID_DEGENERATE_BASES:
            return False
    return True
47
+
48
+
49
def contains_degenerate_bases(seq: str) -> bool:
    """Report whether any character of ``seq`` is something other than A, C, G or T (case-insensitive)."""
    standard_bases = {'A', 'C', 'G', 'T'}
    for base in seq:
        if base.upper() not in standard_bases:
            return True
    return False
52
+
53
+
54
def expand_degenerate_sequence(seq: str) -> List[str]:
    """Enumerate every standard-base sequence that the degenerate ``seq`` can stand for.

    Characters without an entry in IUPAC_AMBIGUITY are kept unchanged. The
    result grows multiplicatively with each ambiguous position.
    """
    from itertools import product

    per_position = []
    for base in seq:
        per_position.append(IUPAC_AMBIGUITY.get(base.upper(), [base]))

    expanded = []
    for combination in product(*per_position):
        expanded.append(''.join(combination))
    return expanded
59
+
60
+
61
def codon_table():
    """Return a fresh dict mapping each of the 64 DNA codons to its one-letter amino acid ('*' = stop).

    Codons are generated in T, C, A, G order for the first, second and third
    base, so the dict iterates TTT, TTC, TTA, TTG, TCT, ... GGG.
    """
    bases = "TCAG"
    # One amino-acid letter per codon, in the same T/C/A/G nesting order.
    amino_acids = (
        "FFLLSSSSYY**CC*W"
        "LLLLPPPPHHQQRRRR"
        "IIIMTTTTNNKKSSRR"
        "VVVVAAAADDEEGGGG"
    )
    codons = [first + second + third for first in bases for second in bases for third in bases]
    return dict(zip(codons, amino_acids))
80
+
81
+
82
def translate_codon(cd: str) -> str:
    """Look up the amino acid for a codon (case-insensitive); unknown codons yield "?"."""
    table = codon_table()
    key = cd.upper()
    if key in table:
        return table[key]
    return "?"
85
+
86
+
87
def pick_mutant_codon(wt_codon: str, target_aa: str) -> Optional[str]:
    """Choose the codon encoding ``target_aa`` with the fewest base changes from ``wt_codon``.

    Ties go to the codon that appears first in codon_table(). Returns None
    when no codon encodes ``target_aa``.
    """
    encoding_codons = [codon for codon, aa in codon_table().items() if aa == target_aa]
    if not encoding_codons:
        return None

    reference = wt_codon.upper()

    def mismatch_count(candidate: str) -> int:
        # Position-wise comparison; zip stops at the shorter of the two strings.
        return sum(x != y for x, y in zip(candidate, reference))

    # min() keeps the first minimal element, matching a stable sort on the count.
    return min(encoding_codons, key=mismatch_count)
98
+
99
+
100
def calc_tm_binding_region(seq: str) -> float:
    """Return the nearest-neighbor Tm of ``seq``; for degenerate input, the mean over all expansions."""
    if not contains_degenerate_bases(seq):
        return mt.Tm_NN(seq)

    # Degenerate sequence: score every concrete variant and average.
    variant_tms = [mt.Tm_NN(variant) for variant in expand_degenerate_sequence(seq)]
    return sum(variant_tms) / len(variant_tms)
107
+
108
+
109
def calc_gc_content(seq: str) -> float:
    """Return the percentage of G/C characters in ``seq`` (case-insensitive); 0.0 for an empty string."""
    upper_seq = seq.upper()
    if not upper_seq:
        return 0.0
    gc_count = upper_seq.count('G') + upper_seq.count('C')
    return (gc_count / len(upper_seq)) * 100
114
+
115
+
116
def design_forward_primer(
    full_seq: str,
    mutation_nt_pos: int,
    new_codon: str,
    min_tm: float = MIN_TM,
    max_tm: float = MAX_TM,
    min_len: int = MIN_LENGTH,
    max_len: int = MAX_LENGTH,
    replaced_len: int = 3,
) -> Tuple[str, str, float, float, int]:
    """
    Design forward primer with mutation at 5' end.

    Structure: [NEW_SEQUENCE] + [WT_DOWNSTREAM_SEQUENCE]

    The forward primer points downstream, with its 5' end at the mutation
    site. The template-binding region is the WT sequence immediately
    downstream of the span being replaced.

    Selection happens in two passes over binding lengths min_len..max_len:
    first the window within [min_tm, max_tm] and [MIN_GC, MAX_GC] whose Tm is
    closest to the midpoint of the Tm range; if none qualifies, the shortest
    window whose Tm lies within 5 degrees of the Tm range (GC ignored).

    Args:
        full_seq: Full plasmid/context sequence
        mutation_nt_pos: Nucleotide position of mutation start (0-indexed)
        new_codon: Sequence placed at the 5' end of the primer (a codon,
            several codons, a degenerate codon, or "" for a deletion)
        min_tm: Minimum melting temperature for binding region
        max_tm: Maximum melting temperature for binding region
        min_len: Minimum binding region length
        max_len: Maximum binding region length
        replaced_len: Number of WT nucleotides, starting at mutation_nt_pos,
            that the mutation removes. Defaults to 3 (one codon); use 0 for a
            pure insertion or a multiple of 3 for a multi-codon replacement.

    Returns:
        Tuple of (full_primer_seq, binding_region_seq, tm, gc, total_length)

    Raises:
        ValueError: If replaced_len is negative or no valid binding region
            can be found
    """
    if replaced_len < 0:
        raise ValueError(f"replaced_len must be non-negative, got {replaced_len}")

    # Template-binding region starts immediately after the replaced WT span.
    binding_start = mutation_nt_pos + replaced_len
    target_tm = (min_tm + max_tm) / 2

    # Score every candidate window once; both passes below reuse the results.
    candidates = []
    for length in range(min_len, max_len + 1):
        binding_end = binding_start + length
        if binding_end > len(full_seq):
            break
        binding_seq = full_seq[binding_start:binding_end]
        tm = calc_tm_binding_region(binding_seq)
        gc = calc_gc_content(binding_seq)
        candidates.append((binding_seq, tm, gc, length))

    # Strict pass: Tm and GC in range, Tm closest to the middle of the range.
    best_binding = None
    best_tm_diff = float('inf')
    for candidate in candidates:
        _, tm, gc, _ = candidate
        if min_tm <= tm <= max_tm and MIN_GC <= gc <= MAX_GC:
            tm_diff = abs(tm - target_tm)
            if tm_diff < best_tm_diff:
                best_tm_diff = tm_diff
                best_binding = candidate

    if best_binding is None:
        # Relaxed pass: accept the shortest window within 5 degrees of the
        # Tm range, without a GC requirement.
        for candidate in candidates:
            tm = candidate[1]
            if min_tm - 5 <= tm <= max_tm + 5:
                best_binding = candidate
                break

    if best_binding is None:
        raise ValueError(
            f"Cannot find forward primer binding region meeting constraints "
            f"(pos={mutation_nt_pos}, Tm={min_tm}-{max_tm}°C, len={min_len}-{max_len}bp)"
        )

    binding_seq, tm, gc, length = best_binding
    full_primer = new_codon + binding_seq
    return full_primer, binding_seq, tm, gc, len(full_primer)
196
+
197
+
198
def design_reverse_primer(
    full_seq: str,
    mutation_nt_pos: int,
    min_tm: float = MIN_TM,
    max_tm: float = MAX_TM,
    min_len: int = MIN_LENGTH,
    max_len: int = MAX_LENGTH,
) -> Tuple[str, float, float, int]:
    """
    Design the reverse primer that abuts the forward primer's 5' end.

    The primer is the reverse complement of the WT sequence that ends just
    before ``mutation_nt_pos``. Windows of min_len..max_len nucleotides are
    scored; the one with Tm and GC in range and Tm nearest the midpoint of
    the Tm range wins. If no window qualifies, the shortest window whose Tm
    is within 5 degrees of the Tm range is used instead.

    Args:
        full_seq: Full plasmid/context sequence
        mutation_nt_pos: Nucleotide position of mutation start (0-indexed)
        min_tm: Minimum melting temperature
        max_tm: Maximum melting temperature
        min_len: Minimum primer length
        max_len: Maximum primer length

    Returns:
        Tuple of (primer_seq, tm, gc, length)

    Raises:
        ValueError: If no valid primer can be found
    """
    anchor = mutation_nt_pos  # exclusive end of the upstream window
    midpoint_tm = (min_tm + max_tm) / 2

    # Score each upstream window once, shortest first.
    windows = []
    for size in range(min_len, max_len + 1):
        start = anchor - size
        if start < 0:
            break
        template = full_seq[start:anchor]
        windows.append(
            (template, calc_tm_binding_region(template), calc_gc_content(template), size)
        )

    in_spec = [
        w for w in windows
        if min_tm <= w[1] <= max_tm and MIN_GC <= w[2] <= MAX_GC
    ]
    if in_spec:
        # min() returns the first of equally good windows, i.e. the shortest.
        chosen = min(in_spec, key=lambda w: abs(w[1] - midpoint_tm))
    else:
        # Relax constraints slightly: Tm only, first (shortest) hit wins.
        chosen = next(
            (w for w in windows if w[1] >= min_tm - 5 and w[1] <= max_tm + 5),
            None,
        )

    if chosen is None:
        raise ValueError(
            f"Cannot find reverse primer meeting constraints "
            f"(pos={mutation_nt_pos}, Tm={min_tm}-{max_tm}°C, len={min_len}-{max_len}bp)"
        )

    template, tm, gc, size = chosen
    return (str(Seq(template).reverse_complement()), tm, gc, size)
274
+
275
+
276
def balance_primer_tms(
    fwd_result: Tuple[str, str, float, float, int],
    rev_result: Tuple[str, float, float, int],
    full_seq: str,
    mutation_nt_pos: int,
    new_codon: str,
    tm_threshold: float = TM_DIFF_THRESHOLD,
) -> Tuple[Tuple[str, str, float, float, int], Tuple[str, float, float, int]]:
    """
    Balance Tm between forward and reverse primers by adjusting lengths.

    If the Tm difference exceeds the threshold, the hotter primer's binding
    region is trimmed one nucleotide at a time from its 3' end until the
    difference is within the threshold or the region reaches MIN_BINDING
    nucleotides. The result is returned even if the threshold could not be
    reached.

    Args:
        fwd_result: Forward primer tuple (full_seq, binding_seq, tm, gc, length)
        rev_result: Reverse primer tuple (primer_seq, tm, gc, length)
        full_seq: Full plasmid/context sequence
        mutation_nt_pos: Nucleotide position where the reverse primer's
            upstream template ends (exclusive, 0-indexed)
        new_codon: The mutated sequence at the forward primer's 5' end
        tm_threshold: Maximum allowed Tm difference

    Returns:
        Adjusted (fwd_result, rev_result) tuples. A primer that was not
        trimmed is returned unchanged.
    """
    fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = fwd_result
    rev_seq, rev_tm, rev_gc, rev_len = rev_result

    if abs(fwd_tm - rev_tm) <= tm_threshold:
        return fwd_result, rev_result

    if fwd_tm > rev_tm:
        # Trim forward binding region from its 3' end.
        trimmed = False
        while len(fwd_binding) > MIN_BINDING and fwd_tm - rev_tm > tm_threshold:
            fwd_binding = fwd_binding[:-1]
            fwd_tm = calc_tm_binding_region(fwd_binding)
            trimmed = True

        if trimmed:
            fwd_seq = new_codon + fwd_binding
            # GC is reported for the binding region, matching the tuple that
            # design_forward_primer produces for untrimmed primers.
            fwd_gc = calc_gc_content(fwd_binding)
            fwd_len = len(fwd_seq)
            fwd_result = (fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len)
    else:
        # The reverse primer is the reverse complement of the upstream
        # template, so its 3' end corresponds to the template's left edge:
        # trimming means moving the template start towards mutation_nt_pos.
        upstream_end = mutation_nt_pos
        current_len = rev_len
        trimmed_template = None

        while current_len > MIN_BINDING and rev_tm - fwd_tm > tm_threshold:
            upstream_start = upstream_end - (current_len - 1)
            if upstream_start < 0:
                # The template would start before the sequence; stop trimming.
                break
            current_len -= 1
            trimmed_template = full_seq[upstream_start:upstream_end]
            rev_tm = calc_tm_binding_region(trimmed_template)

        # Only rebuild when at least one trim happened; this also covers a
        # primer that is already at or below MIN_BINDING on entry.
        if trimmed_template is not None:
            rev_seq = str(Seq(trimmed_template).reverse_complement())
            rev_gc = calc_gc_content(rev_seq)
            rev_len = len(rev_seq)
            rev_result = (rev_seq, rev_tm, rev_gc, rev_len)

    return fwd_result, rev_result
343
+
344
+
345
def run_design_kld(
    gene_fasta: Path,
    context_fasta: Path,
    mutations_csv: Path,
    output_dir: Path,
    log_path: Optional[Path] = None,
    logger: Optional[logging.Logger] = None,
) -> Path:
    """
    Design KLD (inverse PCR) primers for mutations.

    KLD cloning uses two primers per mutation that point away from each other,
    amplifying the entire plasmid. The forward primer has the mutation at its
    5' end, and the reverse primer's 5' end is adjacent to the forward's.

    Supported mutation formats (one per CSV row):
        ``A123G``            substitution
        ``T241Del``          deletion of one codon
        ``T241TS``           insertion after T241 (first letter equals WT)
        ``T241GS``           replacement of T241 by several residues
        ``A10InDelC12XYZ``   replacement of residues 10..12 by XYZ
        ``R57:NNK``          library (degenerate) codon

    Args:
        gene_fasta: Path to FASTA file containing the gene sequence
        context_fasta: Path to FASTA file containing the plasmid/context sequence
        mutations_csv: CSV file with a 'mutations' column
        output_dir: Directory for output files (created if missing)
        log_path: Optional path for log file; console logging when omitted
        logger: Optional logger instance; when given, its handlers are left
            untouched

    Returns:
        Path to the output CSV file (``KLD_primers.csv`` inside output_dir)

    Raises:
        ValueError: If inputs are invalid or mutations cannot be processed
    """
    gene_fasta = Path(gene_fasta)
    context_fasta = Path(context_fasta)
    mutations_csv = Path(mutations_csv)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Only a logger created here is torn down again in the finally block.
    managed_logger = logger is None
    if logger is None:
        logger = logging.getLogger("uht_tooling.design_kld")
        logger.setLevel(logging.INFO)
        handler: logging.Handler
        if log_path:
            handler = logging.FileHandler(log_path, mode="w")
        else:
            handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s: %(message)s"))
        logger.handlers = []
        logger.addHandler(handler)
        logger.propagate = False

    try:
        # Load sequences (first record of each file). An empty file is
        # reported as ValueError rather than leaking StopIteration.
        gene_record = next(SeqIO.parse(str(gene_fasta), "fasta"), None)
        if gene_record is None:
            raise ValueError(f"No FASTA record found in {gene_fasta}")
        context_record = next(SeqIO.parse(str(context_fasta), "fasta"), None)
        if context_record is None:
            raise ValueError(f"No FASTA record found in {context_fasta}")
        gene = str(gene_record.seq).upper()
        context = str(context_record.seq).upper()
        logger.info("Loaded gene (%s nt) and context (%s nt).", len(gene), len(context))

        # Load mutations
        df = pd.read_csv(mutations_csv)
        if "mutations" not in df.columns:
            raise ValueError("Mutations CSV must contain a 'mutations' column.")
        mutations = df["mutations"].dropna().tolist()
        logger.info("Loaded %s mutation entries.", len(mutations))

        # Align gene within context
        try:
            gene_offset = context.index(gene)
            logger.info("Gene aligned at offset %s within context.", gene_offset)
        except ValueError as exc:
            message = "Could not align gene within context. No perfect substring match found."
            logger.error(message)
            raise ValueError(message) from exc

        full_seq = context

        # Output file
        results_path = output_dir / "KLD_primers.csv"
        with results_path.open("w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                "Primer Name", "Sequence", "Tm (binding)", "GC%", "Length", "Notes"
            ])

            for mutation in mutations:
                try:
                    # Coerce to text so numeric or padded cells end up in the
                    # "Unknown mutation format" path instead of a TypeError.
                    m = str(mutation).strip()
                    m_del = re.match(r"^([A-Z])(\d+)Del$", m)
                    m_indel = re.match(r"^([A-Z])(\d+)InDel([A-Z])(\d+)([A-Z]+)$", m)
                    m_sub = re.match(r"^([A-Z])(\d+)([A-Z])$", m)
                    m_ins = re.match(r"^([A-Z])(\d+)([A-Z]{2,})$", m)
                    m_lib = re.match(r"^([A-Z])(\d+):([A-Za-z]{3})$", m)

                    notes = ""
                    # Number of WT nucleotides removed starting at region_start.
                    replaced_len = 3

                    if m_del:
                        # Deletion: remove the codon entirely
                        pos1 = int(m_del.group(2))
                        region_start = gene_offset + (pos1 - 1) * 3
                        # For deletion, new_seq is empty - forward primer starts at next codon
                        new_seq = ""
                        notes = "Deletion"

                    elif m_indel:
                        # InDel: replace residues pos1..pos2 with new amino acids
                        wt1, pos1_s, _wt2, pos2_s, ins_aa = m_indel.groups()
                        pos1, pos2 = int(pos1_s), int(pos2_s)
                        if pos2 < pos1:
                            raise ValueError(
                                f"For {mutation}: InDel end position {pos2} "
                                f"precedes start position {pos1}"
                            )
                        region_start = gene_offset + (pos1 - 1) * 3
                        # The whole pos1..pos2 range is removed, not just the first codon.
                        replaced_len = (pos2 - pos1 + 1) * 3
                        wt_codon = full_seq[region_start : region_start + 3]
                        new_seq = ""
                        for aa in ins_aa:
                            codon = pick_mutant_codon(wt_codon, aa)
                            if not codon:
                                logger.error("No codon found for %s->%s", wt1, ins_aa)
                                raise ValueError(f"No codon found for {wt1}->{ins_aa}")
                            new_seq += codon
                        notes = f"InDel: {pos2 - pos1 + 1} AA -> {len(ins_aa)} AA"

                    elif m_ins:
                        # Insertion: add amino acids after position
                        wt_aa, pos1_s, ins_str = m_ins.groups()
                        pos1 = int(pos1_s)
                        codon_start_old = gene_offset + (pos1 - 1) * 3
                        wt_codon = full_seq[codon_start_old : codon_start_old + 3]
                        if ins_str[0] == wt_aa:
                            # First AA matches WT, so insert after it: nothing
                            # is removed from the template.
                            inserted_aas = ins_str[1:]
                            region_start = codon_start_old + 3
                            replaced_len = 0
                        else:
                            # Replace WT codon with the inserted residues
                            inserted_aas = ins_str
                            region_start = codon_start_old
                        new_seq = ""
                        for aa in inserted_aas:
                            codon = pick_mutant_codon(wt_codon, aa)
                            if not codon:
                                logger.error("No codon for insertion amino acid %s", aa)
                                raise ValueError(f"No codon for insertion amino acid {aa}")
                            new_seq += codon
                        notes = f"Insertion: +{len(inserted_aas)} AA"

                    elif m_sub:
                        # Substitution: single amino acid change
                        wt_aa, pos1, mut_aa = m_sub.group(1), int(m_sub.group(2)), m_sub.group(3)
                        region_start = gene_offset + (pos1 - 1) * 3
                        wt_codon = full_seq[region_start : region_start + 3]
                        translated = translate_codon(wt_codon)
                        if translated != wt_aa:
                            logger.error(
                                "Expected %s but found %s at codon %s for mutation %s",
                                wt_aa, translated, wt_codon, mutation,
                            )
                            raise ValueError(
                                f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
                            )
                        new_seq = pick_mutant_codon(wt_codon, mut_aa)
                        if not new_seq:
                            logger.error("No minimal-change codon for %s->%s", wt_aa, mut_aa)
                            raise ValueError(f"No minimal-change codon for {wt_aa}->{mut_aa}")
                        notes = f"Substitution: {wt_aa}->{mut_aa}"

                    elif m_lib:
                        # Library mutation with degenerate codon
                        wt_aa, pos_str, degenerate_codon = m_lib.groups()
                        pos = int(pos_str)
                        degenerate_codon = degenerate_codon.upper()

                        if not is_valid_degenerate_codon(degenerate_codon):
                            raise ValueError(f"Invalid degenerate codon: {degenerate_codon}")

                        region_start = gene_offset + (pos - 1) * 3
                        wt_codon = full_seq[region_start : region_start + 3]
                        translated = translate_codon(wt_codon)
                        if translated != wt_aa:
                            logger.error(
                                "Expected %s but found %s at codon %s for mutation %s",
                                wt_aa, translated, wt_codon, mutation,
                            )
                            raise ValueError(
                                f"For {mutation}: expected {wt_aa}, found {translated} at {wt_codon}"
                            )

                        new_seq = degenerate_codon

                        # Log library coverage info
                        expanded_codons = expand_degenerate_sequence(degenerate_codon)
                        unique_aas = set(
                            translate_codon(c) for c in expanded_codons if translate_codon(c) != '?'
                        )
                        logger.info(
                            "Library mutation %s: %d possible codons, %d amino acids",
                            mutation, len(expanded_codons), len(unique_aas)
                        )
                        notes = f"Library: {len(expanded_codons)} codons, {len(unique_aas)} AAs"

                    else:
                        logger.error("Unknown mutation format: %s", mutation)
                        raise ValueError(f"Unknown mutation format: {mutation}")

                    # Handle deletion specially - forward primer starts at next position
                    if m_del:
                        # For deletion, the forward primer binding region starts
                        # immediately after the deleted codon
                        fwd_binding_start = region_start + 3

                        # Find optimal forward binding region (strict Tm/GC
                        # window only; no relaxed fallback, no Tm balancing)
                        best_fwd = None
                        best_tm_diff = float('inf')
                        target_tm = (MIN_TM + MAX_TM) / 2

                        for length in range(MIN_LENGTH, MAX_LENGTH + 1):
                            binding_end = fwd_binding_start + length
                            if binding_end > len(full_seq):
                                break
                            binding_seq = full_seq[fwd_binding_start:binding_end]
                            tm = calc_tm_binding_region(binding_seq)
                            gc = calc_gc_content(binding_seq)
                            if MIN_TM <= tm <= MAX_TM and MIN_GC <= gc <= MAX_GC:
                                tm_diff = abs(tm - target_tm)
                                if tm_diff < best_tm_diff:
                                    best_tm_diff = tm_diff
                                    best_fwd = (binding_seq, binding_seq, tm, gc, length)

                        if not best_fwd:
                            raise ValueError(
                                f"Cannot find forward primer for deletion {mutation}"
                            )

                        fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = best_fwd

                        # Reverse primer design is the same
                        rev_seq, rev_tm, rev_gc, rev_len = design_reverse_primer(
                            full_seq, region_start
                        )
                    else:
                        # design_forward_primer starts its binding region 3 nt
                        # after the position it is given, so shift that
                        # position to land exactly replaced_len nt after
                        # region_start (0 for pure insertions, a whole range
                        # for InDels).
                        fwd_anchor = region_start + replaced_len - 3
                        fwd_result = design_forward_primer(
                            full_seq, fwd_anchor, new_seq
                        )
                        # The reverse primer always ends right before region_start.
                        rev_result = design_reverse_primer(full_seq, region_start)

                        # Balance Tms
                        fwd_result, rev_result = balance_primer_tms(
                            fwd_result, rev_result, full_seq, region_start, new_seq
                        )

                        fwd_seq, fwd_binding, fwd_tm, fwd_gc, fwd_len = fwd_result
                        rev_seq, rev_tm, rev_gc, rev_len = rev_result

                    # Write forward primer
                    writer.writerow([
                        f"{mutation}_F",
                        fwd_seq,
                        f"{fwd_tm:.1f}",
                        f"{fwd_gc:.1f}",
                        fwd_len,
                        notes,
                    ])

                    # Write reverse primer
                    writer.writerow([
                        f"{mutation}_R",
                        rev_seq,
                        f"{rev_tm:.1f}",
                        f"{rev_gc:.1f}",
                        rev_len,
                        "",
                    ])

                    logger.info(
                        "Designed KLD primers for %s: F_Tm=%.1f°C, R_Tm=%.1f°C, diff=%.1f°C",
                        mutation, fwd_tm, rev_tm, abs(fwd_tm - rev_tm)
                    )

                except Exception as exc:
                    # Record which mutation failed, then abort the whole run.
                    logger.error("Error processing mutation %s: %s", mutation, exc)
                    raise

        logger.info("KLD primer design completed successfully. Output written to %s", results_path)
        return results_path

    finally:
        if managed_logger and logger:
            for handler in list(logger.handlers):
                handler.close()
                logger.removeHandler(handler)
            logger.propagate = True
633
+
634
+
635
def build_parser() -> argparse.ArgumentParser:
    """Create the argparse parser used when this module is run as a script."""
    parser = argparse.ArgumentParser(
        description="Design KLD (inverse PCR) primers from user-provided inputs."
    )

    # All four of these are mandatory path-valued options.
    required_path_options = (
        ("--gene-fasta", "Path to FASTA file containing the gene sequence."),
        ("--context-fasta", "Path to FASTA file containing the plasmid or genomic context."),
        (
            "--mutations-csv",
            "CSV file containing a 'mutations' column with each mutation specification.",
        ),
        ("--output-dir", "Directory where results and logs will be written."),
    )
    for flag, help_text in required_path_options:
        parser.add_argument(flag, required=True, type=Path, help=help_text)

    parser.add_argument(
        "--log-path",
        default=None,
        type=Path,
        help="Optional path for the run log (defaults to console logging).",
    )
    return parser
671
+
672
+
673
def main(argv: Optional[List[str]] = None):
    """Parse command-line arguments (``argv`` or sys.argv) and run the KLD design workflow."""
    args = build_parser().parse_args(argv)
    option_names = ("gene_fasta", "context_fasta", "mutations_csv", "output_dir", "log_path")
    run_design_kld(**{name: getattr(args, name) for name in option_names})
684
+
685
+
686
+ if __name__ == "__main__":
687
+ main()
@@ -22,6 +22,7 @@ except ImportError as exc: # pragma: no cover - handled at runtime
22
22
  ) from exc
23
23
 
24
24
  from uht_tooling.workflows.design_gibson import run_design_gibson
25
+ from uht_tooling.workflows.design_kld import run_design_kld
25
26
  from uht_tooling.workflows.design_slim import run_design_slim
26
27
  from uht_tooling.workflows.mut_rate import run_ep_library_profile
27
28
  from uht_tooling.workflows.mutation_caller import run_mutation_caller
@@ -187,6 +188,47 @@ def run_gui_design_slim(
187
188
  _clean_temp_path(locals().get("output_dir", Path()))
188
189
 
189
190
 
191
def run_gui_design_kld(
    template_gene_content: str,
    context_content: str,
    mutations_text: str,
) -> Tuple[str, Optional[str]]:
    """GUI callback: run the KLD primer design on pasted text inputs.

    The gene and context text are written to temporary FASTA files and the
    mutations (one per non-blank line) to a temporary single-column CSV, then
    run_design_kld is invoked on them.

    Args:
        template_gene_content: Gene sequence text entered by the user.
        context_content: Plasmid/context sequence text entered by the user.
        mutations_text: Mutation specifications, one per line; blank lines
            are ignored.

    Returns:
        ``(summary, archive_path)`` on success, or ``("⚠️ Error: ...", None)``
        when anything fails. Exceptions are logged and never propagate.
    """
    try:
        # NOTE(review): _ensure_text presumably validates/normalises the pasted
        # text and raises on empty input — confirm against its definition.
        gene_seq = _ensure_text(template_gene_content, "Template gene sequence")
        context_seq = _ensure_text(context_content, "Context sequence")
        mutation_lines = [line.strip() for line in mutations_text.splitlines() if line.strip()]
        if not mutation_lines:
            raise ValueError("Provide at least one mutation (e.g., A123G).")

        # Separate scratch directories for the staged inputs and the results.
        work_dir = Path(tempfile.mkdtemp(prefix="uht_gui_kld_work_"))
        output_dir = Path(tempfile.mkdtemp(prefix="uht_gui_kld_out_"))

        gene_fasta = work_dir / "template_gene.fasta"
        context_fasta = work_dir / "context.fasta"
        mutations_csv = work_dir / "mutations.csv"

        # Each sequence is written as a single-record FASTA with a fixed header.
        gene_fasta.write_text(f">template\n{gene_seq}\n")
        context_fasta.write_text(f">context\n{context_seq}\n")
        # The header row must be "mutations": run_design_kld requires that column.
        mutations_csv.write_text("mutations\n" + "\n".join(mutation_lines) + "\n")

        # No log_path is passed, so run_design_kld logs to the console only.
        result_csv = run_design_kld(
            gene_fasta=gene_fasta,
            context_fasta=context_fasta,
            mutations_csv=mutations_csv,
            output_dir=output_dir,
        )

        summary = _format_header("KLD Primer Design") + _preview_csv(result_csv)
        # NOTE(review): the archive must live outside output_dir, because
        # output_dir is cleaned up in the finally block — confirm in _zip_paths.
        archive = _zip_paths([output_dir], "kld")
        return summary, str(archive)
    except Exception as exc:  # pragma: no cover
        _LOGGER.exception("KLD GUI failure")
        return f"⚠️ Error: {exc}", None
    finally:
        # locals().get(...) covers failures that happen before the directories
        # exist; in that case _clean_temp_path receives Path() (i.e. ".").
        # NOTE(review): presumably _clean_temp_path ignores that value — verify.
        _clean_temp_path(locals().get("work_dir", Path()))
        _clean_temp_path(locals().get("output_dir", Path()))
230
+
231
+
190
232
  def run_gui_design_gibson(
191
233
  template_gene_content: str,
192
234
  context_content: str,
@@ -612,6 +654,7 @@ def create_gui() -> gr.Blocks:
612
654
  - Target gene coding sequence (FASTA content).
613
655
  - Plasmid or genomic context containing the gene.
614
656
  - Mutations (one per line, e.g. substitution `A123G`, deletion `T241Del`, insertion `T241TS`).
657
+ - Library codons are supported via `AApos:COD` syntax (e.g. `R57:NNK`).
615
658
 
616
659
  **Outputs**
617
660
  - `SLIM_primers.csv` with primer sequences and annealing temperatures.
@@ -642,6 +685,37 @@ def create_gui() -> gr.Blocks:
642
685
  )
643
686
  )
644
687
 
688
+ with gr.Tab("KLD"):
689
+ gr.Markdown(
690
+ textwrap.dedent(
691
+ """
692
+ ### KLD (Inverse PCR) Primer Design
693
+ Designs inverse-PCR primers for KLD cloning. Forward primers carry the mutation at the 5' end, and reverse primers bind upstream to re-amplify the full plasmid.
694
+
695
+ **Inputs**
696
+ - Target gene coding sequence (FASTA content).
697
+ - Plasmid or genomic context containing the gene.
698
+ - Mutations (one per line, e.g. substitution `A123G`, deletion `T241Del`, insertion `T241TS`).
699
+ - Library codons are supported via `AApos:COD` syntax (e.g. `R57:NNK`).
700
+
701
+ **Outputs**
702
+ - `KLD_primers.csv` with primer sequences and annealing temperatures.
703
+ - Log file capturing primer QC and any design warnings.
704
+ """
705
+ )
706
+ )
707
+ kld_gene = gr.Textbox(label="Gene sequence", lines=4)
708
+ kld_context = gr.Textbox(label="Plasmid context", lines=4)
709
+ kld_mutations = gr.Textbox(label="Mutations (one per line)", lines=6)
710
+ kld_btn = gr.Button("Design KLD primers", variant="primary")
711
+ kld_summary = gr.Markdown(label="Summary")
712
+ kld_download = gr.File(label="Download primers", file_count="single")
713
+ kld_btn.click(
714
+ fn=run_gui_design_kld,
715
+ inputs=[kld_gene, kld_context, kld_mutations],
716
+ outputs=[kld_summary, kld_download],
717
+ )
718
+
645
719
  with gr.Tab("Gibson"):
646
720
  gr.Markdown(
647
721
  textwrap.dedent(
@@ -539,7 +539,7 @@ def run_qc_analysis(fastq_path, results_dir, ref_hit_fasta, plasmid_fasta):
539
539
  f.write(
540
540
  "Q-score\tMean_AA\tStd_AA\tCI_Lower\tCI_Upper\tMappable_Bases\tSegments\n"
541
541
  )
542
- for result in qc_results:
542
+ for result in qc_results:
543
543
  f.write(
544
544
  f"{result['quality_threshold']}\t"
545
545
  f"{result['mean_aa_mutations']:.6f}\t"
@@ -2380,7 +2380,7 @@ def run_ep_library_profile(
2380
2380
  output_dir.mkdir(parents=True, exist_ok=True)
2381
2381
  work_dir.mkdir(parents=True, exist_ok=True)
2382
2382
 
2383
- master_summary_path = output_dir / "master_summary.txt"
2383
+ master_summary_path = output_dir / "master_summary.txt"
2384
2384
  header = "\t".join(
2385
2385
  [
2386
2386
  "Sample",
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
- License: MIT
6
+ License-Expression: MIT
7
7
  Requires-Python: >=3.8
8
8
  Description-Content-Type: text/markdown
9
9
  Requires-Dist: biopython==1.85
@@ -82,6 +82,7 @@ Each command mirrors a workflow module. Common entry points:
82
82
  | --- | --- |
83
83
  | `uht-tooling nextera-primers` | Generate Nextera XT primer pairs from a binding-region CSV. |
84
84
  | `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
85
+ | `uht-tooling design-kld` | Design KLD (inverse PCR) mutagenesis primers. |
85
86
  | `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
86
87
  | `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
87
88
  | `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
@@ -189,6 +190,52 @@ The workflow validates that the wild-type amino acid matches the template sequen
189
190
  - Combine 10 µL from each PCR with 10 µL H-buffer (150 mM Tris pH 8, 400 mM NaCl, 60 mM EDTA) for a 30 µL annealing reaction: 99 °C for 3 min, then two cycles of 65 °C for 5 min followed by 30 °C for 15 min, hold at 4 °C.
190
191
  - Transform directly into NEB 5-alpha or BL21 (DE3) cells without additional cleanup. The protocol has been validated for simultaneous introduction of dozens of mutations.
191
192
 
193
+ ### KLD primer design
194
+
195
+ KLD (Kinase-Ligase-DpnI) is an alternative mutagenesis method that uses inverse PCR to amplify the entire plasmid, with the mutation incorporated at the primer junction.
196
+
197
+ - Inputs: Same as SLIM design
198
+ - `data/design_kld/kld_template_gene.fasta`
199
+ - `data/design_kld/kld_context.fasta`
200
+ - `data/design_kld/kld_target_mutations.csv` (single `mutations` column)
201
+ - Run:
202
+ ```bash
203
+ uht-tooling design-kld \
204
+ --gene-fasta data/design_kld/kld_template_gene.fasta \
205
+ --context-fasta data/design_kld/kld_context.fasta \
206
+ --mutations-csv data/design_kld/kld_target_mutations.csv \
207
+ --output-dir results/design_kld/
208
+ ```
209
+ - Output: `results/design_kld/KLD_primers.csv` plus logs.
210
+
211
+ Mutation nomenclature: Same as SLIM (substitution, deletion, insertion, indel, library).
212
+
213
+ #### KLD vs SLIM
214
+
215
+ | Method | Primers | Mechanism | Best for |
216
+ |--------|---------|-----------|----------|
217
+ | SLIM | 4 per mutation | Overlap assembly | Multiple simultaneous mutations |
218
+ | KLD | 2 per mutation | Inverse PCR + ligation | Single mutations, simpler workflow |
219
+
220
+ #### KLD primer design rules
221
+
222
+ - Forward primer: Mutation codon at 5' end + downstream template-binding region
223
+ - Reverse primer: Reverse complement of the upstream region, with its 5' end adjacent to the forward primer
224
+ - Tm calculated on template-binding regions only (50-65°C target)
225
+ - Tm difference between primers kept within 5°C
226
+ - GC content 40-60%
227
+ - Binding region 18-24 bp
228
+
229
+ #### Experimental workflow
230
+
231
+ 1. PCR amplify entire plasmid with KLD primer pair
232
+ 2. DpnI digest to remove methylated template
233
+ 3. T4 PNK phosphorylation of 5' ends
234
+ 4. T4 DNA ligase to circularize
235
+ 5. Transform into competent cells
236
+
237
+ NEB sells a KLD Enzyme Mix (M0554) that combines steps 2–4 (DpnI digestion, phosphorylation, and ligation) in a single reaction.
238
+
192
239
  ### Gibson assembly primers
193
240
 
194
241
  - Inputs mirror the SLIM workflow but use `data/design_gibson/`.
@@ -293,12 +340,13 @@ Key points:
293
340
  ### Tabs and capabilities
294
341
 
295
342
  1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
296
- 2. **SLIM** – template/context FASTA text areas plus mutation list.
297
- 3. **Gibson** – multi-mutation support using `+` syntax.
298
- 4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
299
- 5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
300
- 6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
301
- 7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
343
+ 2. **SLIM** – template/context FASTA text areas plus mutation list (supports library codons like `R57:NNK`).
344
+ 3. **KLD** – inverse-PCR primer design using the same mutation list format (including library codons like `R57:NNK`).
345
+ 4. **Gibson** – multi-mutation support using `+` syntax.
346
+ 5. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
347
+ 6. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
348
+ 7. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
349
+ 8. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
302
350
 
303
351
  ### Workflow tips
304
352
 
@@ -1,17 +1,18 @@
1
1
  uht_tooling/__init__.py,sha256=hf0tJaa4_9y9aYb8OB1FtJh1FOuX08dQ6_MCveWFNAc,242
2
- uht_tooling/cli.py,sha256=XnpJbMiuB3g5GL-d2bLf4TsDsd9eWDG-tjaAaMnAPTk,13008
2
+ uht_tooling/cli.py,sha256=3QUxYBFqhQyeZ9xM_JTlqhr_UJhb_PRj7Y_UMH5Tslc,14366
3
3
  uht_tooling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  uht_tooling/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  uht_tooling/workflows/design_gibson.py,sha256=SQEThq6dxPMPCsUrwqMUaG5I-diE9jUXPRii9Y7O_7U,13617
6
+ uht_tooling/workflows/design_kld.py,sha256=SWbKVfi1JgJ7cN9TU3dLEiYmZT7LQiGL_mUZ-n3PdzE,27368
6
7
  uht_tooling/workflows/design_slim.py,sha256=wGXnmaJCzlAZTjf2SRupwt_3MBl5cgZr1O9nnMQyoGo,17767
7
- uht_tooling/workflows/gui.py,sha256=2TctLdsoqA9sx37erWWkUGjnQerPl1tPf2ShEfdL76k,43041
8
- uht_tooling/workflows/mut_rate.py,sha256=jyqZbUE7617jF_gOF4m7gX-Rgc6-WV4fWS9oVxhnAUU,109082
8
+ uht_tooling/workflows/gui.py,sha256=FpzxgjOo8SQCPJRM7ltVLk3bcwZ_AxjQzZxwz7J_c1M,46436
9
+ uht_tooling/workflows/mut_rate.py,sha256=Sv4OU68RNTOOsKV0QSbJ7FOgxh3vQeUeib_5mrXqyHg,109074
9
10
  uht_tooling/workflows/mutation_caller.py,sha256=BczuNATOSUcmlw-x6qTzEQfW8MBbvGclEyqiQiBX0cg,16222
10
11
  uht_tooling/workflows/nextera_designer.py,sha256=8MZ_DyQ0JwPojXH5mZ6bAGAkqki_0qQGac45T_Ll8FQ,6170
11
12
  uht_tooling/workflows/profile_inserts.py,sha256=C-SZ10YefiV_4QZbo1oEkI4qYipwaYqPP5jF-MC5O58,16947
12
13
  uht_tooling/workflows/umi_hunter.py,sha256=baycWycqVzUfMp5u2WZdHRl0sNuykTjy-iqtj5ahucU,15075
13
- uht_tooling-0.1.8.dist-info/METADATA,sha256=dQ8u8XSyBvbujsLyWIKAZqcDxqIkYb8BU1fFddAjxDs,14436
14
- uht_tooling-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
- uht_tooling-0.1.8.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
16
- uht_tooling-0.1.8.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
17
- uht_tooling-0.1.8.dist-info/RECORD,,
14
+ uht_tooling-0.1.9.dist-info/METADATA,sha256=mMC92ln1dMYhDQFlKRfBsMMsMVvy0p0LDY8s6aX-4Ig,16399
15
+ uht_tooling-0.1.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ uht_tooling-0.1.9.dist-info/entry_points.txt,sha256=t3_bMkEnlnV4vd6nrjNQxHDsHzHHoZenhmxuIYLcRBY,53
17
+ uht_tooling-0.1.9.dist-info/top_level.txt,sha256=iTCCiSn0OjrTx1VOdxXhUlPi1TR9LxaJEZJoMyRcv9c,12
18
+ uht_tooling-0.1.9.dist-info/RECORD,,