uht-tooling 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/PKG-INFO +103 -11
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/README.md +101 -9
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/pyproject.toml +2 -2
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/cli.py +40 -0
- uht_tooling-0.2.0/src/uht_tooling/workflows/design_kld.py +687 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/gui.py +74 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/mut_rate.py +478 -124
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/PKG-INFO +103 -11
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/SOURCES.txt +3 -1
- uht_tooling-0.2.0/tests/test_design_kld.py +169 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/setup.cfg +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/__init__.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/models/__init__.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/__init__.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/design_gibson.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/design_slim.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/mutation_caller.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/nextera_designer.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/profile_inserts.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling/workflows/umi_hunter.py +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/entry_points.txt +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/requires.txt +0 -0
- {uht_tooling-0.1.8 → uht_tooling-0.2.0}/src/uht_tooling.egg-info/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: uht-tooling
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Tooling for ultra-high throughput screening workflows.
|
|
5
5
|
Author: Matt115A
|
|
6
|
-
License: MIT
|
|
6
|
+
License-Expression: MIT
|
|
7
7
|
Requires-Python: >=3.8
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: biopython==1.85
|
|
@@ -82,6 +82,7 @@ Each command mirrors a workflow module. Common entry points:
|
|
|
82
82
|
| --- | --- |
|
|
83
83
|
| `uht-tooling nextera-primers` | Generate Nextera XT primer pairs from a binding-region CSV. |
|
|
84
84
|
| `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
|
|
85
|
+
| `uht-tooling design-kld` | Design KLD (inverse PCR) mutagenesis primers. |
|
|
85
86
|
| `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
|
|
86
87
|
| `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
|
|
87
88
|
| `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
|
|
@@ -189,6 +190,52 @@ The workflow validates that the wild-type amino acid matches the template sequen
|
|
|
189
190
|
- Combine 10 µL from each PCR with 10 µL H-buffer (150 mM Tris pH 8, 400 mM NaCl, 60 mM EDTA) for a 30 µL annealing reaction: 99 °C for 3 min, then two cycles of 65 °C for 5 min followed by 30 °C for 15 min, hold at 4 °C.
|
|
190
191
|
- Transform directly into NEB 5-alpha or BL21 (DE3) cells without additional cleanup. The protocol has been validated for simultaneous introduction of dozens of mutations.
|
|
191
192
|
|
|
193
|
+
### KLD primer design
|
|
194
|
+
|
|
195
|
+
KLD (Kinase-Ligation-DpnI) is an alternative mutagenesis method using inverse PCR to amplify the entire plasmid with mutations incorporated at the primer junction.
|
|
196
|
+
|
|
197
|
+
- Inputs: Same as SLIM design
|
|
198
|
+
- `data/design_kld/kld_template_gene.fasta`
|
|
199
|
+
- `data/design_kld/kld_context.fasta`
|
|
200
|
+
- `data/design_kld/kld_target_mutations.csv` (single `mutations` column)
|
|
201
|
+
- Run:
|
|
202
|
+
```bash
|
|
203
|
+
uht-tooling design-kld \
|
|
204
|
+
--gene-fasta data/design_kld/kld_template_gene.fasta \
|
|
205
|
+
--context-fasta data/design_kld/kld_context.fasta \
|
|
206
|
+
--mutations-csv data/design_kld/kld_target_mutations.csv \
|
|
207
|
+
--output-dir results/design_kld/
|
|
208
|
+
```
|
|
209
|
+
- Output: `results/design_kld/KLD_primers.csv` plus logs.
|
|
210
|
+
|
|
211
|
+
Mutation nomenclature: Same as SLIM (substitution, deletion, insertion, indel, library).
|
|
212
|
+
|
|
213
|
+
#### KLD vs SLIM
|
|
214
|
+
|
|
215
|
+
| Method | Primers | Mechanism | Best for |
|
|
216
|
+
|--------|---------|-----------|----------|
|
|
217
|
+
| SLIM | 4 per mutation | Overlap assembly | Multiple simultaneous mutations |
|
|
218
|
+
| KLD | 2 per mutation | Inverse PCR + ligation | Single mutations, simpler workflow |
|
|
219
|
+
|
|
220
|
+
#### KLD primer design rules
|
|
221
|
+
|
|
222
|
+
- Forward primer: Mutation codon at 5' end + downstream template-binding region
|
|
223
|
+
- Reverse primer: Reverse complement of upstream region, 5' end adjacent to forward
|
|
224
|
+
- Tm calculated on template-binding regions only (50-65°C target)
|
|
225
|
+
- Tm difference between primers kept within 5°C
|
|
226
|
+
- GC content 40-60%
|
|
227
|
+
- Binding region 18-24 bp
|
|
228
|
+
|
|
229
|
+
#### Experimental workflow
|
|
230
|
+
|
|
231
|
+
1. PCR amplify entire plasmid with KLD primer pair
|
|
232
|
+
2. DpnI digest to remove methylated template
|
|
233
|
+
3. T4 PNK phosphorylation of 5' ends
|
|
234
|
+
4. T4 DNA ligase to circularize
|
|
235
|
+
5. Transform into competent cells
|
|
236
|
+
|
|
237
|
+
NEB sells a KLD Enzyme Mix (M0554) that combines these steps.
|
|
238
|
+
|
|
192
239
|
### Gibson assembly primers
|
|
193
240
|
|
|
194
241
|
- Inputs mirror the SLIM workflow but use `data/design_gibson/`.
|
|
@@ -266,13 +313,57 @@ Please be aware, this toolkit will not scale well beyond around 50k reads/sample
|
|
|
266
313
|
--fastq data/ep-library-profile/*.fastq.gz \
|
|
267
314
|
--output-dir results/ep-library-profile/
|
|
268
315
|
```
|
|
269
|
-
|
|
316
|
+
|
|
317
|
+
**Output structure**
|
|
318
|
+
|
|
319
|
+
Each sample produces an organized output directory:
|
|
320
|
+
|
|
321
|
+
```
|
|
322
|
+
sample_name/
|
|
323
|
+
├── KEY_FINDINGS.txt # Lay-user executive summary
|
|
324
|
+
├── summary_panels.png/pdf # Main visualization
|
|
325
|
+
├── aa_mutation_consensus.txt # Consensus estimate details
|
|
326
|
+
├── run.log # Analysis log
|
|
327
|
+
└── detailed/ # Technical outputs
|
|
328
|
+
├── methodology_notes.txt # Documents which lambda drives what
|
|
329
|
+
├── lambda_comparison.csv # Side-by-side lambda comparison
|
|
330
|
+
├── gene_mismatch_rates.csv
|
|
331
|
+
├── base_distribution.csv
|
|
332
|
+
├── aa_substitutions.csv
|
|
333
|
+
├── plasmid_coverage.csv
|
|
334
|
+
├── aa_mutation_distribution.csv
|
|
335
|
+
├── comprehensive_qc_data.csv
|
|
336
|
+
├── simple_qc_data.csv
|
|
337
|
+
└── qc_plots/ # QC visualizations
|
|
338
|
+
├── qc_plot_*.png
|
|
339
|
+
├── comprehensive_qc_analysis.png
|
|
340
|
+
├── error_analysis.png
|
|
341
|
+
└── qc_mutation_rate_vs_quality.png/csv
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
**Lambda estimates: which to use**
|
|
345
|
+
|
|
346
|
+
The profiler calculates lambda (mutations per gene copy) via two methods:
|
|
347
|
+
|
|
348
|
+
| Method | Formula | Error Quantified? | Used For |
|
|
349
|
+
|--------|---------|-------------------|----------|
|
|
350
|
+
| Simple | `(hit_rate - bg_rate) × seq_len` | No | KDE plot, Monte Carlo simulation |
|
|
351
|
+
| Consensus | Precision-weighted average across Q-scores | Yes | Recommended for reporting |
|
|
352
|
+
|
|
353
|
+
- **For publication/reporting**: Use the consensus value from `KEY_FINDINGS.txt` or `aa_mutation_consensus.txt`.
|
|
354
|
+
- **For understanding distribution shape**: See the KDE plot in `summary_panels.png` (note: uses simple lambda).
|
|
355
|
+
- **For detailed error analysis**: See `detailed/comprehensive_qc_data.csv`.
|
|
356
|
+
|
|
357
|
+
The `KEY_FINDINGS.txt` file provides a plain-language summary including:
|
|
358
|
+
- Expected AA mutations per gene copy
|
|
359
|
+
- Poisson-based interpretation (% wild-type, % 1 mutation, % 2+ mutations)
|
|
360
|
+
- Quality assessment (GOOD/ACCEPTABLE/LOW COVERAGE)
|
|
270
361
|
|
|
271
362
|
**How the mutation rate and AA expectations are derived**
|
|
272
363
|
|
|
273
|
-
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the
|
|
364
|
+
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the "target" rate; mismatches elsewhere provide the background.
|
|
274
365
|
2. The per-base background rate is subtracted from the target rate to yield a net nucleotide mutation rate, and the standard deviation reflects binomial sampling and quality-score uncertainty.
|
|
275
|
-
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these
|
|
366
|
+
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these drive the AA mutation mean/variance that appear in the panel plot.
|
|
276
367
|
4. If multiple Q-score thresholds are analysed, the CLI aggregates them via a precision-weighted consensus (1 / standard deviation weighting) after filtering out thresholds with insufficient coverage; the consensus value is written to `aa_mutation_consensus.txt` and plotted as a horizontal guide.
|
|
277
368
|
|
|
278
369
|
---
|
|
@@ -293,12 +384,13 @@ Key points:
|
|
|
293
384
|
### Tabs and capabilities
|
|
294
385
|
|
|
295
386
|
1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
|
|
296
|
-
2. **SLIM** – template/context FASTA text areas plus mutation list.
|
|
297
|
-
3. **
|
|
298
|
-
4. **
|
|
299
|
-
5. **
|
|
300
|
-
6. **
|
|
301
|
-
7. **
|
|
387
|
+
2. **SLIM** – template/context FASTA text areas plus mutation list (supports library codons like `R57:NNK`).
|
|
388
|
+
3. **KLD** – inverse-PCR primer design using the same mutation list format (including library codons like `R57:NNK`).
|
|
389
|
+
4. **Gibson** – multi-mutation support using `+` syntax.
|
|
390
|
+
5. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
|
|
391
|
+
6. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
|
|
392
|
+
7. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
|
|
393
|
+
8. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
|
|
302
394
|
|
|
303
395
|
### Workflow tips
|
|
304
396
|
|
|
@@ -53,6 +53,7 @@ Each command mirrors a workflow module. Common entry points:
|
|
|
53
53
|
| --- | --- |
|
|
54
54
|
| `uht-tooling nextera-primers` | Generate Nextera XT primer pairs from a binding-region CSV. |
|
|
55
55
|
| `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
|
|
56
|
+
| `uht-tooling design-kld` | Design KLD (inverse PCR) mutagenesis primers. |
|
|
56
57
|
| `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
|
|
57
58
|
| `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
|
|
58
59
|
| `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
|
|
@@ -160,6 +161,52 @@ The workflow validates that the wild-type amino acid matches the template sequen
|
|
|
160
161
|
- Combine 10 µL from each PCR with 10 µL H-buffer (150 mM Tris pH 8, 400 mM NaCl, 60 mM EDTA) for a 30 µL annealing reaction: 99 °C for 3 min, then two cycles of 65 °C for 5 min followed by 30 °C for 15 min, hold at 4 °C.
|
|
161
162
|
- Transform directly into NEB 5-alpha or BL21 (DE3) cells without additional cleanup. The protocol has been validated for simultaneous introduction of dozens of mutations.
|
|
162
163
|
|
|
164
|
+
### KLD primer design
|
|
165
|
+
|
|
166
|
+
KLD (Kinase-Ligation-DpnI) is an alternative mutagenesis method using inverse PCR to amplify the entire plasmid with mutations incorporated at the primer junction.
|
|
167
|
+
|
|
168
|
+
- Inputs: Same as SLIM design
|
|
169
|
+
- `data/design_kld/kld_template_gene.fasta`
|
|
170
|
+
- `data/design_kld/kld_context.fasta`
|
|
171
|
+
- `data/design_kld/kld_target_mutations.csv` (single `mutations` column)
|
|
172
|
+
- Run:
|
|
173
|
+
```bash
|
|
174
|
+
uht-tooling design-kld \
|
|
175
|
+
--gene-fasta data/design_kld/kld_template_gene.fasta \
|
|
176
|
+
--context-fasta data/design_kld/kld_context.fasta \
|
|
177
|
+
--mutations-csv data/design_kld/kld_target_mutations.csv \
|
|
178
|
+
--output-dir results/design_kld/
|
|
179
|
+
```
|
|
180
|
+
- Output: `results/design_kld/KLD_primers.csv` plus logs.
|
|
181
|
+
|
|
182
|
+
Mutation nomenclature: Same as SLIM (substitution, deletion, insertion, indel, library).
|
|
183
|
+
|
|
184
|
+
#### KLD vs SLIM
|
|
185
|
+
|
|
186
|
+
| Method | Primers | Mechanism | Best for |
|
|
187
|
+
|--------|---------|-----------|----------|
|
|
188
|
+
| SLIM | 4 per mutation | Overlap assembly | Multiple simultaneous mutations |
|
|
189
|
+
| KLD | 2 per mutation | Inverse PCR + ligation | Single mutations, simpler workflow |
|
|
190
|
+
|
|
191
|
+
#### KLD primer design rules
|
|
192
|
+
|
|
193
|
+
- Forward primer: Mutation codon at 5' end + downstream template-binding region
|
|
194
|
+
- Reverse primer: Reverse complement of upstream region, 5' end adjacent to forward
|
|
195
|
+
- Tm calculated on template-binding regions only (50-65°C target)
|
|
196
|
+
- Tm difference between primers kept within 5°C
|
|
197
|
+
- GC content 40-60%
|
|
198
|
+
- Binding region 18-24 bp
|
|
199
|
+
|
|
200
|
+
#### Experimental workflow
|
|
201
|
+
|
|
202
|
+
1. PCR amplify entire plasmid with KLD primer pair
|
|
203
|
+
2. DpnI digest to remove methylated template
|
|
204
|
+
3. T4 PNK phosphorylation of 5' ends
|
|
205
|
+
4. T4 DNA ligase to circularize
|
|
206
|
+
5. Transform into competent cells
|
|
207
|
+
|
|
208
|
+
NEB sells a KLD Enzyme Mix (M0554) that combines these steps.
|
|
209
|
+
|
|
163
210
|
### Gibson assembly primers
|
|
164
211
|
|
|
165
212
|
- Inputs mirror the SLIM workflow but use `data/design_gibson/`.
|
|
@@ -237,13 +284,57 @@ Please be aware, this toolkit will not scale well beyond around 50k reads/sample
|
|
|
237
284
|
--fastq data/ep-library-profile/*.fastq.gz \
|
|
238
285
|
--output-dir results/ep-library-profile/
|
|
239
286
|
```
|
|
240
|
-
|
|
287
|
+
|
|
288
|
+
**Output structure**
|
|
289
|
+
|
|
290
|
+
Each sample produces an organized output directory:
|
|
291
|
+
|
|
292
|
+
```
|
|
293
|
+
sample_name/
|
|
294
|
+
├── KEY_FINDINGS.txt # Lay-user executive summary
|
|
295
|
+
├── summary_panels.png/pdf # Main visualization
|
|
296
|
+
├── aa_mutation_consensus.txt # Consensus estimate details
|
|
297
|
+
├── run.log # Analysis log
|
|
298
|
+
└── detailed/ # Technical outputs
|
|
299
|
+
├── methodology_notes.txt # Documents which lambda drives what
|
|
300
|
+
├── lambda_comparison.csv # Side-by-side lambda comparison
|
|
301
|
+
├── gene_mismatch_rates.csv
|
|
302
|
+
├── base_distribution.csv
|
|
303
|
+
├── aa_substitutions.csv
|
|
304
|
+
├── plasmid_coverage.csv
|
|
305
|
+
├── aa_mutation_distribution.csv
|
|
306
|
+
├── comprehensive_qc_data.csv
|
|
307
|
+
├── simple_qc_data.csv
|
|
308
|
+
└── qc_plots/ # QC visualizations
|
|
309
|
+
├── qc_plot_*.png
|
|
310
|
+
├── comprehensive_qc_analysis.png
|
|
311
|
+
├── error_analysis.png
|
|
312
|
+
└── qc_mutation_rate_vs_quality.png/csv
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
**Lambda estimates: which to use**
|
|
316
|
+
|
|
317
|
+
The profiler calculates lambda (mutations per gene copy) via two methods:
|
|
318
|
+
|
|
319
|
+
| Method | Formula | Error Quantified? | Used For |
|
|
320
|
+
|--------|---------|-------------------|----------|
|
|
321
|
+
| Simple | `(hit_rate - bg_rate) × seq_len` | No | KDE plot, Monte Carlo simulation |
|
|
322
|
+
| Consensus | Precision-weighted average across Q-scores | Yes | Recommended for reporting |
|
|
323
|
+
|
|
324
|
+
- **For publication/reporting**: Use the consensus value from `KEY_FINDINGS.txt` or `aa_mutation_consensus.txt`.
|
|
325
|
+
- **For understanding distribution shape**: See the KDE plot in `summary_panels.png` (note: uses simple lambda).
|
|
326
|
+
- **For detailed error analysis**: See `detailed/comprehensive_qc_data.csv`.
|
|
327
|
+
|
|
328
|
+
The `KEY_FINDINGS.txt` file provides a plain-language summary including:
|
|
329
|
+
- Expected AA mutations per gene copy
|
|
330
|
+
- Poisson-based interpretation (% wild-type, % 1 mutation, % 2+ mutations)
|
|
331
|
+
- Quality assessment (GOOD/ACCEPTABLE/LOW COVERAGE)
|
|
241
332
|
|
|
242
333
|
**How the mutation rate and AA expectations are derived**
|
|
243
334
|
|
|
244
|
-
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the
|
|
335
|
+
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the "target" rate; mismatches elsewhere provide the background.
|
|
245
336
|
2. The per-base background rate is subtracted from the target rate to yield a net nucleotide mutation rate, and the standard deviation reflects binomial sampling and quality-score uncertainty.
|
|
246
|
-
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these
|
|
337
|
+
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these drive the AA mutation mean/variance that appear in the panel plot.
|
|
247
338
|
4. If multiple Q-score thresholds are analysed, the CLI aggregates them via a precision-weighted consensus (1 / standard deviation weighting) after filtering out thresholds with insufficient coverage; the consensus value is written to `aa_mutation_consensus.txt` and plotted as a horizontal guide.
|
|
248
339
|
|
|
249
340
|
---
|
|
@@ -264,12 +355,13 @@ Key points:
|
|
|
264
355
|
### Tabs and capabilities
|
|
265
356
|
|
|
266
357
|
1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
|
|
267
|
-
2. **SLIM** – template/context FASTA text areas plus mutation list.
|
|
268
|
-
3. **
|
|
269
|
-
4. **
|
|
270
|
-
5. **
|
|
271
|
-
6. **
|
|
272
|
-
7. **
|
|
358
|
+
2. **SLIM** – template/context FASTA text areas plus mutation list (supports library codons like `R57:NNK`).
|
|
359
|
+
3. **KLD** – inverse-PCR primer design using the same mutation list format (including library codons like `R57:NNK`).
|
|
360
|
+
4. **Gibson** – multi-mutation support using `+` syntax.
|
|
361
|
+
5. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
|
|
362
|
+
6. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
|
|
363
|
+
7. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
|
|
364
|
+
8. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
|
|
273
365
|
|
|
274
366
|
### Workflow tips
|
|
275
367
|
|
|
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "uht-tooling"
|
|
7
|
-
version = "0.1.8"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Tooling for ultra-high throughput screening workflows."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
11
|
-
license = { text = "MIT" }
|
|
11
|
+
license = "MIT"
|
|
12
12
|
authors = [{ name = "Matt115A" }]
|
|
13
13
|
dependencies = [
|
|
14
14
|
"biopython==1.85",
|
|
@@ -4,6 +4,7 @@ from typing import Optional
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
6
|
from uht_tooling.workflows.design_gibson import run_design_gibson
|
|
7
|
+
from uht_tooling.workflows.design_kld import run_design_kld
|
|
7
8
|
from uht_tooling.workflows.design_slim import run_design_slim
|
|
8
9
|
from uht_tooling.workflows.mutation_caller import (
|
|
9
10
|
expand_fastq_inputs as expand_fastq_inputs_mutation,
|
|
@@ -66,6 +67,45 @@ def design_slim_command(
|
|
|
66
67
|
typer.echo(f"SLIM primers written to {output_dir / 'SLIM_primers.csv'}")
|
|
67
68
|
|
|
68
69
|
|
|
70
|
+
@app.command("design-kld", help="Design KLD (inverse PCR) primers from user-specified FASTA/CSV inputs.")
|
|
71
|
+
def design_kld_command(
|
|
72
|
+
gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
|
|
73
|
+
context_fasta: Path = typer.Option(
|
|
74
|
+
...,
|
|
75
|
+
exists=True,
|
|
76
|
+
readable=True,
|
|
77
|
+
help="Path to the context FASTA file containing the plasmid or genomic sequence.",
|
|
78
|
+
),
|
|
79
|
+
mutations_csv: Path = typer.Option(
|
|
80
|
+
...,
|
|
81
|
+
exists=True,
|
|
82
|
+
readable=True,
|
|
83
|
+
help="CSV file containing a 'mutations' column with the desired edits.",
|
|
84
|
+
),
|
|
85
|
+
output_dir: Path = typer.Option(
|
|
86
|
+
...,
|
|
87
|
+
dir_okay=True,
|
|
88
|
+
writable=True,
|
|
89
|
+
help="Directory where results will be written.",
|
|
90
|
+
),
|
|
91
|
+
log_path: Optional[Path] = typer.Option(
|
|
92
|
+
None,
|
|
93
|
+
dir_okay=False,
|
|
94
|
+
writable=True,
|
|
95
|
+
help="Optional path to write a dedicated log file for this run.",
|
|
96
|
+
),
|
|
97
|
+
):
|
|
98
|
+
"""Design KLD (inverse PCR) primers from user-provided inputs."""
|
|
99
|
+
result_path = run_design_kld(
|
|
100
|
+
gene_fasta=gene_fasta,
|
|
101
|
+
context_fasta=context_fasta,
|
|
102
|
+
mutations_csv=mutations_csv,
|
|
103
|
+
output_dir=output_dir,
|
|
104
|
+
log_path=log_path,
|
|
105
|
+
)
|
|
106
|
+
typer.echo(f"KLD primers written to {result_path}")
|
|
107
|
+
|
|
108
|
+
|
|
69
109
|
@app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
|
|
70
110
|
def nextera_primers_command(
|
|
71
111
|
binding_csv: Path = typer.Option(
|