uht-tooling 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/PKG-INFO +17 -9
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/README.md +16 -8
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/pyproject.toml +1 -1
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/cli.py +12 -1
- uht_tooling-0.1.5/src/uht_tooling/workflows/gui.py +1007 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/mut_rate.py +203 -106
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/umi_hunter.py +17 -2
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/PKG-INFO +17 -9
- uht_tooling-0.1.3/src/uht_tooling/workflows/gui.py +0 -595
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/setup.cfg +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/__init__.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/models/__init__.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/__init__.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/design_gibson.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/design_slim.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/mutation_caller.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/nextera_designer.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling/workflows/profile_inserts.py +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/SOURCES.txt +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/entry_points.txt +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/requires.txt +0 -0
- {uht_tooling-0.1.3 → uht_tooling-0.1.5}/src/uht_tooling.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: uht-tooling
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Tooling for ultra-high throughput screening workflows.
|
|
5
5
|
Author: Matt115A
|
|
6
6
|
License: MIT
|
|
@@ -35,7 +35,7 @@ Automation helpers for ultra-high-throughput molecular biology workflows. The pa
|
|
|
35
35
|
|
|
36
36
|
### Quick install (recommended, easiest file maintenance)
|
|
37
37
|
```bash
|
|
38
|
-
pip install "uht-tooling[gui]==0.1.
|
|
38
|
+
pip install "uht-tooling[gui]==0.1.4"
|
|
39
39
|
|
|
40
40
|
```
|
|
41
41
|
|
|
@@ -189,9 +189,10 @@ If mutations fall within overlapping primer windows, design sequential reactions
|
|
|
189
189
|
--fastq data/umi_hunter/*.fastq.gz \
|
|
190
190
|
--output-dir results/umi_hunter/
|
|
191
191
|
```
|
|
192
|
-
- Tunable parameters include `--umi-identity-threshold
|
|
193
|
-
-
|
|
194
|
-
-
|
|
192
|
+
- Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
|
|
193
|
+
- `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
|
|
194
|
+
- `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
|
|
195
|
+
- `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
|
|
195
196
|
|
|
196
197
|
Please be aware, this toolkit will not scale well beyond around 50k reads/sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
|
|
197
198
|
|
|
@@ -221,7 +222,14 @@ Please be aware, this toolkit will not scale well beyond around 50k reads/sample
|
|
|
221
222
|
--fastq data/ep-library-profile/*.fastq.gz \
|
|
222
223
|
--output-dir results/ep-library-profile/
|
|
223
224
|
```
|
|
224
|
-
- Output bundle includes per-sample directories
|
|
225
|
+
- Output bundle includes per-sample directories, a master summary TSV, and a `summary_panels` figure that visualises positional mutation rates, coverage, and amino-acid simulations.
|
|
226
|
+
|
|
227
|
+
**How the mutation rate and AA expectations are derived**
|
|
228
|
+
|
|
229
|
+
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the “target” rate; mismatches elsewhere provide the background.
|
|
230
|
+
2. The per-base background rate is subtracted from the target rate to yield a net nucleotide mutation rate, and the standard deviation reflects binomial sampling and quality-score uncertainty.
|
|
231
|
+
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these counts drive the AA mutation mean and variance that appear in the panel plot.
|
|
232
|
+
4. If multiple Q-score thresholds are analysed, the CLI aggregates them via a precision-weighted consensus (1 / standard deviation weighting) after filtering out thresholds with insufficient coverage; the consensus value is written to `aa_mutation_consensus.txt` and plotted as a horizontal guide.
|
|
225
233
|
|
|
226
234
|
---
|
|
227
235
|
|
|
@@ -243,9 +251,9 @@ Key points:
|
|
|
243
251
|
1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
|
|
244
252
|
2. **SLIM** – template/context FASTA text areas plus mutation list.
|
|
245
253
|
3. **Gibson** – multi-mutation support using `+` syntax.
|
|
246
|
-
4. **Mutation Caller** – upload FASTQ
|
|
247
|
-
5. **UMI Hunter** – long-read UMI clustering with
|
|
248
|
-
6. **Profile Inserts** – probe
|
|
254
|
+
4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
|
|
255
|
+
5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
|
|
256
|
+
6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
|
|
249
257
|
7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
|
|
250
258
|
|
|
251
259
|
### Workflow tips
|
|
@@ -8,7 +8,7 @@ Automation helpers for ultra-high-throughput molecular biology workflows. The pa
|
|
|
8
8
|
|
|
9
9
|
### Quick install (recommended, easiest file maintenance)
|
|
10
10
|
```bash
|
|
11
|
-
pip install "uht-tooling[gui]==0.1.
|
|
11
|
+
pip install "uht-tooling[gui]==0.1.4"
|
|
12
12
|
|
|
13
13
|
```
|
|
14
14
|
|
|
@@ -162,9 +162,10 @@ If mutations fall within overlapping primer windows, design sequential reactions
|
|
|
162
162
|
--fastq data/umi_hunter/*.fastq.gz \
|
|
163
163
|
--output-dir results/umi_hunter/
|
|
164
164
|
```
|
|
165
|
-
- Tunable parameters include `--umi-identity-threshold
|
|
166
|
-
-
|
|
167
|
-
-
|
|
165
|
+
- Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
|
|
166
|
+
- `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
|
|
167
|
+
- `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
|
|
168
|
+
- `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
|
|
168
169
|
|
|
169
170
|
Please be aware, this toolkit will not scale well beyond around 50k reads/sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
|
|
170
171
|
|
|
@@ -194,7 +195,14 @@ Please be aware, this toolkit will not scale well beyond around 50k reads/sample
|
|
|
194
195
|
--fastq data/ep-library-profile/*.fastq.gz \
|
|
195
196
|
--output-dir results/ep-library-profile/
|
|
196
197
|
```
|
|
197
|
-
- Output bundle includes per-sample directories
|
|
198
|
+
- Output bundle includes per-sample directories, a master summary TSV, and a `summary_panels` figure that visualises positional mutation rates, coverage, and amino-acid simulations.
|
|
199
|
+
|
|
200
|
+
**How the mutation rate and AA expectations are derived**
|
|
201
|
+
|
|
202
|
+
1. Reads are aligned to both the region of interest and the full plasmid. Mismatches in the region define the “target” rate; mismatches elsewhere provide the background.
|
|
203
|
+
2. The per-base background rate is subtracted from the target rate to yield a net nucleotide mutation rate, and the standard deviation reflects binomial sampling and quality-score uncertainty.
|
|
204
|
+
3. The net rate is multiplied by the CDS length to estimate λ_bp (mutations per copy). Monte Carlo simulations then flip random bases, translate the mutated CDS, and count amino-acid differences across 1,000 trials—these counts drive the AA mutation mean and variance that appear in the panel plot.
|
|
205
|
+
4. If multiple Q-score thresholds are analysed, the CLI aggregates them via a precision-weighted consensus (1 / standard deviation weighting) after filtering out thresholds with insufficient coverage; the consensus value is written to `aa_mutation_consensus.txt` and plotted as a horizontal guide.
|
|
198
206
|
|
|
199
207
|
---
|
|
200
208
|
|
|
@@ -216,9 +224,9 @@ Key points:
|
|
|
216
224
|
1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
|
|
217
225
|
2. **SLIM** – template/context FASTA text areas plus mutation list.
|
|
218
226
|
3. **Gibson** – multi-mutation support using `+` syntax.
|
|
219
|
-
4. **Mutation Caller** – upload FASTQ
|
|
220
|
-
5. **UMI Hunter** – long-read UMI clustering with
|
|
221
|
-
6. **Profile Inserts** – probe
|
|
227
|
+
4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
|
|
228
|
+
5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
|
|
229
|
+
6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
|
|
222
230
|
7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
|
|
223
231
|
|
|
224
232
|
### Workflow tips
|
|
@@ -233,6 +233,11 @@ def umi_hunter_command(
|
|
|
233
233
|
max=1.0,
|
|
234
234
|
help="Mutation threshold for consensus calling (default: 0.7).",
|
|
235
235
|
),
|
|
236
|
+
min_cluster_size: int = typer.Option(
|
|
237
|
+
1,
|
|
238
|
+
min=1,
|
|
239
|
+
help="Minimum number of reads required in a UMI cluster before a consensus is generated.",
|
|
240
|
+
),
|
|
236
241
|
log_path: Optional[Path] = typer.Option(
|
|
237
242
|
None,
|
|
238
243
|
dir_okay=False,
|
|
@@ -249,6 +254,7 @@ def umi_hunter_command(
|
|
|
249
254
|
output_dir=output_dir,
|
|
250
255
|
umi_identity_threshold=umi_identity_threshold,
|
|
251
256
|
consensus_mutation_threshold=consensus_mutation_threshold,
|
|
257
|
+
min_cluster_size=min_cluster_size,
|
|
252
258
|
log_path=log_path,
|
|
253
259
|
)
|
|
254
260
|
if not results:
|
|
@@ -256,7 +262,12 @@ def umi_hunter_command(
|
|
|
256
262
|
else:
|
|
257
263
|
typer.echo("UMI hunter outputs:")
|
|
258
264
|
for entry in results:
|
|
259
|
-
|
|
265
|
+
total_clusters = entry.get("clusters_total", entry.get("clusters", 0))
|
|
266
|
+
typer.echo(
|
|
267
|
+
f" Sample {entry['sample']}: "
|
|
268
|
+
f"{entry.get('clusters', 0)} consensus clusters "
|
|
269
|
+
f"(from {total_clusters} total) → {entry['directory']}"
|
|
270
|
+
)
|
|
260
271
|
|
|
261
272
|
|
|
262
273
|
@app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")
|