uht-tooling 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/PKG-INFO +8 -7
  2. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/README.md +7 -6
  3. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/pyproject.toml +1 -1
  4. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/cli.py +12 -1
  5. uht_tooling-0.1.4/src/uht_tooling/workflows/gui.py +1006 -0
  6. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/umi_hunter.py +17 -2
  7. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/PKG-INFO +8 -7
  8. uht_tooling-0.1.3/src/uht_tooling/workflows/gui.py +0 -595
  9. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/setup.cfg +0 -0
  10. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/__init__.py +0 -0
  11. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/models/__init__.py +0 -0
  12. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/__init__.py +0 -0
  13. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/design_gibson.py +0 -0
  14. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/design_slim.py +0 -0
  15. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/mut_rate.py +0 -0
  16. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/mutation_caller.py +0 -0
  17. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/nextera_designer.py +0 -0
  18. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling/workflows/profile_inserts.py +0 -0
  19. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/SOURCES.txt +0 -0
  20. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
  21. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/entry_points.txt +0 -0
  22. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/requires.txt +0 -0
  23. {uht_tooling-0.1.3 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
6
  License: MIT
@@ -189,9 +189,10 @@ If mutations fall within overlapping primer windows, design sequential reactions
189
189
  --fastq data/umi_hunter/*.fastq.gz \
190
190
  --output-dir results/umi_hunter/
191
191
  ```
192
- - Tunable parameters include `--umi-identity-threshold` and `--consensus-mutation-threshold`.
193
- - --umi-identity-threshold is a decimal between 0-1 and defines how similar two UMIs have to be to be considered grouped.
194
- - --consensus-mutation-threshold is the minimum group size to report a consensus sequence.
192
+ - Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
193
+ - `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
194
+ - `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
195
+ - `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
195
196
 
196
197
  Please be aware, this toolkit will not scale well beyond around 50k reads/sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
197
198
 
@@ -243,9 +244,9 @@ Key points:
243
244
  1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
244
245
  2. **SLIM** – template/context FASTA text areas plus mutation list.
245
246
  3. **Gibson** – multi-mutation support using `+` syntax.
246
- 4. **Mutation Caller** – upload FASTQ, template FASTA, and configuration CSV.
247
- 5. **UMI Hunter** – long-read UMI clustering with configurable thresholds.
248
- 6. **Profile Inserts** – probe CSV and multiple FASTQ uploads.
247
+ 4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
248
+ 5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
249
+ 6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
249
250
  7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
250
251
 
251
252
  ### Workflow tips
@@ -162,9 +162,10 @@ If mutations fall within overlapping primer windows, design sequential reactions
162
162
  --fastq data/umi_hunter/*.fastq.gz \
163
163
  --output-dir results/umi_hunter/
164
164
  ```
165
- - Tunable parameters include `--umi-identity-threshold` and `--consensus-mutation-threshold`.
166
- - --umi-identity-threshold is a decimal between 0-1 and defines how similar two UMIs have to be to be considered grouped.
167
- - --consensus-mutation-threshold is the minimum group size to report a consensus sequence.
165
+ - Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
166
+ - `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
167
+ - `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
168
+ - `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
168
169
 
169
170
  Please be aware, this toolkit will not scale well beyond around 50k reads/sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
170
171
 
@@ -216,9 +217,9 @@ Key points:
216
217
  1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
217
218
  2. **SLIM** – template/context FASTA text areas plus mutation list.
218
219
  3. **Gibson** – multi-mutation support using `+` syntax.
219
- 4. **Mutation Caller** – upload FASTQ, template FASTA, and configuration CSV.
220
- 5. **UMI Hunter** – long-read UMI clustering with configurable thresholds.
221
- 6. **Profile Inserts** – probe CSV and multiple FASTQ uploads.
220
+ 4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
221
+ 5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
222
+ 6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
222
223
  7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
223
224
 
224
225
  ### Workflow tips
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "uht-tooling"
7
- version = "0.1.3"
7
+ version = "0.1.4"
8
8
  description = "Tooling for ultra-high throughput screening workflows."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -233,6 +233,11 @@ def umi_hunter_command(
233
233
  max=1.0,
234
234
  help="Mutation threshold for consensus calling (default: 0.7).",
235
235
  ),
236
+ min_cluster_size: int = typer.Option(
237
+ 1,
238
+ min=1,
239
+ help="Minimum number of reads required in a UMI cluster before a consensus is generated.",
240
+ ),
236
241
  log_path: Optional[Path] = typer.Option(
237
242
  None,
238
243
  dir_okay=False,
@@ -249,6 +254,7 @@ def umi_hunter_command(
249
254
  output_dir=output_dir,
250
255
  umi_identity_threshold=umi_identity_threshold,
251
256
  consensus_mutation_threshold=consensus_mutation_threshold,
257
+ min_cluster_size=min_cluster_size,
252
258
  log_path=log_path,
253
259
  )
254
260
  if not results:
@@ -256,7 +262,12 @@ def umi_hunter_command(
256
262
  else:
257
263
  typer.echo("UMI hunter outputs:")
258
264
  for entry in results:
259
- typer.echo(f" Sample {entry['sample']}: {entry['directory']}")
265
+ total_clusters = entry.get("clusters_total", entry.get("clusters", 0))
266
+ typer.echo(
267
+ f" Sample {entry['sample']}: "
268
+ f"{entry.get('clusters', 0)} consensus clusters "
269
+ f"(from {total_clusters} total) → {entry['directory']}"
270
+ )
260
271
 
261
272
 
262
273
  @app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")