uht-tooling 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/PKG-INFO +25 -19
  2. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/README.md +24 -18
  3. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/pyproject.toml +1 -1
  4. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/cli.py +41 -4
  5. uht_tooling-0.1.4/src/uht_tooling/workflows/gui.py +1006 -0
  6. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/umi_hunter.py +17 -2
  7. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/PKG-INFO +25 -19
  8. uht_tooling-0.1.2/src/uht_tooling/workflows/gui.py +0 -595
  9. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/setup.cfg +0 -0
  10. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/__init__.py +0 -0
  11. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/models/__init__.py +0 -0
  12. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/__init__.py +0 -0
  13. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/design_gibson.py +0 -0
  14. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/design_slim.py +0 -0
  15. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/mut_rate.py +0 -0
  16. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/mutation_caller.py +0 -0
  17. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/nextera_designer.py +0 -0
  18. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling/workflows/profile_inserts.py +0 -0
  19. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/SOURCES.txt +0 -0
  20. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
  21. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/entry_points.txt +0 -0
  22. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/requires.txt +0 -0
  23. {uht_tooling-0.1.2 → uht_tooling-0.1.4}/src/uht_tooling.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
6
  License: MIT
@@ -27,7 +27,7 @@ Requires-Dist: ruff==0.14.4; extra == "dev"
27
27
 
28
28
  # uht-tooling
29
29
 
30
- Automation helpers for ultra-high-throughput molecular biology workflows. The package ships both a Typer-based CLI and an optional Gradio GUI that wrap the same workflow code paths.
30
+ Automation helpers for ultra-high-throughput molecular biology workflows. The package ships both a CLI and an optional GUI that wrap the same workflow code paths.
31
31
 
32
32
  ---
33
33
 
@@ -35,19 +35,20 @@ Automation helpers for ultra-high-throughput molecular biology workflows. The pa
35
35
 
36
36
  ### Quick install (recommended, easiest file maintenance)
37
37
  ```bash
38
- python -m pip install "uht-tooling[gui]"
38
+ pip install "uht-tooling[gui]==0.1.3"
39
+
39
40
  ```
40
41
 
41
42
  This installs the core workflows plus the optional GUI dependencies (Gradio, pandas). Omit the `[gui]` extras if you only need the CLI:
42
43
 
43
44
  ```bash
44
- python -m pip install uht-tooling
45
+ pip install uht-tooling
45
46
  ```
46
47
 
47
48
  ### Development install
48
49
  ```bash
49
- git clone https://github.com/Matt115A/uht-tooling.git
50
- cd uht-tooling
50
+ git clone https://github.com/Matt115A/uht-tooling-packaged.git
51
+ cd uht-tooling-packaged
51
52
  python -m pip install -e ".[gui,dev]"
52
53
  ```
53
54
 
@@ -57,7 +58,7 @@ The editable install exposes the latest sources, while the `dev` extras add lint
57
58
 
58
59
  ## Directory layout
59
60
 
60
- - Reference inputs live under `data/<workflow>/`.
61
+ - Reference inputs can live anywhere (you specify their paths on the CLI), but we recommend using `data/<workflow>/`.
61
62
  - Outputs (CSV, FASTA, plots, logs) are written to `results/<workflow>/`.
62
63
  - All workflows log to `results/<workflow>/run.log` for reproducibility and debugging.
63
64
 
@@ -79,9 +80,9 @@ Each command mirrors a workflow module. Common entry points:
79
80
  | `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
80
81
  | `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
81
82
  | `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
82
- | `uht-tooling umi-hunter` | Cluster UMIs and call consensus alleles. |
83
- | `uht-tooling ep-library-profile` | Measure mutation rates without UMIs. |
84
- | `uht-tooling profile-inserts` | Extract inserts defined by probe pairs. |
83
+ | `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
84
+ | `uht-tooling ep-library-profile` | Measure mutation rates in plasmid libraries without UMIs. |
85
+ | `uht-tooling profile-inserts` | Extract and analyse inserts defined by flanking probe pairs. |
85
86
 
86
87
  Each command provides detailed help, including option descriptions and expected file formats:
87
88
 
@@ -107,13 +108,13 @@ You can pass multiple FASTQ paths using repeated `--fastq` options or glob patte
107
108
  ```
108
109
  4. Primer CSVs will be written to `results/nextera_designer/`, accompanied by a log file.
109
110
 
110
- The helper is preloaded with twelve i5 and twelve i7 indices, enabling up to 144 unique amplicons. Downstream lab workflow suggestions (qPCR monitoring, SPRIselect cleanup) remain unchanged from earlier releases.
111
+ The helper is preloaded with twelve i5 and twelve i7 indices, enabling up to 144 unique amplicons.
111
112
 
112
113
  #### Wet-lab workflow notes
113
114
 
114
115
  - Perform the initial amplification with an i5/i7 primer pair and monitor a small aliquot by qPCR. Cap thermocycling early so you only generate ~10% of the theoretical yield—this minimizes amplification bias.
115
116
  - Purify the product with SPRIselect beads at approximately a 0.65:1 bead:DNA volume ratio to remove residual primers and short fragments.
116
- - Confirm primer removal using electrophoresis (e.g., BioAnalyzer DNA chip) before moving to sequencing prep.
117
+ - Confirm primer removal and quantify DNA using electrophoresis (e.g., BioAnalyzer DNA chip) before moving to the flow cell.
117
118
 
118
119
  ### SLIM primer design
119
120
 
@@ -158,7 +159,7 @@ Mutation nomenclature examples:
158
159
  ```
159
160
  - Outputs include primer sets and an assembly-plan CSV.
160
161
 
161
- If mutations fall within overlapping primer windows, design sequential reactions to avoid excessive primer reuse.
162
+ If mutations fall within overlapping primer windows, design sequential reactions.
162
163
 
163
164
  ### Mutation caller (no UMIs)
164
165
 
@@ -175,7 +176,7 @@ If mutations fall within overlapping primer windows, design sequential reactions
175
176
  --output-dir results/mutation_caller/ \
176
177
  --threshold 10
177
178
  ```
178
- 3. Outputs: per-sample subdirectories with substitution summaries, co-occurrence matrices, and logs.
179
+ 3. Outputs: per-sample subdirectories with substitution summaries, co-occurrence matrices, and logs. Co-occurrence matrices are experimental and should not yet be relied on.
179
180
 
180
181
  ### UMI Hunter
181
182
 
@@ -188,7 +189,12 @@ If mutations fall within overlapping primer windows, design sequential reactions
188
189
  --fastq data/umi_hunter/*.fastq.gz \
189
190
  --output-dir results/umi_hunter/
190
191
  ```
191
- - Tunable parameters include `--umi-identity-threshold` and `--consensus-mutation-threshold`.
192
+ - Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
193
+ - `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
194
+ - `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
195
+ - `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
196
+
197
+ Please be aware that this toolkit does not scale well beyond roughly 50k reads per sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
192
198
 
193
199
  ### Profile inserts
194
200
 
@@ -229,7 +235,7 @@ python -m uht_tooling.workflows.gui
229
235
  ```
230
236
 
231
237
  Key points:
232
- - The server binds to `http://127.0.0.1:7860` by default and falls back to an available port if 7860 is busy. Copy http://127.0.0.1:7860 into your browser.
238
+ - The server binds to `http://127.0.0.1:7860` by default and falls back to an available port if 7860 is busy. Copy http://127.0.0.1:7860 into your browser to interface with the GUI.
233
239
  - Temporary working directories are created under the system temp folder and cleaned automatically.
234
240
  - Output archives (ZIP files) mirror the directory structure produced by the CLI.
235
241
 
@@ -238,9 +244,9 @@ Key points:
238
244
  1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
239
245
  2. **SLIM** – template/context FASTA text areas plus mutation list.
240
246
  3. **Gibson** – multi-mutation support using `+` syntax.
241
- 4. **Mutation Caller** – upload FASTQ, template FASTA, and configuration CSV.
242
- 5. **UMI Hunter** – long-read UMI clustering with configurable thresholds.
243
- 6. **Profile Inserts** – probe CSV and multiple FASTQ uploads.
247
+ 4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
248
+ 5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
249
+ 6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
244
250
  7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
245
251
 
246
252
  ### Workflow tips
@@ -1,6 +1,6 @@
1
1
  # uht-tooling
2
2
 
3
- Automation helpers for ultra-high-throughput molecular biology workflows. The package ships both a Typer-based CLI and an optional Gradio GUI that wrap the same workflow code paths.
3
+ Automation helpers for ultra-high-throughput molecular biology workflows. The package ships both a CLI and an optional GUI that wrap the same workflow code paths.
4
4
 
5
5
  ---
6
6
 
@@ -8,19 +8,20 @@ Automation helpers for ultra-high-throughput molecular biology workflows. The pa
8
8
 
9
9
  ### Quick install (recommended, easiest file maintenance)
10
10
  ```bash
11
- python -m pip install "uht-tooling[gui]"
11
+ pip install "uht-tooling[gui]==0.1.3"
12
+
12
13
  ```
13
14
 
14
15
  This installs the core workflows plus the optional GUI dependencies (Gradio, pandas). Omit the `[gui]` extras if you only need the CLI:
15
16
 
16
17
  ```bash
17
- python -m pip install uht-tooling
18
+ pip install uht-tooling
18
19
  ```
19
20
 
20
21
  ### Development install
21
22
  ```bash
22
- git clone https://github.com/Matt115A/uht-tooling.git
23
- cd uht-tooling
23
+ git clone https://github.com/Matt115A/uht-tooling-packaged.git
24
+ cd uht-tooling-packaged
24
25
  python -m pip install -e ".[gui,dev]"
25
26
  ```
26
27
 
@@ -30,7 +31,7 @@ The editable install exposes the latest sources, while the `dev` extras add lint
30
31
 
31
32
  ## Directory layout
32
33
 
33
- - Reference inputs live under `data/<workflow>/`.
34
+ - Reference inputs can live anywhere (you specify their paths on the CLI), but we recommend using `data/<workflow>/`.
34
35
  - Outputs (CSV, FASTA, plots, logs) are written to `results/<workflow>/`.
35
36
  - All workflows log to `results/<workflow>/run.log` for reproducibility and debugging.
36
37
 
@@ -52,9 +53,9 @@ Each command mirrors a workflow module. Common entry points:
52
53
  | `uht-tooling design-slim` | Design SLIM mutagenesis primers from FASTA/CSV inputs. |
53
54
  | `uht-tooling design-gibson` | Produce Gibson mutagenesis primers and assembly plans. |
54
55
  | `uht-tooling mutation-caller` | Summarise amino-acid substitutions from long-read FASTQ files. |
55
- | `uht-tooling umi-hunter` | Cluster UMIs and call consensus alleles. |
56
- | `uht-tooling ep-library-profile` | Measure mutation rates without UMIs. |
57
- | `uht-tooling profile-inserts` | Extract inserts defined by probe pairs. |
56
+ | `uht-tooling umi-hunter` | Cluster UMIs and call consensus genes. |
57
+ | `uht-tooling ep-library-profile` | Measure mutation rates in plasmid libraries without UMIs. |
58
+ | `uht-tooling profile-inserts` | Extract and analyse inserts defined by flanking probe pairs. |
58
59
 
59
60
  Each command provides detailed help, including option descriptions and expected file formats:
60
61
 
@@ -80,13 +81,13 @@ You can pass multiple FASTQ paths using repeated `--fastq` options or glob patte
80
81
  ```
81
82
  4. Primer CSVs will be written to `results/nextera_designer/`, accompanied by a log file.
82
83
 
83
- The helper is preloaded with twelve i5 and twelve i7 indices, enabling up to 144 unique amplicons. Downstream lab workflow suggestions (qPCR monitoring, SPRIselect cleanup) remain unchanged from earlier releases.
84
+ The helper is preloaded with twelve i5 and twelve i7 indices, enabling up to 144 unique amplicons.
84
85
 
85
86
  #### Wet-lab workflow notes
86
87
 
87
88
  - Perform the initial amplification with an i5/i7 primer pair and monitor a small aliquot by qPCR. Cap thermocycling early so you only generate ~10% of the theoretical yield—this minimizes amplification bias.
88
89
  - Purify the product with SPRIselect beads at approximately a 0.65:1 bead:DNA volume ratio to remove residual primers and short fragments.
89
- - Confirm primer removal using electrophoresis (e.g., BioAnalyzer DNA chip) before moving to sequencing prep.
90
+ - Confirm primer removal and quantify DNA using electrophoresis (e.g., BioAnalyzer DNA chip) before moving to the flow cell.
90
91
 
91
92
  ### SLIM primer design
92
93
 
@@ -131,7 +132,7 @@ Mutation nomenclature examples:
131
132
  ```
132
133
  - Outputs include primer sets and an assembly-plan CSV.
133
134
 
134
- If mutations fall within overlapping primer windows, design sequential reactions to avoid excessive primer reuse.
135
+ If mutations fall within overlapping primer windows, design sequential reactions.
135
136
 
136
137
  ### Mutation caller (no UMIs)
137
138
 
@@ -148,7 +149,7 @@ If mutations fall within overlapping primer windows, design sequential reactions
148
149
  --output-dir results/mutation_caller/ \
149
150
  --threshold 10
150
151
  ```
151
- 3. Outputs: per-sample subdirectories with substitution summaries, co-occurrence matrices, and logs.
152
+ 3. Outputs: per-sample subdirectories with substitution summaries, co-occurrence matrices, and logs. Co-occurrence matrices are experimental and should not yet be relied on.
152
153
 
153
154
  ### UMI Hunter
154
155
 
@@ -161,7 +162,12 @@ If mutations fall within overlapping primer windows, design sequential reactions
161
162
  --fastq data/umi_hunter/*.fastq.gz \
162
163
  --output-dir results/umi_hunter/
163
164
  ```
164
- - Tunable parameters include `--umi-identity-threshold` and `--consensus-mutation-threshold`.
165
+ - Tunable parameters include `--umi-identity-threshold`, `--consensus-mutation-threshold`, and `--min-cluster-size`.
166
+ - `--umi-identity-threshold` (0–1) controls how similar two UMIs must be to fall into the same cluster.
167
+ - `--consensus-mutation-threshold` (0–1) is the fraction of reads within a cluster that must agree on a base before it is written into the consensus sequence.
168
+ - `--min-cluster-size` sets the minimum number of reads required in a cluster before a consensus is generated (smaller clusters remain listed in the raw UMI CSV but no consensus FASTA is produced).
169
+
170
+ Please be aware that this toolkit does not scale well beyond roughly 50k reads per sample. See UMIC-seq pipelines for efficient UMI-gene dictionary generation.
165
171
 
166
172
  ### Profile inserts
167
173
 
@@ -202,7 +208,7 @@ python -m uht_tooling.workflows.gui
202
208
  ```
203
209
 
204
210
  Key points:
205
- - The server binds to `http://127.0.0.1:7860` by default and falls back to an available port if 7860 is busy. Copy http://127.0.0.1:7860 into your browser.
211
+ - The server binds to `http://127.0.0.1:7860` by default and falls back to an available port if 7860 is busy. Copy http://127.0.0.1:7860 into your browser to interface with the GUI.
206
212
  - Temporary working directories are created under the system temp folder and cleaned automatically.
207
213
  - Output archives (ZIP files) mirror the directory structure produced by the CLI.
208
214
 
@@ -211,9 +217,9 @@ Key points:
211
217
  1. **Nextera XT** – forward/reverse primer inputs with CSV preview.
212
218
  2. **SLIM** – template/context FASTA text areas plus mutation list.
213
219
  3. **Gibson** – multi-mutation support using `+` syntax.
214
- 4. **Mutation Caller** – upload FASTQ, template FASTA, and configuration CSV.
215
- 5. **UMI Hunter** – long-read UMI clustering with configurable thresholds.
216
- 6. **Profile Inserts** – probe CSV and multiple FASTQ uploads.
220
+ 4. **Mutation Caller** – upload FASTQ and template FASTA, then enter flanks and gene length bounds inline.
221
+ 5. **UMI Hunter** – long-read UMI clustering with flank entry, UMI length bounds, mutation threshold, and minimum cluster size.
222
+ 6. **Profile Inserts** – interactive probe table plus multiple FASTQ uploads with adjustable fuzzy-match ratio.
217
223
  7. **EP Library Profile** – FASTQ uploads plus plasmid and region FASTA inputs.
218
224
 
219
225
  ### Workflow tips
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "uht-tooling"
7
- version = "0.1.2"
7
+ version = "0.1.4"
8
8
  description = "Tooling for ultra-high throughput screening workflows."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -18,6 +18,7 @@ from uht_tooling.workflows.umi_hunter import (
18
18
  expand_fastq_inputs as expand_fastq_inputs_umi,
19
19
  run_umi_hunter,
20
20
  )
21
+ from uht_tooling.workflows.gui import launch_gui
21
22
 
22
23
  app = typer.Typer(help="Command-line interface for the uht-tooling package.")
23
24
 
@@ -232,6 +233,11 @@ def umi_hunter_command(
232
233
  max=1.0,
233
234
  help="Mutation threshold for consensus calling (default: 0.7).",
234
235
  ),
236
+ min_cluster_size: int = typer.Option(
237
+ 1,
238
+ min=1,
239
+ help="Minimum number of reads required in a UMI cluster before a consensus is generated.",
240
+ ),
235
241
  log_path: Optional[Path] = typer.Option(
236
242
  None,
237
243
  dir_okay=False,
@@ -248,6 +254,7 @@ def umi_hunter_command(
248
254
  output_dir=output_dir,
249
255
  umi_identity_threshold=umi_identity_threshold,
250
256
  consensus_mutation_threshold=consensus_mutation_threshold,
257
+ min_cluster_size=min_cluster_size,
251
258
  log_path=log_path,
252
259
  )
253
260
  if not results:
@@ -255,7 +262,12 @@ def umi_hunter_command(
255
262
  else:
256
263
  typer.echo("UMI hunter outputs:")
257
264
  for entry in results:
258
- typer.echo(f" Sample {entry['sample']}: {entry['directory']}")
265
+ total_clusters = entry.get("clusters_total", entry.get("clusters", 0))
266
+ typer.echo(
267
+ f" Sample {entry['sample']}: "
268
+ f"{entry.get('clusters', 0)} consensus clusters "
269
+ f"(from {total_clusters} total) → {entry['directory']}"
270
+ )
259
271
 
260
272
 
261
273
  @app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")
@@ -355,9 +367,34 @@ def profile_inserts_command(
355
367
  typer.echo(f" Sample {entry['sample']}: {entry['directory']}")
356
368
 
357
369
 
358
- @app.command("gui", help="Launch the graphical interface (currently under refactor).")
359
- def gui_command():
360
- raise NotImplementedError("The GUI is being updated to work with user-specified data directories.")
370
+ @app.command("gui", help="Launch the graphical interface.")
371
+ def gui_command(
372
+ server_name: str = typer.Option(
373
+ "127.0.0.1",
374
+ "--server-name",
375
+ "-n",
376
+ help="Hostname or IP address to bind the GUI server.",
377
+ ),
378
+ server_port: Optional[int] = typer.Option(
379
+ 7860,
380
+ "--server-port",
381
+ "-p",
382
+ help="Preferred port for the GUI (falls back automatically if unavailable).",
383
+ ),
384
+ share: bool = typer.Option(
385
+ False,
386
+ "--share",
387
+ help="Enable Gradio's public sharing tunnel (requires network access).",
388
+ ),
389
+ ):
390
+ """Launch the Gradio GUI."""
391
+ try:
392
+ launch_gui(server_name=server_name, server_port=server_port, share=share)
393
+ except KeyboardInterrupt:
394
+ typer.echo("GUI stopped by user.")
395
+ except Exception as exc:
396
+ typer.echo(f"Failed to start GUI: {exc}")
397
+ raise typer.Exit(1)
361
398
 
362
399
 
363
400
  def main():