uht-tooling 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ """Core package initialization for uht_tooling."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
__all__ = ["__version__"]


def _resolve_version() -> str:
    """Return the version recorded for the ``uht-tooling`` distribution.

    Falls back to the placeholder ``"0.0.0"`` when ``importlib.metadata``
    reports that no such distribution is installed.
    """
    try:
        return version("uht-tooling")
    except PackageNotFoundError:
        return "0.0.0"


__version__ = _resolve_version()
uht_tooling/cli.py ADDED
@@ -0,0 +1,368 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import typer
5
+
6
+ from uht_tooling.workflows.design_gibson import run_design_gibson
7
+ from uht_tooling.workflows.design_slim import run_design_slim
8
+ from uht_tooling.workflows.mutation_caller import (
9
+ expand_fastq_inputs as expand_fastq_inputs_mutation,
10
+ run_mutation_caller,
11
+ )
12
+ from uht_tooling.workflows.nextera_designer import run_nextera_primer_design
13
+ from uht_tooling.workflows.profile_inserts import (
14
+ expand_fastq_inputs as expand_fastq_inputs_profile,
15
+ run_profile_inserts,
16
+ )
17
+ from uht_tooling.workflows.umi_hunter import (
18
+ expand_fastq_inputs as expand_fastq_inputs_umi,
19
+ run_umi_hunter,
20
+ )
21
+
22
+ app = typer.Typer(help="Command-line interface for the uht-tooling package.")
23
+
24
+
25
@app.command("design-slim", help="Design SLIM primers from user-specified FASTA/CSV inputs.")
def design_slim_command(
    gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
    context_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Path to the context FASTA file containing the plasmid or genomic sequence.",
    ),
    mutations_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file containing a 'mutations' column with the desired edits.",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where results will be written.",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file for this run.",
    ),
):
    """CLI entry point for the SLIM primer design workflow.

    Passes every option straight through to ``run_design_slim`` and then
    reports the location ``output_dir / "SLIM_primers.csv"`` to the user.

    Args:
        gene_fasta: Existing, readable gene FASTA file.
        context_fasta: Existing, readable context FASTA file.
        mutations_csv: Existing, readable CSV of requested mutations.
        output_dir: Directory handed to the workflow for its results.
        log_path: Optional log file path; ``None`` when not supplied.
    """
    workflow_options = {
        "gene_fasta": gene_fasta,
        "context_fasta": context_fasta,
        "mutations_csv": mutations_csv,
        "output_dir": output_dir,
        "log_path": log_path,
    }
    run_design_slim(**workflow_options)

    primers_csv = output_dir / "SLIM_primers.csv"
    typer.echo(f"SLIM primers written to {primers_csv}")
62
+
63
+
64
@app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
def nextera_primers_command(
    binding_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file with a 'binding_region' column; first row is i7, second row is i5.",
    ),
    output_csv: Path = typer.Option(
        ...,
        dir_okay=False,
        writable=True,
        help="Path to write the generated primer CSV.",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file.",
    ),
    config: Optional[Path] = typer.Option(
        None,
        exists=True,
        readable=True,
        help="Optional YAML file providing overrides for indexes/prefixes/suffixes.",
    ),
):
    """CLI entry point for the Nextera XT primer design workflow.

    Delegates to ``run_nextera_primer_design`` and prints whatever path that
    call returns.

    Args:
        binding_csv: Existing, readable CSV of binding regions.
        output_csv: File path requested for the primer CSV.
        log_path: Optional log file path; ``None`` when not supplied.
        config: Optional override file, forwarded to the workflow as
            ``config_path``.
    """
    # The CLI option is named ``config`` but the workflow expects ``config_path``.
    written_to = run_nextera_primer_design(
        binding_csv=binding_csv,
        output_csv=output_csv,
        log_path=log_path,
        config_path=config,
    )
    message = f"Nextera primers written to {written_to}"
    typer.echo(message)
99
+
100
+
101
@app.command("design-gibson", help="Design Gibson assembly primers and assembly plans.")
def design_gibson_command(
    gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
    context_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Path to the circular context FASTA file.",
    ),
    mutations_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file with a 'mutations' column (use '+' to link sub-mutations).",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where primer and assembly plan CSVs will be written.",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path for a dedicated log file.",
    ),
):
    """CLI entry point for the Gibson assembly primer design workflow.

    Calls ``run_design_gibson`` with the supplied options and prints one line
    per ``(name, path)`` pair of the mapping it returns.

    Args:
        gene_fasta: Existing, readable gene FASTA file.
        context_fasta: Existing, readable context FASTA file.
        mutations_csv: Existing, readable CSV of requested mutations.
        output_dir: Directory handed to the workflow for its results.
        log_path: Optional log file path; ``None`` when not supplied.
    """
    written_files = run_design_gibson(
        gene_fasta=gene_fasta,
        context_fasta=context_fasta,
        mutations_csv=mutations_csv,
        output_dir=output_dir,
        log_path=log_path,
    )

    typer.echo("Gibson outputs written:")
    for label, location in written_files.items():
        typer.echo(f" {label}: {location}")
140
+
141
+
142
@app.command(
    "mutation-caller",
    help="Identify amino-acid substitutions from long-read data without UMIs.",
)
def mutation_caller_command(
    template_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="FASTA file containing the mutation caller template sequence.",
    ),
    flanks_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file describing gene flanks and min/max lengths.",
    ),
    fastq: list[str] = typer.Option(
        ...,
        help="One or more FASTQ(.gz) paths or glob patterns (provide multiple --fastq options as needed).",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where per-sample outputs will be written.",
    ),
    threshold: int = typer.Option(
        10,
        min=1,
        help="Minimum AA substitution count to include in the frequent-substitution report.",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file.",
    ),
):
    """CLI entry point for the mutation caller workflow.

    Expands the ``--fastq`` values with the mutation caller's own
    ``expand_fastq_inputs`` helper, runs ``run_mutation_caller``, and prints
    one line per returned entry using its ``'sample'`` and ``'directory'``
    keys. A falsy result prints a hint to check inputs instead.

    Args:
        template_fasta: Existing, readable template FASTA file.
        flanks_csv: Existing, readable CSV describing the gene flanks.
        fastq: FASTQ paths or glob patterns, one per ``--fastq`` option.
        output_dir: Directory handed to the workflow for its results.
        threshold: Substitution count cut-off (at least 1, default 10).
        log_path: Optional log file path; ``None`` when not supplied.
    """
    resolved_fastqs = expand_fastq_inputs_mutation(fastq)
    sample_reports = run_mutation_caller(
        template_fasta=template_fasta,
        flanks_csv=flanks_csv,
        fastq_files=resolved_fastqs,
        output_dir=output_dir,
        threshold=threshold,
        log_path=log_path,
    )

    # Nothing to list: tell the user and stop early.
    if not sample_reports:
        typer.echo("No outputs were generated. Check inputs and threshold settings.")
        return

    typer.echo("Mutation caller outputs:")
    for report in sample_reports:
        typer.echo(f" Sample {report['sample']}: {report['directory']}")
197
+
198
+
199
@app.command("umi-hunter", help="Cluster UMIs and produce consensus genes from long-read data.")
def umi_hunter_command(
    template_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="Template FASTA file for consensus generation.",
    ),
    config_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV describing UMI/gene flanks and length bounds.",
    ),
    fastq: list[str] = typer.Option(
        ...,
        help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory where UMI hunter outputs will be stored.",
    ),
    umi_identity_threshold: float = typer.Option(
        0.9,
        min=0.0,
        max=1.0,
        help="UMI clustering identity threshold (default: 0.9).",
    ),
    consensus_mutation_threshold: float = typer.Option(
        0.7,
        min=0.0,
        max=1.0,
        help="Mutation threshold for consensus calling (default: 0.7).",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file.",
    ),
):
    """CLI entry point for the UMI hunter workflow.

    Expands the ``--fastq`` values with the UMI hunter's own
    ``expand_fastq_inputs`` helper, runs ``run_umi_hunter``, and prints one
    line per returned entry using its ``'sample'`` and ``'directory'`` keys.
    A falsy result prints a short "nothing generated" message instead.

    Args:
        template_fasta: Existing, readable template FASTA file.
        config_csv: Existing, readable CSV of flank/length settings.
        fastq: FASTQ paths or glob patterns, one per ``--fastq`` option.
        output_dir: Directory handed to the workflow for its results.
        umi_identity_threshold: Value in [0.0, 1.0], default 0.9.
        consensus_mutation_threshold: Value in [0.0, 1.0], default 0.7.
        log_path: Optional log file path; ``None`` when not supplied.
    """
    resolved_fastqs = expand_fastq_inputs_umi(fastq)
    sample_reports = run_umi_hunter(
        template_fasta=template_fasta,
        config_csv=config_csv,
        fastq_files=resolved_fastqs,
        output_dir=output_dir,
        umi_identity_threshold=umi_identity_threshold,
        consensus_mutation_threshold=consensus_mutation_threshold,
        log_path=log_path,
    )

    # Nothing to list: tell the user and stop early.
    if not sample_reports:
        typer.echo("No UMI hunter outputs generated.")
        return

    typer.echo("UMI hunter outputs:")
    for report in sample_reports:
        typer.echo(f" Sample {report['sample']}: {report['directory']}")
259
+
260
+
261
@app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")
def ep_library_profile_command(
    region_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="FASTA file describing the region of interest.",
    ),
    plasmid_fasta: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="FASTA file with the full plasmid sequence.",
    ),
    fastq: list[str] = typer.Option(
        ...,
        help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory for per-sample outputs.",
    ),
    work_dir: Optional[Path] = typer.Option(
        None,
        dir_okay=True,
        writable=True,
        help="Optional scratch directory for intermediate files (defaults to output/tmp).",
    ),
):
    """CLI entry point for the ep-library mutation-rate profiling workflow.

    Expands the ``--fastq`` values, runs ``run_ep_library_profile``, and
    prints the ``'master_summary'`` path followed by one line per entry of
    the result's ``'samples'`` list (using each entry's ``'sample'`` and
    ``'results_dir'`` keys). When ``'samples'`` is missing or empty, a short
    "nothing generated" message is printed instead.

    Args:
        region_fasta: Existing, readable FASTA of the region of interest.
        plasmid_fasta: Existing, readable FASTA of the full plasmid.
        fastq: FASTQ paths or glob patterns, one per ``--fastq`` option.
        output_dir: Directory handed to the workflow for its results.
        work_dir: Optional scratch directory; ``None`` when not supplied.
    """
    # This module never imported ``expand_fastq_inputs_ep`` or
    # ``run_ep_library_profile``, so the command failed with NameError on
    # every invocation. Import them here, using the same aliasing pattern as
    # the sibling workflows.
    # NOTE(review): the module path ``uht_tooling.workflows.mut_rate`` is an
    # assumption -- confirm it against the package layout before merging.
    from uht_tooling.workflows.mut_rate import (
        expand_fastq_inputs as expand_fastq_inputs_ep,
        run_ep_library_profile,
    )

    fastq_files = expand_fastq_inputs_ep(fastq)
    results = run_ep_library_profile(
        fastq_paths=fastq_files,
        region_fasta=region_fasta,
        plasmid_fasta=plasmid_fasta,
        output_dir=output_dir,
        work_dir=work_dir,
    )

    samples = results.get("samples", [])
    if not samples:
        typer.echo("No ep-library profile outputs generated.")
        return

    typer.echo(f"Master summary written to {results['master_summary']}")
    for sample in samples:
        typer.echo(f" Sample {sample['sample']}: {sample['results_dir']}")
308
+
309
+
310
@app.command("profile-inserts", help="Extract and profile inserts using probe pairs.")
def profile_inserts_command(
    probes_csv: Path = typer.Option(
        ...,
        exists=True,
        readable=True,
        help="CSV file containing upstream/downstream probes.",
    ),
    fastq: list[str] = typer.Option(
        ...,
        help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
    ),
    output_dir: Path = typer.Option(
        ...,
        dir_okay=True,
        writable=True,
        help="Directory for per-sample outputs.",
    ),
    min_ratio: int = typer.Option(
        80,
        min=0,
        max=100,
        help="Minimum fuzzy match ratio for probe detection (default: 80).",
    ),
    log_path: Optional[Path] = typer.Option(
        None,
        dir_okay=False,
        writable=True,
        help="Optional path to write a dedicated log file.",
    ),
):
    """CLI entry point for the insert profiling workflow.

    Expands the ``--fastq`` values with the profile-inserts
    ``expand_fastq_inputs`` helper, runs ``run_profile_inserts``, and prints
    one line per returned entry using its ``'sample'`` and ``'directory'``
    keys. A falsy result prints a short "nothing generated" message instead.

    Args:
        probes_csv: Existing, readable CSV of probe pairs.
        fastq: FASTQ paths or glob patterns, one per ``--fastq`` option.
        output_dir: Directory handed to the workflow for its results.
        min_ratio: Integer in [0, 100], default 80.
        log_path: Optional log file path; ``None`` when not supplied.
    """
    resolved_fastqs = expand_fastq_inputs_profile(fastq)
    sample_reports = run_profile_inserts(
        probes_csv=probes_csv,
        fastq_files=resolved_fastqs,
        output_dir=output_dir,
        min_ratio=min_ratio,
        log_path=log_path,
    )

    # Nothing to list: tell the user and stop early.
    if not sample_reports:
        typer.echo("No profile inserts outputs generated.")
        return

    typer.echo("Profile inserts outputs:")
    for report in sample_reports:
        typer.echo(f" Sample {report['sample']}: {report['directory']}")
356
+
357
+
358
@app.command("gui", help="Launch the graphical interface (currently under refactor).")
def gui_command():
    """Placeholder for the GUI launcher.

    Raises:
        NotImplementedError: Always; the command has no implementation here.
    """
    reason = "The GUI is being updated to work with user-specified data directories."
    raise NotImplementedError(reason)
361
+
362
+
363
def main():
    """Invoke the module-level Typer application object ``app``."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
File without changes
File without changes