uht-tooling 0.1.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
uht_tooling/cli.py CHANGED
@@ -3,6 +3,8 @@ from typing import Optional
3
3
 
4
4
  import typer
5
5
 
6
+ from uht_tooling.config import get_option, load_config
7
+ from uht_tooling.tools import ToolNotFoundError, validate_workflow_tools
6
8
  from uht_tooling.workflows.design_gibson import run_design_gibson
7
9
  from uht_tooling.workflows.design_kld import run_design_kld
8
10
  from uht_tooling.workflows.design_slim import run_design_slim
@@ -28,29 +30,57 @@ from uht_tooling.workflows.gui import launch_gui
28
30
  app = typer.Typer(help="Command-line interface for the uht-tooling package.")
29
31
 
30
32
 
33
@app.callback()
def main_callback(
    ctx: typer.Context,
    config: Optional[Path] = typer.Option(
        None,
        "--config",
        "-K",
        exists=True,
        readable=True,
        help="Path to YAML configuration file for default options.",
    ),
):
    """Global callback to load configuration file."""
    # Make sure ctx.obj is a dict before writing to it, so subcommands can
    # read the shared settings via ctx.obj["config"].
    ctx.ensure_object(dict)
    # config is None when --config was not given; load_config receives it as-is.
    settings = load_config(config)
    ctx.obj["config"] = settings
48
+
49
+
31
50
  @app.command("design-slim", help="Design SLIM primers from user-specified FASTA/CSV inputs.")
32
51
  def design_slim_command(
33
- gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
52
+ ctx: typer.Context,
53
+ gene_fasta: Path = typer.Option(
54
+ ..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
55
+ ),
34
56
  context_fasta: Path = typer.Option(
35
57
  ...,
58
+ "--context-fasta",
59
+ "-c",
36
60
  exists=True,
37
61
  readable=True,
38
62
  help="Path to the context FASTA file containing the plasmid or genomic sequence.",
39
63
  ),
40
64
  mutations_csv: Path = typer.Option(
41
65
  ...,
66
+ "--mutations-csv",
67
+ "-m",
42
68
  exists=True,
43
69
  readable=True,
44
70
  help="CSV file containing a 'mutations' column with the desired edits.",
45
71
  ),
46
72
  output_dir: Path = typer.Option(
47
73
  ...,
74
+ "--output-dir",
75
+ "-o",
48
76
  dir_okay=True,
49
77
  writable=True,
50
78
  help="Directory where results will be written.",
51
79
  ),
52
80
  log_path: Optional[Path] = typer.Option(
53
81
  None,
82
+ "--log-path",
83
+ "-l",
54
84
  dir_okay=False,
55
85
  writable=True,
56
86
  help="Optional path to write a dedicated log file for this run.",
@@ -69,27 +99,38 @@ def design_slim_command(
69
99
 
70
100
  @app.command("design-kld", help="Design KLD (inverse PCR) primers from user-specified FASTA/CSV inputs.")
71
101
  def design_kld_command(
72
- gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
102
+ ctx: typer.Context,
103
+ gene_fasta: Path = typer.Option(
104
+ ..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
105
+ ),
73
106
  context_fasta: Path = typer.Option(
74
107
  ...,
108
+ "--context-fasta",
109
+ "-c",
75
110
  exists=True,
76
111
  readable=True,
77
112
  help="Path to the context FASTA file containing the plasmid or genomic sequence.",
78
113
  ),
79
114
  mutations_csv: Path = typer.Option(
80
115
  ...,
116
+ "--mutations-csv",
117
+ "-m",
81
118
  exists=True,
82
119
  readable=True,
83
120
  help="CSV file containing a 'mutations' column with the desired edits.",
84
121
  ),
85
122
  output_dir: Path = typer.Option(
86
123
  ...,
124
+ "--output-dir",
125
+ "-o",
87
126
  dir_okay=True,
88
127
  writable=True,
89
128
  help="Directory where results will be written.",
90
129
  ),
91
130
  log_path: Optional[Path] = typer.Option(
92
131
  None,
132
+ "--log-path",
133
+ "-l",
93
134
  dir_okay=False,
94
135
  writable=True,
95
136
  help="Optional path to write a dedicated log file for this run.",
@@ -108,26 +149,34 @@ def design_kld_command(
108
149
 
109
150
  @app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
110
151
  def nextera_primers_command(
152
+ ctx: typer.Context,
111
153
  binding_csv: Path = typer.Option(
112
154
  ...,
155
+ "--binding-csv",
156
+ "-b",
113
157
  exists=True,
114
158
  readable=True,
115
159
  help="CSV file with a 'binding_region' column; first row is i7, second row is i5.",
116
160
  ),
117
161
  output_csv: Path = typer.Option(
118
162
  ...,
163
+ "--output-csv",
164
+ "-o",
119
165
  dir_okay=False,
120
166
  writable=True,
121
167
  help="Path to write the generated primer CSV.",
122
168
  ),
123
169
  log_path: Optional[Path] = typer.Option(
124
170
  None,
171
+ "--log-path",
172
+ "-l",
125
173
  dir_okay=False,
126
174
  writable=True,
127
175
  help="Optional path to write a dedicated log file.",
128
176
  ),
129
177
  config: Optional[Path] = typer.Option(
130
178
  None,
179
+ "--nextera-config",
131
180
  exists=True,
132
181
  readable=True,
133
182
  help="Optional YAML file providing overrides for indexes/prefixes/suffixes.",
@@ -145,27 +194,38 @@ def nextera_primers_command(
145
194
 
146
195
  @app.command("design-gibson", help="Design Gibson assembly primers and assembly plans.")
147
196
  def design_gibson_command(
148
- gene_fasta: Path = typer.Option(..., exists=True, readable=True, help="Path to the gene FASTA file."),
197
+ ctx: typer.Context,
198
+ gene_fasta: Path = typer.Option(
199
+ ..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
200
+ ),
149
201
  context_fasta: Path = typer.Option(
150
202
  ...,
203
+ "--context-fasta",
204
+ "-c",
151
205
  exists=True,
152
206
  readable=True,
153
207
  help="Path to the circular context FASTA file.",
154
208
  ),
155
209
  mutations_csv: Path = typer.Option(
156
210
  ...,
211
+ "--mutations-csv",
212
+ "-m",
157
213
  exists=True,
158
214
  readable=True,
159
215
  help="CSV file with a 'mutations' column (use '+' to link sub-mutations).",
160
216
  ),
161
217
  output_dir: Path = typer.Option(
162
218
  ...,
219
+ "--output-dir",
220
+ "-o",
163
221
  dir_okay=True,
164
222
  writable=True,
165
223
  help="Directory where primer and assembly plan CSVs will be written.",
166
224
  ),
167
225
  log_path: Optional[Path] = typer.Option(
168
226
  None,
227
+ "--log-path",
228
+ "-l",
169
229
  dir_okay=False,
170
230
  writable=True,
171
231
  help="Optional path for a dedicated log file.",
@@ -189,41 +249,65 @@ def design_gibson_command(
189
249
  help="Identify amino-acid substitutions from long-read data without UMIs.",
190
250
  )
191
251
  def mutation_caller_command(
252
+ ctx: typer.Context,
192
253
  template_fasta: Path = typer.Option(
193
254
  ...,
255
+ "--template-fasta",
256
+ "-t",
194
257
  exists=True,
195
258
  readable=True,
196
259
  help="FASTA file containing the mutation caller template sequence.",
197
260
  ),
198
261
  flanks_csv: Path = typer.Option(
199
262
  ...,
263
+ "--flanks-csv",
264
+ "-f",
200
265
  exists=True,
201
266
  readable=True,
202
267
  help="CSV file describing gene flanks and min/max lengths.",
203
268
  ),
204
269
  fastq: list[str] = typer.Option(
205
270
  ...,
271
+ "--fastq",
272
+ "-q",
206
273
  help="One or more FASTQ(.gz) paths or glob patterns (provide multiple --fastq options as needed).",
207
274
  ),
208
275
  output_dir: Path = typer.Option(
209
276
  ...,
277
+ "--output-dir",
278
+ "-o",
210
279
  dir_okay=True,
211
280
  writable=True,
212
281
  help="Directory where per-sample outputs will be written.",
213
282
  ),
214
283
  threshold: int = typer.Option(
215
284
  10,
285
+ "--threshold",
286
+ "-T",
216
287
  min=1,
217
288
  help="Minimum AA substitution count to include in the frequent-substitution report.",
218
289
  ),
219
290
  log_path: Optional[Path] = typer.Option(
220
291
  None,
292
+ "--log-path",
293
+ "-l",
221
294
  dir_okay=False,
222
295
  writable=True,
223
296
  help="Optional path to write a dedicated log file.",
224
297
  ),
225
298
  ):
226
299
  """Identify and summarise amino-acid substitutions."""
300
+ # Validate required external tools
301
+ try:
302
+ validate_workflow_tools("mutation_caller")
303
+ except ToolNotFoundError as e:
304
+ typer.echo(f"Error: {e}", err=True)
305
+ raise typer.Exit(1)
306
+
307
+ # Apply config defaults
308
+ config = ctx.obj.get("config", {}) if ctx.obj else {}
309
+ threshold = get_option(config, "threshold", threshold, default=10, workflow="mutation_caller")
310
+
227
311
  fastq_files = expand_fastq_inputs_mutation(fastq)
228
312
  results = run_mutation_caller(
229
313
  template_fasta=template_fasta,
@@ -243,53 +327,89 @@ def mutation_caller_command(
243
327
 
244
328
  @app.command("umi-hunter", help="Cluster UMIs and produce consensus genes from long-read data.")
245
329
  def umi_hunter_command(
330
+ ctx: typer.Context,
246
331
  template_fasta: Path = typer.Option(
247
332
  ...,
333
+ "--template-fasta",
334
+ "-t",
248
335
  exists=True,
249
336
  readable=True,
250
337
  help="Template FASTA file for consensus generation.",
251
338
  ),
252
339
  config_csv: Path = typer.Option(
253
340
  ...,
341
+ "--config-csv",
342
+ "-C",
254
343
  exists=True,
255
344
  readable=True,
256
345
  help="CSV describing UMI/gene flanks and length bounds.",
257
346
  ),
258
347
  fastq: list[str] = typer.Option(
259
348
  ...,
349
+ "--fastq",
350
+ "-q",
260
351
  help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
261
352
  ),
262
353
  output_dir: Path = typer.Option(
263
354
  ...,
355
+ "--output-dir",
356
+ "-o",
264
357
  dir_okay=True,
265
358
  writable=True,
266
359
  help="Directory where UMI hunter outputs will be stored.",
267
360
  ),
268
361
  umi_identity_threshold: float = typer.Option(
269
362
  0.9,
363
+ "--umi-identity-threshold",
364
+ "-u",
270
365
  min=0.0,
271
366
  max=1.0,
272
367
  help="UMI clustering identity threshold (default: 0.9).",
273
368
  ),
274
369
  consensus_mutation_threshold: float = typer.Option(
275
370
  0.7,
371
+ "--consensus-mutation-threshold",
372
+ "-M",
276
373
  min=0.0,
277
374
  max=1.0,
278
375
  help="Mutation threshold for consensus calling (default: 0.7).",
279
376
  ),
280
377
  min_cluster_size: int = typer.Option(
281
378
  1,
379
+ "--min-cluster-size",
380
+ "-s",
282
381
  min=1,
283
382
  help="Minimum number of reads required in a UMI cluster before a consensus is generated.",
284
383
  ),
285
384
  log_path: Optional[Path] = typer.Option(
286
385
  None,
386
+ "--log-path",
387
+ "-l",
287
388
  dir_okay=False,
288
389
  writable=True,
289
390
  help="Optional path to write a dedicated log file.",
290
391
  ),
291
392
  ):
292
393
  """Cluster UMIs and generate consensus sequences from long-read FASTQ data."""
394
+ # Validate required external tools
395
+ try:
396
+ validate_workflow_tools("umi_hunter")
397
+ except ToolNotFoundError as e:
398
+ typer.echo(f"Error: {e}", err=True)
399
+ raise typer.Exit(1)
400
+
401
+ # Apply config defaults
402
+ config = ctx.obj.get("config", {}) if ctx.obj else {}
403
+ umi_identity_threshold = get_option(
404
+ config, "umi_identity_threshold", umi_identity_threshold, default=0.9, workflow="umi_hunter"
405
+ )
406
+ consensus_mutation_threshold = get_option(
407
+ config, "consensus_mutation_threshold", consensus_mutation_threshold, default=0.7, workflow="umi_hunter"
408
+ )
409
+ min_cluster_size = get_option(
410
+ config, "min_cluster_size", min_cluster_size, default=1, workflow="umi_hunter"
411
+ )
412
+
293
413
  fastq_files = expand_fastq_inputs_umi(fastq)
294
414
  results = run_umi_hunter(
295
415
  template_fasta=template_fasta,
@@ -310,42 +430,60 @@ def umi_hunter_command(
310
430
  typer.echo(
311
431
  f" Sample {entry['sample']}: "
312
432
  f"{entry.get('clusters', 0)} consensus clusters "
313
- f"(from {total_clusters} total) {entry['directory']}"
433
+ f"(from {total_clusters} total) -> {entry['directory']}"
314
434
  )
315
435
 
316
436
 
317
437
  @app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")
318
438
  def ep_library_profile_command(
439
+ ctx: typer.Context,
319
440
  region_fasta: Path = typer.Option(
320
441
  ...,
442
+ "--region-fasta",
443
+ "-R",
321
444
  exists=True,
322
445
  readable=True,
323
446
  help="FASTA file describing the region of interest.",
324
447
  ),
325
448
  plasmid_fasta: Path = typer.Option(
326
449
  ...,
450
+ "--plasmid-fasta",
451
+ "-p",
327
452
  exists=True,
328
453
  readable=True,
329
454
  help="FASTA file with the full plasmid sequence.",
330
455
  ),
331
456
  fastq: list[str] = typer.Option(
332
457
  ...,
458
+ "--fastq",
459
+ "-q",
333
460
  help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
334
461
  ),
335
462
  output_dir: Path = typer.Option(
336
463
  ...,
464
+ "--output-dir",
465
+ "-o",
337
466
  dir_okay=True,
338
467
  writable=True,
339
468
  help="Directory for per-sample outputs.",
340
469
  ),
341
470
  work_dir: Optional[Path] = typer.Option(
342
471
  None,
472
+ "--work-dir",
473
+ "-w",
343
474
  dir_okay=True,
344
475
  writable=True,
345
476
  help="Optional scratch directory for intermediate files (defaults to output/tmp).",
346
477
  ),
347
478
  ):
348
479
  """Quantify mutation rates for ep-library sequencing experiments."""
480
+ # Validate required external tools
481
+ try:
482
+ validate_workflow_tools("ep_library_profile")
483
+ except ToolNotFoundError as e:
484
+ typer.echo(f"Error: {e}", err=True)
485
+ raise typer.Exit(1)
486
+
349
487
  fastq_files = expand_fastq_inputs_ep(fastq)
350
488
  results = run_ep_library_profile(
351
489
  fastq_paths=fastq_files,
@@ -365,30 +503,41 @@ def ep_library_profile_command(
365
503
 
366
504
  @app.command("profile-inserts", help="Extract and profile inserts using probe pairs.")
367
505
  def profile_inserts_command(
506
+ ctx: typer.Context,
368
507
  probes_csv: Path = typer.Option(
369
508
  ...,
509
+ "--probes-csv",
510
+ "-P",
370
511
  exists=True,
371
512
  readable=True,
372
513
  help="CSV file containing upstream/downstream probes.",
373
514
  ),
374
515
  fastq: list[str] = typer.Option(
375
516
  ...,
517
+ "--fastq",
518
+ "-q",
376
519
  help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
377
520
  ),
378
521
  output_dir: Path = typer.Option(
379
522
  ...,
523
+ "--output-dir",
524
+ "-o",
380
525
  dir_okay=True,
381
526
  writable=True,
382
527
  help="Directory for per-sample outputs.",
383
528
  ),
384
529
  min_ratio: int = typer.Option(
385
530
  80,
531
+ "--min-ratio",
532
+ "-r",
386
533
  min=0,
387
534
  max=100,
388
535
  help="Minimum fuzzy match ratio for probe detection (default: 80).",
389
536
  ),
390
537
  log_path: Optional[Path] = typer.Option(
391
538
  None,
539
+ "--log-path",
540
+ "-l",
392
541
  dir_okay=False,
393
542
  writable=True,
394
543
  help="Optional path to write a dedicated log file.",
uht_tooling/config.py ADDED
@@ -0,0 +1,137 @@
1
+ """Global configuration file support."""
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional
5
+
6
+ try:
7
+ import yaml
8
+
9
+ HAVE_YAML = True
10
+ except ImportError:
11
+ HAVE_YAML = False
12
+
13
+
14
# Candidate configuration files, listed in the order they are probed:
# two per-user locations under the home directory, then a file in the
# current working directory.
DEFAULT_CONFIG_PATHS = [
    Path.home().joinpath(".uht-tooling.yaml"),
    Path.home().joinpath(".config", "uht-tooling", "config.yaml"),
    Path(".uht-tooling.yaml"),
]
19
+
20
+
21
def find_config_file() -> Optional[Path]:
    """
    Locate the configuration file to use, if any.

    Candidates are checked in this order and the first one that exists wins:
        1. The path named by the $UHT_TOOLING_CONFIG environment variable
        2. ~/.uht-tooling.yaml
        3. ~/.config/uht-tooling/config.yaml
        4. .uht-tooling.yaml (current directory)

    An environment variable that is unset, empty, or points at a missing
    path is ignored and the default locations are tried instead.

    Returns:
        Path to the first existing candidate, or None when none exists.
    """
    # The environment override is consulted before any default location.
    override = os.environ.get("UHT_TOOLING_CONFIG")
    if override and Path(override).exists():
        return Path(override)

    # Fall back to the first default location present on disk.
    return next(
        (candidate for candidate in DEFAULT_CONFIG_PATHS if candidate.exists()),
        None,
    )
47
+
48
+
49
def load_config(config_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Read the YAML configuration file into a dictionary.

    Loading is best-effort: any problem (PyYAML not importable, no file
    found, unreadable or malformed file, top-level document that is not a
    mapping) yields an empty dictionary rather than an exception.

    Args:
        config_path: Explicit path to the config file. When None, the file
            is auto-discovered via find_config_file().

    Returns:
        The parsed configuration, or an empty dict when nothing usable
        could be loaded.
    """
    if not HAVE_YAML:
        return {}

    resolved = find_config_file() if config_path is None else config_path
    if resolved is None:
        return {}

    # Accept plain strings as well as Path objects.
    resolved = Path(resolved)
    if not resolved.exists():
        return {}

    try:
        with open(resolved, "r", encoding="utf-8") as handle:
            loaded = yaml.safe_load(handle)
    except Exception:
        # Deliberately broad: a broken config file must not abort the CLI.
        return {}

    # An empty file parses to None and a scalar/list document is not usable
    # as a configuration mapping; both collapse to "no configuration".
    return loaded if isinstance(loaded, dict) else {}
78
+
79
+
80
def get_option(
    config: Dict[str, Any],
    key: str,
    cli_value: Any,
    default: Any = None,
    workflow: Optional[str] = None,
) -> Any:
    """
    Resolve an option value from the CLI and the configuration dictionary.

    Precedence, highest first:
        1. ``cli_value`` when it is not None
        2. ``config["defaults"][workflow][key]`` (only when ``workflow`` is given)
        3. ``config["paths"][key]`` (string values get ``~`` expanded)
        4. ``config[key]``
        5. ``default``

    Because any non-None ``cli_value`` wins, callers that want the
    configuration to be consulted must pass None when the user did not
    supply the option.

    Sections that are missing or are not mappings (for example an empty
    ``defaults:`` entry, which YAML parses to None) are treated as absent
    instead of raising.

    Args:
        config: Configuration dictionary from load_config().
        key: The option key to look up.
        cli_value: Value from CLI (takes precedence if not None).
        default: Default value if not found anywhere.
        workflow: Optional workflow name for workflow-specific defaults.

    Returns:
        The resolved option value.
    """
    # Local import keeps this block self-contained without touching the
    # module's import header.
    from collections.abc import Mapping

    # CLI value always takes precedence if explicitly provided.
    if cli_value is not None:
        return cli_value

    # Workflow-specific defaults: config["defaults"][workflow][key].
    if workflow:
        defaults_section = config.get("defaults")
        workflow_defaults = (
            defaults_section.get(workflow)
            if isinstance(defaults_section, Mapping)
            else None
        )
        if isinstance(workflow_defaults, Mapping) and key in workflow_defaults:
            return workflow_defaults[key]

    # Global paths section: string values may start with "~".
    paths_config = config.get("paths")
    if isinstance(paths_config, Mapping) and key in paths_config:
        value = paths_config[key]
        if isinstance(value, str):
            return os.path.expanduser(value)
        return value

    # Top-level key.
    if key in config:
        return config[key]

    return default
124
+
125
+
126
def get_workflow_defaults(config: Dict[str, Any], workflow: str) -> Dict[str, Any]:
    """
    Get all default values for a specific workflow.

    Looks up ``config["defaults"][workflow]``. A missing ``defaults``
    section, a missing workflow entry, or an entry that is not a mapping
    (for example an empty YAML section, which parses to None) all yield an
    empty dictionary instead of raising.

    Args:
        config: Configuration dictionary from load_config().
        workflow: Workflow name (e.g., "mutation_caller", "umi_hunter").

    Returns:
        Dictionary of default values for the workflow; empty when none are
        configured.
    """
    # Local import keeps this block self-contained without touching the
    # module's import header.
    from collections.abc import Mapping

    defaults_section = config.get("defaults")
    if not isinstance(defaults_section, Mapping):
        return {}

    workflow_defaults = defaults_section.get(workflow)
    return workflow_defaults if isinstance(workflow_defaults, Mapping) else {}