uht-tooling 0.1.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uht_tooling/cli.py +153 -4
- uht_tooling/config.py +137 -0
- uht_tooling/tools.py +143 -0
- uht_tooling/workflows/gui.py +19 -0
- uht_tooling/workflows/mut_rate.py +484 -124
- uht_tooling/workflows/mutation_caller.py +11 -2
- uht_tooling/workflows/umi_hunter.py +9 -4
- {uht_tooling-0.1.9.dist-info → uht_tooling-0.3.0.dist-info}/METADATA +123 -5
- uht_tooling-0.3.0.dist-info/RECORD +20 -0
- uht_tooling-0.1.9.dist-info/RECORD +0 -18
- {uht_tooling-0.1.9.dist-info → uht_tooling-0.3.0.dist-info}/WHEEL +0 -0
- {uht_tooling-0.1.9.dist-info → uht_tooling-0.3.0.dist-info}/entry_points.txt +0 -0
- {uht_tooling-0.1.9.dist-info → uht_tooling-0.3.0.dist-info}/top_level.txt +0 -0
uht_tooling/cli.py
CHANGED
|
@@ -3,6 +3,8 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
|
+
from uht_tooling.config import get_option, load_config
|
|
7
|
+
from uht_tooling.tools import ToolNotFoundError, validate_workflow_tools
|
|
6
8
|
from uht_tooling.workflows.design_gibson import run_design_gibson
|
|
7
9
|
from uht_tooling.workflows.design_kld import run_design_kld
|
|
8
10
|
from uht_tooling.workflows.design_slim import run_design_slim
|
|
@@ -28,29 +30,57 @@ from uht_tooling.workflows.gui import launch_gui
|
|
|
28
30
|
app = typer.Typer(help="Command-line interface for the uht-tooling package.")
|
|
29
31
|
|
|
30
32
|
|
|
33
|
+
@app.callback()
def main_callback(
    ctx: typer.Context,
    config: Optional[Path] = typer.Option(
        None,
        "--config",
        "-K",
        exists=True,
        # A config must be a regular file; without this a directory passes
        # validation and is only rejected later, when it is opened.
        dir_okay=False,
        readable=True,
        help="Path to YAML configuration file for default options.",
    ),
):
    """Global callback to load configuration file.

    Runs before any sub-command and stores the loaded configuration under
    ``ctx.obj["config"]``.

    Args:
        ctx: Typer context; ``ctx.obj`` is initialised to a ``dict`` when it
            has not been set yet.
        config: Optional explicit path given via ``--config`` / ``-K``. It is
            forwarded unchanged to ``load_config`` (``None`` when the option
            is omitted).
    """
    ctx.ensure_object(dict)
    ctx.obj["config"] = load_config(config)
|
|
48
|
+
|
|
49
|
+
|
|
31
50
|
@app.command("design-slim", help="Design SLIM primers from user-specified FASTA/CSV inputs.")
|
|
32
51
|
def design_slim_command(
|
|
33
|
-
|
|
52
|
+
ctx: typer.Context,
|
|
53
|
+
gene_fasta: Path = typer.Option(
|
|
54
|
+
..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
|
|
55
|
+
),
|
|
34
56
|
context_fasta: Path = typer.Option(
|
|
35
57
|
...,
|
|
58
|
+
"--context-fasta",
|
|
59
|
+
"-c",
|
|
36
60
|
exists=True,
|
|
37
61
|
readable=True,
|
|
38
62
|
help="Path to the context FASTA file containing the plasmid or genomic sequence.",
|
|
39
63
|
),
|
|
40
64
|
mutations_csv: Path = typer.Option(
|
|
41
65
|
...,
|
|
66
|
+
"--mutations-csv",
|
|
67
|
+
"-m",
|
|
42
68
|
exists=True,
|
|
43
69
|
readable=True,
|
|
44
70
|
help="CSV file containing a 'mutations' column with the desired edits.",
|
|
45
71
|
),
|
|
46
72
|
output_dir: Path = typer.Option(
|
|
47
73
|
...,
|
|
74
|
+
"--output-dir",
|
|
75
|
+
"-o",
|
|
48
76
|
dir_okay=True,
|
|
49
77
|
writable=True,
|
|
50
78
|
help="Directory where results will be written.",
|
|
51
79
|
),
|
|
52
80
|
log_path: Optional[Path] = typer.Option(
|
|
53
81
|
None,
|
|
82
|
+
"--log-path",
|
|
83
|
+
"-l",
|
|
54
84
|
dir_okay=False,
|
|
55
85
|
writable=True,
|
|
56
86
|
help="Optional path to write a dedicated log file for this run.",
|
|
@@ -69,27 +99,38 @@ def design_slim_command(
|
|
|
69
99
|
|
|
70
100
|
@app.command("design-kld", help="Design KLD (inverse PCR) primers from user-specified FASTA/CSV inputs.")
|
|
71
101
|
def design_kld_command(
|
|
72
|
-
|
|
102
|
+
ctx: typer.Context,
|
|
103
|
+
gene_fasta: Path = typer.Option(
|
|
104
|
+
..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
|
|
105
|
+
),
|
|
73
106
|
context_fasta: Path = typer.Option(
|
|
74
107
|
...,
|
|
108
|
+
"--context-fasta",
|
|
109
|
+
"-c",
|
|
75
110
|
exists=True,
|
|
76
111
|
readable=True,
|
|
77
112
|
help="Path to the context FASTA file containing the plasmid or genomic sequence.",
|
|
78
113
|
),
|
|
79
114
|
mutations_csv: Path = typer.Option(
|
|
80
115
|
...,
|
|
116
|
+
"--mutations-csv",
|
|
117
|
+
"-m",
|
|
81
118
|
exists=True,
|
|
82
119
|
readable=True,
|
|
83
120
|
help="CSV file containing a 'mutations' column with the desired edits.",
|
|
84
121
|
),
|
|
85
122
|
output_dir: Path = typer.Option(
|
|
86
123
|
...,
|
|
124
|
+
"--output-dir",
|
|
125
|
+
"-o",
|
|
87
126
|
dir_okay=True,
|
|
88
127
|
writable=True,
|
|
89
128
|
help="Directory where results will be written.",
|
|
90
129
|
),
|
|
91
130
|
log_path: Optional[Path] = typer.Option(
|
|
92
131
|
None,
|
|
132
|
+
"--log-path",
|
|
133
|
+
"-l",
|
|
93
134
|
dir_okay=False,
|
|
94
135
|
writable=True,
|
|
95
136
|
help="Optional path to write a dedicated log file for this run.",
|
|
@@ -108,26 +149,34 @@ def design_kld_command(
|
|
|
108
149
|
|
|
109
150
|
@app.command("nextera-primers", help="Generate Nextera XT primers from binding region CSV input.")
|
|
110
151
|
def nextera_primers_command(
|
|
152
|
+
ctx: typer.Context,
|
|
111
153
|
binding_csv: Path = typer.Option(
|
|
112
154
|
...,
|
|
155
|
+
"--binding-csv",
|
|
156
|
+
"-b",
|
|
113
157
|
exists=True,
|
|
114
158
|
readable=True,
|
|
115
159
|
help="CSV file with a 'binding_region' column; first row is i7, second row is i5.",
|
|
116
160
|
),
|
|
117
161
|
output_csv: Path = typer.Option(
|
|
118
162
|
...,
|
|
163
|
+
"--output-csv",
|
|
164
|
+
"-o",
|
|
119
165
|
dir_okay=False,
|
|
120
166
|
writable=True,
|
|
121
167
|
help="Path to write the generated primer CSV.",
|
|
122
168
|
),
|
|
123
169
|
log_path: Optional[Path] = typer.Option(
|
|
124
170
|
None,
|
|
171
|
+
"--log-path",
|
|
172
|
+
"-l",
|
|
125
173
|
dir_okay=False,
|
|
126
174
|
writable=True,
|
|
127
175
|
help="Optional path to write a dedicated log file.",
|
|
128
176
|
),
|
|
129
177
|
config: Optional[Path] = typer.Option(
|
|
130
178
|
None,
|
|
179
|
+
"--nextera-config",
|
|
131
180
|
exists=True,
|
|
132
181
|
readable=True,
|
|
133
182
|
help="Optional YAML file providing overrides for indexes/prefixes/suffixes.",
|
|
@@ -145,27 +194,38 @@ def nextera_primers_command(
|
|
|
145
194
|
|
|
146
195
|
@app.command("design-gibson", help="Design Gibson assembly primers and assembly plans.")
|
|
147
196
|
def design_gibson_command(
|
|
148
|
-
|
|
197
|
+
ctx: typer.Context,
|
|
198
|
+
gene_fasta: Path = typer.Option(
|
|
199
|
+
..., "--gene-fasta", "-g", exists=True, readable=True, help="Path to the gene FASTA file."
|
|
200
|
+
),
|
|
149
201
|
context_fasta: Path = typer.Option(
|
|
150
202
|
...,
|
|
203
|
+
"--context-fasta",
|
|
204
|
+
"-c",
|
|
151
205
|
exists=True,
|
|
152
206
|
readable=True,
|
|
153
207
|
help="Path to the circular context FASTA file.",
|
|
154
208
|
),
|
|
155
209
|
mutations_csv: Path = typer.Option(
|
|
156
210
|
...,
|
|
211
|
+
"--mutations-csv",
|
|
212
|
+
"-m",
|
|
157
213
|
exists=True,
|
|
158
214
|
readable=True,
|
|
159
215
|
help="CSV file with a 'mutations' column (use '+' to link sub-mutations).",
|
|
160
216
|
),
|
|
161
217
|
output_dir: Path = typer.Option(
|
|
162
218
|
...,
|
|
219
|
+
"--output-dir",
|
|
220
|
+
"-o",
|
|
163
221
|
dir_okay=True,
|
|
164
222
|
writable=True,
|
|
165
223
|
help="Directory where primer and assembly plan CSVs will be written.",
|
|
166
224
|
),
|
|
167
225
|
log_path: Optional[Path] = typer.Option(
|
|
168
226
|
None,
|
|
227
|
+
"--log-path",
|
|
228
|
+
"-l",
|
|
169
229
|
dir_okay=False,
|
|
170
230
|
writable=True,
|
|
171
231
|
help="Optional path for a dedicated log file.",
|
|
@@ -189,41 +249,65 @@ def design_gibson_command(
|
|
|
189
249
|
help="Identify amino-acid substitutions from long-read data without UMIs.",
|
|
190
250
|
)
|
|
191
251
|
def mutation_caller_command(
|
|
252
|
+
ctx: typer.Context,
|
|
192
253
|
template_fasta: Path = typer.Option(
|
|
193
254
|
...,
|
|
255
|
+
"--template-fasta",
|
|
256
|
+
"-t",
|
|
194
257
|
exists=True,
|
|
195
258
|
readable=True,
|
|
196
259
|
help="FASTA file containing the mutation caller template sequence.",
|
|
197
260
|
),
|
|
198
261
|
flanks_csv: Path = typer.Option(
|
|
199
262
|
...,
|
|
263
|
+
"--flanks-csv",
|
|
264
|
+
"-f",
|
|
200
265
|
exists=True,
|
|
201
266
|
readable=True,
|
|
202
267
|
help="CSV file describing gene flanks and min/max lengths.",
|
|
203
268
|
),
|
|
204
269
|
fastq: list[str] = typer.Option(
|
|
205
270
|
...,
|
|
271
|
+
"--fastq",
|
|
272
|
+
"-q",
|
|
206
273
|
help="One or more FASTQ(.gz) paths or glob patterns (provide multiple --fastq options as needed).",
|
|
207
274
|
),
|
|
208
275
|
output_dir: Path = typer.Option(
|
|
209
276
|
...,
|
|
277
|
+
"--output-dir",
|
|
278
|
+
"-o",
|
|
210
279
|
dir_okay=True,
|
|
211
280
|
writable=True,
|
|
212
281
|
help="Directory where per-sample outputs will be written.",
|
|
213
282
|
),
|
|
214
283
|
threshold: int = typer.Option(
|
|
215
284
|
10,
|
|
285
|
+
"--threshold",
|
|
286
|
+
"-T",
|
|
216
287
|
min=1,
|
|
217
288
|
help="Minimum AA substitution count to include in the frequent-substitution report.",
|
|
218
289
|
),
|
|
219
290
|
log_path: Optional[Path] = typer.Option(
|
|
220
291
|
None,
|
|
292
|
+
"--log-path",
|
|
293
|
+
"-l",
|
|
221
294
|
dir_okay=False,
|
|
222
295
|
writable=True,
|
|
223
296
|
help="Optional path to write a dedicated log file.",
|
|
224
297
|
),
|
|
225
298
|
):
|
|
226
299
|
"""Identify and summarise amino-acid substitutions."""
|
|
300
|
+
# Validate required external tools
|
|
301
|
+
try:
|
|
302
|
+
validate_workflow_tools("mutation_caller")
|
|
303
|
+
except ToolNotFoundError as e:
|
|
304
|
+
typer.echo(f"Error: {e}", err=True)
|
|
305
|
+
raise typer.Exit(1)
|
|
306
|
+
|
|
307
|
+
# Apply config defaults
|
|
308
|
+
config = ctx.obj.get("config", {}) if ctx.obj else {}
|
|
309
|
+
threshold = get_option(config, "threshold", threshold, default=10, workflow="mutation_caller")
|
|
310
|
+
|
|
227
311
|
fastq_files = expand_fastq_inputs_mutation(fastq)
|
|
228
312
|
results = run_mutation_caller(
|
|
229
313
|
template_fasta=template_fasta,
|
|
@@ -243,53 +327,89 @@ def mutation_caller_command(
|
|
|
243
327
|
|
|
244
328
|
@app.command("umi-hunter", help="Cluster UMIs and produce consensus genes from long-read data.")
|
|
245
329
|
def umi_hunter_command(
|
|
330
|
+
ctx: typer.Context,
|
|
246
331
|
template_fasta: Path = typer.Option(
|
|
247
332
|
...,
|
|
333
|
+
"--template-fasta",
|
|
334
|
+
"-t",
|
|
248
335
|
exists=True,
|
|
249
336
|
readable=True,
|
|
250
337
|
help="Template FASTA file for consensus generation.",
|
|
251
338
|
),
|
|
252
339
|
config_csv: Path = typer.Option(
|
|
253
340
|
...,
|
|
341
|
+
"--config-csv",
|
|
342
|
+
"-C",
|
|
254
343
|
exists=True,
|
|
255
344
|
readable=True,
|
|
256
345
|
help="CSV describing UMI/gene flanks and length bounds.",
|
|
257
346
|
),
|
|
258
347
|
fastq: list[str] = typer.Option(
|
|
259
348
|
...,
|
|
349
|
+
"--fastq",
|
|
350
|
+
"-q",
|
|
260
351
|
help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
|
|
261
352
|
),
|
|
262
353
|
output_dir: Path = typer.Option(
|
|
263
354
|
...,
|
|
355
|
+
"--output-dir",
|
|
356
|
+
"-o",
|
|
264
357
|
dir_okay=True,
|
|
265
358
|
writable=True,
|
|
266
359
|
help="Directory where UMI hunter outputs will be stored.",
|
|
267
360
|
),
|
|
268
361
|
umi_identity_threshold: float = typer.Option(
|
|
269
362
|
0.9,
|
|
363
|
+
"--umi-identity-threshold",
|
|
364
|
+
"-u",
|
|
270
365
|
min=0.0,
|
|
271
366
|
max=1.0,
|
|
272
367
|
help="UMI clustering identity threshold (default: 0.9).",
|
|
273
368
|
),
|
|
274
369
|
consensus_mutation_threshold: float = typer.Option(
|
|
275
370
|
0.7,
|
|
371
|
+
"--consensus-mutation-threshold",
|
|
372
|
+
"-M",
|
|
276
373
|
min=0.0,
|
|
277
374
|
max=1.0,
|
|
278
375
|
help="Mutation threshold for consensus calling (default: 0.7).",
|
|
279
376
|
),
|
|
280
377
|
min_cluster_size: int = typer.Option(
|
|
281
378
|
1,
|
|
379
|
+
"--min-cluster-size",
|
|
380
|
+
"-s",
|
|
282
381
|
min=1,
|
|
283
382
|
help="Minimum number of reads required in a UMI cluster before a consensus is generated.",
|
|
284
383
|
),
|
|
285
384
|
log_path: Optional[Path] = typer.Option(
|
|
286
385
|
None,
|
|
386
|
+
"--log-path",
|
|
387
|
+
"-l",
|
|
287
388
|
dir_okay=False,
|
|
288
389
|
writable=True,
|
|
289
390
|
help="Optional path to write a dedicated log file.",
|
|
290
391
|
),
|
|
291
392
|
):
|
|
292
393
|
"""Cluster UMIs and generate consensus sequences from long-read FASTQ data."""
|
|
394
|
+
# Validate required external tools
|
|
395
|
+
try:
|
|
396
|
+
validate_workflow_tools("umi_hunter")
|
|
397
|
+
except ToolNotFoundError as e:
|
|
398
|
+
typer.echo(f"Error: {e}", err=True)
|
|
399
|
+
raise typer.Exit(1)
|
|
400
|
+
|
|
401
|
+
# Apply config defaults
|
|
402
|
+
config = ctx.obj.get("config", {}) if ctx.obj else {}
|
|
403
|
+
umi_identity_threshold = get_option(
|
|
404
|
+
config, "umi_identity_threshold", umi_identity_threshold, default=0.9, workflow="umi_hunter"
|
|
405
|
+
)
|
|
406
|
+
consensus_mutation_threshold = get_option(
|
|
407
|
+
config, "consensus_mutation_threshold", consensus_mutation_threshold, default=0.7, workflow="umi_hunter"
|
|
408
|
+
)
|
|
409
|
+
min_cluster_size = get_option(
|
|
410
|
+
config, "min_cluster_size", min_cluster_size, default=1, workflow="umi_hunter"
|
|
411
|
+
)
|
|
412
|
+
|
|
293
413
|
fastq_files = expand_fastq_inputs_umi(fastq)
|
|
294
414
|
results = run_umi_hunter(
|
|
295
415
|
template_fasta=template_fasta,
|
|
@@ -310,42 +430,60 @@ def umi_hunter_command(
|
|
|
310
430
|
typer.echo(
|
|
311
431
|
f" Sample {entry['sample']}: "
|
|
312
432
|
f"{entry.get('clusters', 0)} consensus clusters "
|
|
313
|
-
f"(from {total_clusters} total)
|
|
433
|
+
f"(from {total_clusters} total) -> {entry['directory']}"
|
|
314
434
|
)
|
|
315
435
|
|
|
316
436
|
|
|
317
437
|
@app.command("ep-library-profile", help="Profile mutation rates for ep-library sequencing data.")
|
|
318
438
|
def ep_library_profile_command(
|
|
439
|
+
ctx: typer.Context,
|
|
319
440
|
region_fasta: Path = typer.Option(
|
|
320
441
|
...,
|
|
442
|
+
"--region-fasta",
|
|
443
|
+
"-R",
|
|
321
444
|
exists=True,
|
|
322
445
|
readable=True,
|
|
323
446
|
help="FASTA file describing the region of interest.",
|
|
324
447
|
),
|
|
325
448
|
plasmid_fasta: Path = typer.Option(
|
|
326
449
|
...,
|
|
450
|
+
"--plasmid-fasta",
|
|
451
|
+
"-p",
|
|
327
452
|
exists=True,
|
|
328
453
|
readable=True,
|
|
329
454
|
help="FASTA file with the full plasmid sequence.",
|
|
330
455
|
),
|
|
331
456
|
fastq: list[str] = typer.Option(
|
|
332
457
|
...,
|
|
458
|
+
"--fastq",
|
|
459
|
+
"-q",
|
|
333
460
|
help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
|
|
334
461
|
),
|
|
335
462
|
output_dir: Path = typer.Option(
|
|
336
463
|
...,
|
|
464
|
+
"--output-dir",
|
|
465
|
+
"-o",
|
|
337
466
|
dir_okay=True,
|
|
338
467
|
writable=True,
|
|
339
468
|
help="Directory for per-sample outputs.",
|
|
340
469
|
),
|
|
341
470
|
work_dir: Optional[Path] = typer.Option(
|
|
342
471
|
None,
|
|
472
|
+
"--work-dir",
|
|
473
|
+
"-w",
|
|
343
474
|
dir_okay=True,
|
|
344
475
|
writable=True,
|
|
345
476
|
help="Optional scratch directory for intermediate files (defaults to output/tmp).",
|
|
346
477
|
),
|
|
347
478
|
):
|
|
348
479
|
"""Quantify mutation rates for ep-library sequencing experiments."""
|
|
480
|
+
# Validate required external tools
|
|
481
|
+
try:
|
|
482
|
+
validate_workflow_tools("ep_library_profile")
|
|
483
|
+
except ToolNotFoundError as e:
|
|
484
|
+
typer.echo(f"Error: {e}", err=True)
|
|
485
|
+
raise typer.Exit(1)
|
|
486
|
+
|
|
349
487
|
fastq_files = expand_fastq_inputs_ep(fastq)
|
|
350
488
|
results = run_ep_library_profile(
|
|
351
489
|
fastq_paths=fastq_files,
|
|
@@ -365,30 +503,41 @@ def ep_library_profile_command(
|
|
|
365
503
|
|
|
366
504
|
@app.command("profile-inserts", help="Extract and profile inserts using probe pairs.")
|
|
367
505
|
def profile_inserts_command(
|
|
506
|
+
ctx: typer.Context,
|
|
368
507
|
probes_csv: Path = typer.Option(
|
|
369
508
|
...,
|
|
509
|
+
"--probes-csv",
|
|
510
|
+
"-P",
|
|
370
511
|
exists=True,
|
|
371
512
|
readable=True,
|
|
372
513
|
help="CSV file containing upstream/downstream probes.",
|
|
373
514
|
),
|
|
374
515
|
fastq: list[str] = typer.Option(
|
|
375
516
|
...,
|
|
517
|
+
"--fastq",
|
|
518
|
+
"-q",
|
|
376
519
|
help="One or more FASTQ(.gz) paths or glob patterns (multiple --fastq options allowed).",
|
|
377
520
|
),
|
|
378
521
|
output_dir: Path = typer.Option(
|
|
379
522
|
...,
|
|
523
|
+
"--output-dir",
|
|
524
|
+
"-o",
|
|
380
525
|
dir_okay=True,
|
|
381
526
|
writable=True,
|
|
382
527
|
help="Directory for per-sample outputs.",
|
|
383
528
|
),
|
|
384
529
|
min_ratio: int = typer.Option(
|
|
385
530
|
80,
|
|
531
|
+
"--min-ratio",
|
|
532
|
+
"-r",
|
|
386
533
|
min=0,
|
|
387
534
|
max=100,
|
|
388
535
|
help="Minimum fuzzy match ratio for probe detection (default: 80).",
|
|
389
536
|
),
|
|
390
537
|
log_path: Optional[Path] = typer.Option(
|
|
391
538
|
None,
|
|
539
|
+
"--log-path",
|
|
540
|
+
"-l",
|
|
392
541
|
dir_okay=False,
|
|
393
542
|
writable=True,
|
|
394
543
|
help="Optional path to write a dedicated log file.",
|
uht_tooling/config.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Global configuration file support."""
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
HAVE_YAML = True
|
|
10
|
+
except ImportError:
|
|
11
|
+
HAVE_YAML = False
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Fallback configuration locations, probed in list order after the
# $UHT_TOOLING_CONFIG environment variable has been checked.
DEFAULT_CONFIG_PATHS = [
    Path.home() / ".uht-tooling.yaml",
    Path.home() / ".config" / "uht-tooling" / "config.yaml",
    Path(".uht-tooling.yaml"),
]


def find_config_file() -> Optional[Path]:
    """
    Find a configuration file from environment variable or default locations.

    Search order:
    1. $UHT_TOOLING_CONFIG environment variable (a leading ``~`` is expanded)
    2. ~/.uht-tooling.yaml
    3. ~/.config/uht-tooling/config.yaml
    4. .uht-tooling.yaml (current directory)

    Only regular files are accepted; a directory that happens to carry one of
    these names is skipped so the search can continue.

    Returns:
        Path to the config file if found, None otherwise.
    """
    # Check environment variable first
    env_path = os.environ.get("UHT_TOOLING_CONFIG")
    if env_path:
        # os.path.expanduser leaves the string untouched when the home
        # directory cannot be resolved, so this never raises.
        candidate = Path(os.path.expanduser(env_path))
        if candidate.is_file():
            return candidate

    # Check default locations
    for candidate in DEFAULT_CONFIG_PATHS:
        if candidate.is_file():
            return candidate

    return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_config(config_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Load YAML configuration, auto-discovering if path not provided.

    Loading is best-effort: any problem yields an empty dict rather than an
    exception. Unlike a silent fallback, problems with a file that was
    actually selected are reported through the ``logging`` module so that a
    typo in the config does not go unnoticed.

    Args:
        config_path: Explicit path to config file. If None, auto-discover
            via ``find_config_file()``.

    Returns:
        Dictionary containing configuration. Empty dict if no config found,
        if YAML is not available, if the file cannot be read or parsed, or
        if its top-level value is not a mapping.
    """
    # Local import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    if not HAVE_YAML:
        if config_path is not None:
            # The user explicitly asked for this file; tell them why it is ignored.
            logger.warning(
                "PyYAML is not installed; ignoring configuration file %s", config_path
            )
        return {}

    if config_path is None:
        config_path = find_config_file()
    if config_path is None:
        return {}

    config_path = Path(config_path)
    if not config_path.exists():
        return {}

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except Exception as exc:
        # Deliberately broad: configuration is optional, so read/parse errors
        # must never abort the command. They are surfaced as a warning.
        logger.warning("Could not load configuration file %s: %s", config_path, exc)
        return {}

    if config is None:
        # An empty file parses to None; treat it as "no settings".
        return {}
    if not isinstance(config, dict):
        logger.warning(
            "Ignoring configuration file %s: top-level value is not a mapping",
            config_path,
        )
        return {}
    return config
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_option(
    config: Dict[str, Any],
    key: str,
    cli_value: Any,
    default: Any = None,
    workflow: Optional[str] = None,
) -> Any:
    """
    Get an option with precedence: CLI > workflow-specific config > global config > default.

    "Global config" is consulted in two steps: first the ``paths`` section
    (string values there get ``~`` expanded), then the top level of the
    configuration.

    Sections that are missing, empty (parsed from YAML as ``None``) or not a
    mapping are skipped instead of raising.

    Args:
        config: Configuration dictionary from load_config().
        key: The option key to look up.
        cli_value: Value from CLI (takes precedence if not None). Any
            non-None value wins, so the configuration is consulted only
            when the caller passes None here.
        default: Default value if not found anywhere.
        workflow: Optional workflow name for workflow-specific defaults,
            looked up under ``config["defaults"][workflow]``.

    Returns:
        The resolved option value.
    """
    # CLI value always takes precedence if explicitly provided
    if cli_value is not None:
        return cli_value

    if not isinstance(config, dict):
        return default

    # Check workflow-specific defaults
    if workflow:
        defaults_section = config.get("defaults")
        if isinstance(defaults_section, dict):
            workflow_defaults = defaults_section.get(workflow)
            if isinstance(workflow_defaults, dict) and key in workflow_defaults:
                return workflow_defaults[key]

    # Check global paths config
    paths_config = config.get("paths")
    if isinstance(paths_config, dict) and key in paths_config:
        value = paths_config[key]
        # Expand ~ in paths
        if isinstance(value, str):
            return os.path.expanduser(value)
        return value

    # Check top-level config
    if key in config:
        return config[key]

    return default
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_workflow_defaults(config: Dict[str, Any], workflow: str) -> Dict[str, Any]:
    """
    Get all default values for a specific workflow.

    Reads ``config["defaults"][workflow]``. A missing, empty (YAML ``null``)
    or non-mapping ``defaults`` section or workflow entry yields an empty
    dict instead of raising.

    Args:
        config: Configuration dictionary from load_config().
        workflow: Workflow name (e.g., "mutation_caller", "umi_hunter").

    Returns:
        Dictionary of default values for the workflow.
    """
    defaults_section = config.get("defaults") if isinstance(config, dict) else None
    if not isinstance(defaults_section, dict):
        return {}

    workflow_defaults = defaults_section.get(workflow)
    return workflow_defaults if isinstance(workflow_defaults, dict) else {}
|