uht-tooling 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/PKG-INFO +1 -1
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/pyproject.toml +1 -1
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/mut_rate.py +88 -27
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/PKG-INFO +1 -1
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/README.md +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/setup.cfg +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/__init__.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/cli.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/config.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/models/__init__.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/tools.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/__init__.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_gibson.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_kld.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_slim.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/gui.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/mutation_caller.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/nextera_designer.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/profile_inserts.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/umi_hunter.py +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/SOURCES.txt +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/entry_points.txt +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/requires.txt +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/top_level.txt +0 -0
- {uht_tooling-0.3.2 → uht_tooling-0.3.4}/tests/test_design_kld.py +0 -0
|
@@ -62,6 +62,8 @@ def _ensure_workspace(path: Path, purpose: str) -> Path:
|
|
|
62
62
|
root = _workspace_root(path)
|
|
63
63
|
if root is None:
|
|
64
64
|
root = _maybe_init_temp_workspace(path)
|
|
65
|
+
if root is None and _is_temp_path(path):
|
|
66
|
+
return Path(path).resolve()
|
|
65
67
|
if root is None:
|
|
66
68
|
raise ValueError(_workspace_error(path, purpose))
|
|
67
69
|
return root
|
|
@@ -72,8 +74,8 @@ def _maybe_init_temp_workspace(path: Path) -> Optional[Path]:
|
|
|
72
74
|
resolved = Path(path).resolve()
|
|
73
75
|
except FileNotFoundError:
|
|
74
76
|
return None
|
|
75
|
-
|
|
76
|
-
if resolved ==
|
|
77
|
+
tmp_roots = _temp_roots()
|
|
78
|
+
if any(resolved == root or root in resolved.parents for root in tmp_roots):
|
|
77
79
|
sentinel_path = resolved / WORKSPACE_SENTINEL
|
|
78
80
|
if not sentinel_path.exists():
|
|
79
81
|
try:
|
|
@@ -84,6 +86,21 @@ def _maybe_init_temp_workspace(path: Path) -> Optional[Path]:
|
|
|
84
86
|
return None
|
|
85
87
|
|
|
86
88
|
|
|
89
|
+
def _is_temp_path(path: Path) -> bool:
|
|
90
|
+
try:
|
|
91
|
+
resolved = Path(path).resolve()
|
|
92
|
+
except FileNotFoundError:
|
|
93
|
+
return False
|
|
94
|
+
tmp_roots = _temp_roots()
|
|
95
|
+
return any(resolved == root or root in resolved.parents for root in tmp_roots)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _temp_roots() -> List[Path]:
|
|
99
|
+
roots = {Path(tempfile.gettempdir()).resolve(), Path("/tmp").resolve()}
|
|
100
|
+
roots.add(Path("/private/tmp").resolve())
|
|
101
|
+
return [root for root in roots if root.exists()]
|
|
102
|
+
|
|
103
|
+
|
|
87
104
|
def _safe_rmtree(path: Optional[Path], *, allowed_base: Optional[Path] = None, label: str = "") -> bool:
|
|
88
105
|
if not path:
|
|
89
106
|
return False
|
|
@@ -393,25 +410,56 @@ def run_nanofilt_filtering(input_fastq, quality_threshold, output_fastq):
|
|
|
393
410
|
bool: True if successful, False otherwise
|
|
394
411
|
"""
|
|
395
412
|
try:
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
413
|
+
logging.info(
|
|
414
|
+
"Running NanoFilt with quality threshold %s and min length 30bp: %s -> %s",
|
|
415
|
+
quality_threshold,
|
|
416
|
+
input_fastq,
|
|
417
|
+
output_fastq,
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
with open(output_fastq, "wb") as out_fh:
|
|
421
|
+
gunzip_proc = subprocess.Popen(
|
|
422
|
+
["gunzip", "-c", input_fastq],
|
|
423
|
+
stdout=subprocess.PIPE,
|
|
424
|
+
stderr=subprocess.PIPE,
|
|
425
|
+
)
|
|
426
|
+
nanofilt_proc = subprocess.Popen(
|
|
427
|
+
["NanoFilt", "-q", str(quality_threshold), "-l", "30"],
|
|
428
|
+
stdin=gunzip_proc.stdout,
|
|
429
|
+
stdout=subprocess.PIPE,
|
|
430
|
+
stderr=subprocess.PIPE,
|
|
431
|
+
)
|
|
432
|
+
gunzip_proc.stdout.close()
|
|
433
|
+
gzip_proc = subprocess.Popen(
|
|
434
|
+
["gzip"],
|
|
435
|
+
stdin=nanofilt_proc.stdout,
|
|
436
|
+
stdout=out_fh,
|
|
437
|
+
stderr=subprocess.PIPE,
|
|
438
|
+
)
|
|
439
|
+
nanofilt_proc.stdout.close()
|
|
440
|
+
|
|
441
|
+
_, gunzip_err = gunzip_proc.communicate()
|
|
442
|
+
_, nanofilt_err = nanofilt_proc.communicate()
|
|
443
|
+
_, gzip_err = gzip_proc.communicate()
|
|
444
|
+
|
|
445
|
+
if gunzip_proc.returncode != 0:
|
|
446
|
+
logging.error("gunzip failed (%s): %s", gunzip_proc.returncode, gunzip_err.decode())
|
|
403
447
|
return False
|
|
404
|
-
|
|
405
|
-
|
|
448
|
+
if nanofilt_proc.returncode != 0:
|
|
449
|
+
logging.error("NanoFilt failed (%s): %s", nanofilt_proc.returncode, nanofilt_err.decode())
|
|
450
|
+
return False
|
|
451
|
+
if gzip_proc.returncode != 0:
|
|
452
|
+
logging.error("gzip failed (%s): %s", gzip_proc.returncode, gzip_err.decode())
|
|
453
|
+
return False
|
|
454
|
+
|
|
406
455
|
if os.path.exists(output_fastq) and os.path.getsize(output_fastq) > 0:
|
|
407
|
-
logging.info(
|
|
456
|
+
logging.info("Successfully created filtered FASTQ: %s", output_fastq)
|
|
408
457
|
return True
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
458
|
+
logging.error("Output file %s was not created or is empty", output_fastq)
|
|
459
|
+
return False
|
|
460
|
+
|
|
413
461
|
except Exception as e:
|
|
414
|
-
logging.error(
|
|
462
|
+
logging.error("Error running NanoFilt: %s", e)
|
|
415
463
|
return False
|
|
416
464
|
|
|
417
465
|
def calculate_mutation_rate_for_quality(fastq_path, quality_threshold, work_dir, ref_hit_fasta, plasmid_fasta):
|
|
@@ -563,17 +611,21 @@ def run_qc_analysis(fastq_path, results_dir, ref_hit_fasta, plasmid_fasta):
|
|
|
563
611
|
quality_thresholds = [10, 12, 14, 16, 18, 20, 22, 24, 26]
|
|
564
612
|
|
|
565
613
|
|
|
566
|
-
# Segment the input FASTQ file
|
|
567
|
-
logging.info("Segmenting FASTQ file into 10 parts for error estimation...")
|
|
568
|
-
segment_files = segment_fastq_file(fastq_path, n_segments=10)
|
|
569
|
-
|
|
570
|
-
if not segment_files:
|
|
571
|
-
logging.error("Failed to segment FASTQ file")
|
|
572
|
-
return
|
|
573
|
-
|
|
574
614
|
# Create temporary work directory for QC analysis
|
|
575
615
|
with tempfile.TemporaryDirectory() as qc_work_dir:
|
|
576
616
|
logging.info(f"Using temporary work directory: {qc_work_dir}")
|
|
617
|
+
|
|
618
|
+
# Segment the input FASTQ file into the temp workspace
|
|
619
|
+
logging.info("Segmenting FASTQ file into 10 parts for error estimation...")
|
|
620
|
+
segment_files = segment_fastq_file(
|
|
621
|
+
fastq_path,
|
|
622
|
+
n_segments=10,
|
|
623
|
+
output_dir=qc_work_dir,
|
|
624
|
+
)
|
|
625
|
+
|
|
626
|
+
if not segment_files:
|
|
627
|
+
logging.error("Failed to segment FASTQ file")
|
|
628
|
+
return
|
|
577
629
|
|
|
578
630
|
# Calculate results for each quality threshold
|
|
579
631
|
qc_results = []
|
|
@@ -1253,7 +1305,7 @@ def qscore_uncertainty_factor(qscore):
|
|
|
1253
1305
|
|
|
1254
1306
|
return uncertainty_factor
|
|
1255
1307
|
|
|
1256
|
-
def segment_fastq_file(input_fastq, n_segments=10):
|
|
1308
|
+
def segment_fastq_file(input_fastq, n_segments=10, output_dir: Optional[str] = None):
|
|
1257
1309
|
"""
|
|
1258
1310
|
Segment a FASTQ file into N parts for error estimation.
|
|
1259
1311
|
|
|
@@ -1270,7 +1322,10 @@ def segment_fastq_file(input_fastq, n_segments=10):
|
|
|
1270
1322
|
|
|
1271
1323
|
# Create output directory
|
|
1272
1324
|
base_name = os.path.splitext(os.path.basename(input_fastq))[0].replace('.fastq', '')
|
|
1273
|
-
|
|
1325
|
+
if output_dir:
|
|
1326
|
+
segment_dir = os.path.join(output_dir, f"{base_name}_segments")
|
|
1327
|
+
else:
|
|
1328
|
+
segment_dir = os.path.join(os.path.dirname(input_fastq), f"{base_name}_segments")
|
|
1274
1329
|
os.makedirs(segment_dir, exist_ok=True)
|
|
1275
1330
|
|
|
1276
1331
|
# Open output files
|
|
@@ -2007,6 +2062,9 @@ def write_key_findings(results_dir, consensus_info, simple_lambda, simple_aa_mea
|
|
|
2007
2062
|
if is_protein and headline_aa is not None:
|
|
2008
2063
|
f.write(f" {headline_aa:.2f} +/- {headline_std:.2f} AA mutations per gene copy\n")
|
|
2009
2064
|
f.write(f" (Method: {method_note})\n\n")
|
|
2065
|
+
f.write(
|
|
2066
|
+
f" Poisson lambda used in summary plots (bp mutations per copy): {simple_lambda:.6f}\n\n"
|
|
2067
|
+
)
|
|
2010
2068
|
|
|
2011
2069
|
# Plain-language interpretation using Poisson distribution
|
|
2012
2070
|
f.write("WHAT THIS MEANS (Poisson distribution):\n")
|
|
@@ -2702,6 +2760,9 @@ def run_main_analysis_for_qscore(fastq_path, qscore, qscore_desc, sample_name, w
|
|
|
2702
2760
|
txtf.write(f" • Z‐statistic: {z_stat:.4f}\n")
|
|
2703
2761
|
txtf.write(f" • p‐value: {p_val if p_val is not None else 'N/A'}\n")
|
|
2704
2762
|
txtf.write(f" • Estimated mutations per copy: {est_mut_per_copy:.6e}\n\n")
|
|
2763
|
+
txtf.write(
|
|
2764
|
+
f" • Poisson lambda used in summary plots (bp mutations per copy): {est_mut_per_copy:.6e}\n\n"
|
|
2765
|
+
)
|
|
2705
2766
|
|
|
2706
2767
|
txtf.write("3) Protein‐coding evaluation:\n")
|
|
2707
2768
|
txtf.write(f" • Is protein: {is_protein}\n")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|