uht-tooling 0.3.2.tar.gz → 0.3.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/PKG-INFO +1 -1
  2. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/pyproject.toml +1 -1
  3. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/mut_rate.py +88 -27
  4. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/PKG-INFO +1 -1
  5. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/README.md +0 -0
  6. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/setup.cfg +0 -0
  7. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/__init__.py +0 -0
  8. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/cli.py +0 -0
  9. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/config.py +0 -0
  10. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/models/__init__.py +0 -0
  11. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/tools.py +0 -0
  12. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/__init__.py +0 -0
  13. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_gibson.py +0 -0
  14. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_kld.py +0 -0
  15. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/design_slim.py +0 -0
  16. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/gui.py +0 -0
  17. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/mutation_caller.py +0 -0
  18. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/nextera_designer.py +0 -0
  19. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/profile_inserts.py +0 -0
  20. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling/workflows/umi_hunter.py +0 -0
  21. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/SOURCES.txt +0 -0
  22. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/dependency_links.txt +0 -0
  23. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/entry_points.txt +0 -0
  24. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/requires.txt +0 -0
  25. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/src/uht_tooling.egg-info/top_level.txt +0 -0
  26. {uht_tooling-0.3.2 → uht_tooling-0.3.4}/tests/test_design_kld.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "uht-tooling"
7
- version = "0.3.2"
7
+ version = "0.3.4"
8
8
  description = "Tooling for ultra-high throughput screening workflows."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -62,6 +62,8 @@ def _ensure_workspace(path: Path, purpose: str) -> Path:
62
62
  root = _workspace_root(path)
63
63
  if root is None:
64
64
  root = _maybe_init_temp_workspace(path)
65
+ if root is None and _is_temp_path(path):
66
+ return Path(path).resolve()
65
67
  if root is None:
66
68
  raise ValueError(_workspace_error(path, purpose))
67
69
  return root
@@ -72,8 +74,8 @@ def _maybe_init_temp_workspace(path: Path) -> Optional[Path]:
72
74
  resolved = Path(path).resolve()
73
75
  except FileNotFoundError:
74
76
  return None
75
- tmp_root = Path(tempfile.gettempdir()).resolve()
76
- if resolved == tmp_root or tmp_root in resolved.parents:
77
+ tmp_roots = _temp_roots()
78
+ if any(resolved == root or root in resolved.parents for root in tmp_roots):
77
79
  sentinel_path = resolved / WORKSPACE_SENTINEL
78
80
  if not sentinel_path.exists():
79
81
  try:
@@ -84,6 +86,21 @@ def _maybe_init_temp_workspace(path: Path) -> Optional[Path]:
84
86
  return None
85
87
 
86
88
 
89
+ def _is_temp_path(path: Path) -> bool:
90
+ try:
91
+ resolved = Path(path).resolve()
92
+ except FileNotFoundError:
93
+ return False
94
+ tmp_roots = _temp_roots()
95
+ return any(resolved == root or root in resolved.parents for root in tmp_roots)
96
+
97
+
98
+ def _temp_roots() -> List[Path]:
99
+ roots = {Path(tempfile.gettempdir()).resolve(), Path("/tmp").resolve()}
100
+ roots.add(Path("/private/tmp").resolve())
101
+ return [root for root in roots if root.exists()]
102
+
103
+
87
104
  def _safe_rmtree(path: Optional[Path], *, allowed_base: Optional[Path] = None, label: str = "") -> bool:
88
105
  if not path:
89
106
  return False
@@ -393,25 +410,56 @@ def run_nanofilt_filtering(input_fastq, quality_threshold, output_fastq):
393
410
  bool: True if successful, False otherwise
394
411
  """
395
412
  try:
396
- # Use gunzip to decompress, pipe to NanoFilt with length filter, then compress output
397
- cmd = f"gunzip -c {input_fastq} | NanoFilt -q {quality_threshold} -l 30 | gzip > {output_fastq}"
398
- logging.info(f"Running NanoFilt with quality threshold {quality_threshold} and min length 30bp: {cmd}")
399
-
400
- result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
401
- if result.returncode != 0:
402
- logging.error(f"NanoFilt failed with return code {result.returncode}: {result.stderr}")
413
+ logging.info(
414
+ "Running NanoFilt with quality threshold %s and min length 30bp: %s -> %s",
415
+ quality_threshold,
416
+ input_fastq,
417
+ output_fastq,
418
+ )
419
+
420
+ with open(output_fastq, "wb") as out_fh:
421
+ gunzip_proc = subprocess.Popen(
422
+ ["gunzip", "-c", input_fastq],
423
+ stdout=subprocess.PIPE,
424
+ stderr=subprocess.PIPE,
425
+ )
426
+ nanofilt_proc = subprocess.Popen(
427
+ ["NanoFilt", "-q", str(quality_threshold), "-l", "30"],
428
+ stdin=gunzip_proc.stdout,
429
+ stdout=subprocess.PIPE,
430
+ stderr=subprocess.PIPE,
431
+ )
432
+ gunzip_proc.stdout.close()
433
+ gzip_proc = subprocess.Popen(
434
+ ["gzip"],
435
+ stdin=nanofilt_proc.stdout,
436
+ stdout=out_fh,
437
+ stderr=subprocess.PIPE,
438
+ )
439
+ nanofilt_proc.stdout.close()
440
+
441
+ _, gunzip_err = gunzip_proc.communicate()
442
+ _, nanofilt_err = nanofilt_proc.communicate()
443
+ _, gzip_err = gzip_proc.communicate()
444
+
445
+ if gunzip_proc.returncode != 0:
446
+ logging.error("gunzip failed (%s): %s", gunzip_proc.returncode, gunzip_err.decode())
403
447
  return False
404
-
405
- # Check if output file was created and has content
448
+ if nanofilt_proc.returncode != 0:
449
+ logging.error("NanoFilt failed (%s): %s", nanofilt_proc.returncode, nanofilt_err.decode())
450
+ return False
451
+ if gzip_proc.returncode != 0:
452
+ logging.error("gzip failed (%s): %s", gzip_proc.returncode, gzip_err.decode())
453
+ return False
454
+
406
455
  if os.path.exists(output_fastq) and os.path.getsize(output_fastq) > 0:
407
- logging.info(f"Successfully created filtered FASTQ: {output_fastq}")
456
+ logging.info("Successfully created filtered FASTQ: %s", output_fastq)
408
457
  return True
409
- else:
410
- logging.error(f"Output file {output_fastq} was not created or is empty")
411
- return False
412
-
458
+ logging.error("Output file %s was not created or is empty", output_fastq)
459
+ return False
460
+
413
461
  except Exception as e:
414
- logging.error(f"Error running NanoFilt: {e}")
462
+ logging.error("Error running NanoFilt: %s", e)
415
463
  return False
416
464
 
417
465
  def calculate_mutation_rate_for_quality(fastq_path, quality_threshold, work_dir, ref_hit_fasta, plasmid_fasta):
@@ -563,17 +611,21 @@ def run_qc_analysis(fastq_path, results_dir, ref_hit_fasta, plasmid_fasta):
563
611
  quality_thresholds = [10, 12, 14, 16, 18, 20, 22, 24, 26]
564
612
 
565
613
 
566
- # Segment the input FASTQ file
567
- logging.info("Segmenting FASTQ file into 10 parts for error estimation...")
568
- segment_files = segment_fastq_file(fastq_path, n_segments=10)
569
-
570
- if not segment_files:
571
- logging.error("Failed to segment FASTQ file")
572
- return
573
-
574
614
  # Create temporary work directory for QC analysis
575
615
  with tempfile.TemporaryDirectory() as qc_work_dir:
576
616
  logging.info(f"Using temporary work directory: {qc_work_dir}")
617
+
618
+ # Segment the input FASTQ file into the temp workspace
619
+ logging.info("Segmenting FASTQ file into 10 parts for error estimation...")
620
+ segment_files = segment_fastq_file(
621
+ fastq_path,
622
+ n_segments=10,
623
+ output_dir=qc_work_dir,
624
+ )
625
+
626
+ if not segment_files:
627
+ logging.error("Failed to segment FASTQ file")
628
+ return
577
629
 
578
630
  # Calculate results for each quality threshold
579
631
  qc_results = []
@@ -1253,7 +1305,7 @@ def qscore_uncertainty_factor(qscore):
1253
1305
 
1254
1306
  return uncertainty_factor
1255
1307
 
1256
- def segment_fastq_file(input_fastq, n_segments=10):
1308
+ def segment_fastq_file(input_fastq, n_segments=10, output_dir: Optional[str] = None):
1257
1309
  """
1258
1310
  Segment a FASTQ file into N parts for error estimation.
1259
1311
 
@@ -1270,7 +1322,10 @@ def segment_fastq_file(input_fastq, n_segments=10):
1270
1322
 
1271
1323
  # Create output directory
1272
1324
  base_name = os.path.splitext(os.path.basename(input_fastq))[0].replace('.fastq', '')
1273
- segment_dir = os.path.join(os.path.dirname(input_fastq), f"{base_name}_segments")
1325
+ if output_dir:
1326
+ segment_dir = os.path.join(output_dir, f"{base_name}_segments")
1327
+ else:
1328
+ segment_dir = os.path.join(os.path.dirname(input_fastq), f"{base_name}_segments")
1274
1329
  os.makedirs(segment_dir, exist_ok=True)
1275
1330
 
1276
1331
  # Open output files
@@ -2007,6 +2062,9 @@ def write_key_findings(results_dir, consensus_info, simple_lambda, simple_aa_mea
2007
2062
  if is_protein and headline_aa is not None:
2008
2063
  f.write(f" {headline_aa:.2f} +/- {headline_std:.2f} AA mutations per gene copy\n")
2009
2064
  f.write(f" (Method: {method_note})\n\n")
2065
+ f.write(
2066
+ f" Poisson lambda used in summary plots (bp mutations per copy): {simple_lambda:.6f}\n\n"
2067
+ )
2010
2068
 
2011
2069
  # Plain-language interpretation using Poisson distribution
2012
2070
  f.write("WHAT THIS MEANS (Poisson distribution):\n")
@@ -2702,6 +2760,9 @@ def run_main_analysis_for_qscore(fastq_path, qscore, qscore_desc, sample_name, w
2702
2760
  txtf.write(f" • Z‐statistic: {z_stat:.4f}\n")
2703
2761
  txtf.write(f" • p‐value: {p_val if p_val is not None else 'N/A'}\n")
2704
2762
  txtf.write(f" • Estimated mutations per copy: {est_mut_per_copy:.6e}\n\n")
2763
+ txtf.write(
2764
+ f" • Poisson lambda used in summary plots (bp mutations per copy): {est_mut_per_copy:.6e}\n\n"
2765
+ )
2705
2766
 
2706
2767
  txtf.write("3) Protein‐coding evaluation:\n")
2707
2768
  txtf.write(f" • Is protein: {is_protein}\n")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uht-tooling
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Tooling for ultra-high throughput screening workflows.
5
5
  Author: Matt115A
6
6
  License-Expression: MIT
File without changes
File without changes