speconsense 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speconsense/__init__.py CHANGED
@@ -5,7 +5,7 @@ A Python tool for experimental clustering and consensus generation as an alterna
5
5
  in the fungal DNA barcoding pipeline.
6
6
  """
7
7
 
8
- __version__ = "0.7.4"
8
+ __version__ = "0.7.5"
9
9
  __author__ = "Josh Walker"
10
10
  __email__ = "joshowalker@yahoo.com"
11
11
 
@@ -392,19 +392,19 @@ def process_single_specimen(file_consensuses: List[ConsensusInfo],
392
392
  final_consensus.append(renamed_variant)
393
393
  group_naming.append((variant.sample_name, new_name))
394
394
 
395
- # Generate full consensus from PRE-MERGE variants
395
+ # Generate full consensus from PRE-MERGE variants that contributed
396
+ # to surviving post-merge variants (after select-min-size-ratio)
396
397
  if getattr(args, 'enable_full_consensus', False):
397
- pre_merge_variants = variant_groups[group_id]
398
-
399
- # Apply size-ratio filter (same as merge pipeline)
400
- if args.merge_min_size_ratio > 0 and len(pre_merge_variants) > 1:
401
- largest_size = max(v.size for v in pre_merge_variants)
402
- filtered = [v for v in pre_merge_variants
403
- if (v.size / largest_size) >= args.merge_min_size_ratio]
404
- if len(filtered) < len(pre_merge_variants):
405
- filtered_count = len(pre_merge_variants) - len(filtered)
406
- logging.debug(f"Full consensus: filtered out {filtered_count} variants with size ratio < {args.merge_min_size_ratio} relative to largest (size={largest_size})")
407
- pre_merge_variants = filtered
398
+ # Collect original cluster names from surviving post-merge variants
399
+ surviving_originals = set()
400
+ for v in group_members:
401
+ if v.sample_name in all_merge_traceability:
402
+ surviving_originals.update(all_merge_traceability[v.sample_name])
403
+ else:
404
+ surviving_originals.add(v.sample_name)
405
+
406
+ pre_merge_variants = [v for v in variant_groups[group_id]
407
+ if v.sample_name in surviving_originals]
408
408
 
409
409
  specimen_base = selected_variants[0].sample_name.rsplit('-c', 1)[0]
410
410
  full_name = f"{specimen_base}-{group_idx + 1}.full"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speconsense
3
- Version: 0.7.4
3
+ Version: 0.7.5
4
4
  Summary: High-quality clustering and consensus generation for Oxford Nanopore amplicon reads
5
5
  Author-email: Josh Walker <joshowalker@yahoo.com>
6
6
  License: BSD-3-Clause
@@ -295,14 +295,14 @@ When using `speconsense-summarize` for post-processing, creates `__Summary__/` d
295
295
  |---------------|-------------|------------|-------------|
296
296
  | **Original** | Source `cluster_debug/` | `-c1`, `-c2`, `-c3` | Preserves speconsense clustering results |
297
297
  | **Summarization** | `__Summary__/`, `FASTQ Files/`, `variants/` | `-1.v1`, `-1.v2`, `-2.v1`, `.raw1` | Post-processing groups and variants |
298
- | **Full consensus** | `__Summary__/` | `-1.full` | IUPAC consensus from all pre-merge variants in a group |
298
+ | **Full consensus** | `__Summary__/` | `-1.full` | IUPAC consensus from pre-merge components of surviving variants |
299
299
 
300
300
  ### Example Directory Structure
301
301
  ```
302
302
  __Summary__/
303
303
  ├── sample-1.v1-RiC45.fasta # Primary variant (group 1, merged)
304
304
  ├── sample-1.v2-RiC23.fasta # Additional variant (not merged)
305
- ├── sample-1.full-RiC68.fasta # Full IUPAC consensus for group 1 (all pre-merge variants)
305
+ ├── sample-1.full-RiC68.fasta # Full IUPAC consensus for group 1 (surviving variants' components)
306
306
  ├── sample-2.v1-RiC30.fasta # Second organism group, primary variant
307
307
  ├── summary.fasta # All final consensus sequences (excludes .raw)
308
308
  ├── summary.txt # Statistics
@@ -829,7 +829,7 @@ For high-throughput workflows (e.g., 100K sequences/year), this prioritization e
829
829
  ```bash
830
830
  speconsense-summarize --enable-full-consensus
831
831
  ```
832
- - Generates a full IUPAC consensus sequence per variant group from all pre-merge variants
832
+ - Generates a full IUPAC consensus sequence per variant group from pre-merge variants that contributed to surviving post-merge variants
833
833
  - Output named `{specimen}-{group}.full-RiC{reads}.fasta` in the `__Summary__/` directory
834
834
  - Uses majority voting across all variants in the group; **gaps never win** — at each alignment column, the most common non-gap base is chosen, with IUPAC codes for ties among bases
835
835
  - Useful when you want a single representative sequence that captures all variation within a group as IUPAC ambiguity codes
@@ -1073,7 +1073,7 @@ The complete speconsense-summarize workflow operates in this order:
1073
1073
  4. **Homopolymer-aware MSA-based variant merging** within each group, including **overlap merging** for different-length sequences (`--disable-merging`, `--merge-effort`, `--merge-position-count`, `--merge-indel-length`, `--min-merge-overlap`, `--merge-snp`, `--merge-min-size-ratio`, `--disable-homopolymer-equivalence`)
1074
1074
  5. **Selection size ratio filtering** to remove tiny post-merge variants (`--select-min-size-ratio`)
1075
1075
  6. **Variant selection** within each group (`--select-max-variants`, `--select-strategy`)
1076
- 7. **Full consensus generation** (optional) — IUPAC consensus from all pre-merge variants per group (`--enable-full-consensus`)
1076
+ 7. **Full consensus generation** (optional) — IUPAC consensus from pre-merge components of surviving post-merge variants (`--enable-full-consensus`)
1077
1077
  8. **Output generation** with customizable header fields (`--fasta-fields`) and full traceability
1078
1078
 
1079
1079
  **Key architectural features**:
@@ -1,4 +1,4 @@
1
- speconsense/__init__.py,sha256=uLSZG2n0xobwuNT2PwZbytUg1DcyOr2aJlsbc52iKs0,537
1
+ speconsense/__init__.py,sha256=qxithGom21C3MjbApvgOzVSIRFqw4jReMKZqipfY-Kk,537
2
2
  speconsense/cli.py,sha256=Kqb2da0IuazocAz72iqTnw71jI7UaQgxsHfb9CwiolU,85
3
3
  speconsense/msa.py,sha256=t1uDb-Tj5tDnB17QnNZPslpAiLXgAMIlnmMKBbwBKzs,31661
4
4
  speconsense/quality_report.py,sha256=Byrc115T03ybi7mpA0Bw8-gc83nhKPzDY0tyH1IIAMQ,19803
@@ -23,15 +23,15 @@ speconsense/scalability/vsearch.py,sha256=I1IzTeRzEFn9bi8mNbBRvtcHvUBzBFdE7D5yf-
23
23
  speconsense/summarize/__init__.py,sha256=PE6W9hytDxhkw7W6Fz8X3jd92N2VdhuxiQ72Nqm1xC0,3181
24
24
  speconsense/summarize/__main__.py,sha256=_hzLNqNtv4PirL1oMic37GW2QmjWquoznzNtld_3FiQ,117
25
25
  speconsense/summarize/analysis.py,sha256=1MXtKMpX1bgKEtI-JN6BwTQj99qyt1eQLqNg51EgPiE,31560
26
- speconsense/summarize/cli.py,sha256=uSeY7__KpdQVXqJcQ0Zpn6ePeyJDVGdml7rZgHFr3W8,27124
26
+ speconsense/summarize/cli.py,sha256=JDgLQl8zFlbkH-Oa_n68lbUTB-cLBWzl3dphn041ph0,26961
27
27
  speconsense/summarize/clustering.py,sha256=kk-FdFCea8KRocowN_4dt_aoqZNVJMmEu7CVKPfYgK8,28346
28
28
  speconsense/summarize/fields.py,sha256=a6aK9hkPJ-sDRRSqM_7IkyqCki99KSMnsQMV-U7r2zY,8687
29
29
  speconsense/summarize/io.py,sha256=FdHLbcj0NOL3WE1e5OL85DRdJaHpyXPMcmlNg9mG3tM,32732
30
30
  speconsense/summarize/iupac.py,sha256=Y6KqELmnGy4Eya4C_4ldXY8uek0ReuSUgITLI3NW0-A,11042
31
31
  speconsense/summarize/merging.py,sha256=FakBey3qpu7ULPIsc2GDo9WG8jNU1L6q2pgQ2HrOKXk,28454
32
- speconsense-0.7.4.dist-info/licenses/LICENSE,sha256=T_VYPNbu9NSWjdQunfk4jqUGND_kYWe_An18s6N492o,1498
33
- speconsense-0.7.4.dist-info/METADATA,sha256=2vFyM5rqFEwMPIcsSAH32Dwh_bDA-bOk15D7El6MO7Y,79957
34
- speconsense-0.7.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
35
- speconsense-0.7.4.dist-info/entry_points.txt,sha256=C0zFp5EYA8_KCb04uOyb4JNkxNH7bli1eU-XYrSX3BU,147
36
- speconsense-0.7.4.dist-info/top_level.txt,sha256=nYUJOHrqeX-OOxOYQKvpp7Iv8-Bed18wN1DBwWfJKnQ,12
37
- speconsense-0.7.4.dist-info/RECORD,,
32
+ speconsense-0.7.5.dist-info/licenses/LICENSE,sha256=T_VYPNbu9NSWjdQunfk4jqUGND_kYWe_An18s6N492o,1498
33
+ speconsense-0.7.5.dist-info/METADATA,sha256=JiEg26k5JEJUFSR5hw4FHHysL4ei9PPyvVXsGBRBZVc,80041
34
+ speconsense-0.7.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
35
+ speconsense-0.7.5.dist-info/entry_points.txt,sha256=C0zFp5EYA8_KCb04uOyb4JNkxNH7bli1eU-XYrSX3BU,147
36
+ speconsense-0.7.5.dist-info/top_level.txt,sha256=nYUJOHrqeX-OOxOYQKvpp7Iv8-Bed18wN1DBwWfJKnQ,12
37
+ speconsense-0.7.5.dist-info/RECORD,,