speconsense 0.7.2__py3-none-any.whl → 0.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speconsense/__init__.py +1 -1
- speconsense/core/cli.py +18 -0
- speconsense/profiles/__init__.py +2 -0
- speconsense/profiles/compressed.yaml +28 -0
- speconsense/profiles/example.yaml +1 -0
- speconsense/summarize/cli.py +60 -3
- speconsense/summarize/fields.py +5 -3
- speconsense/summarize/io.py +10 -1
- speconsense/summarize/merging.py +97 -76
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/METADATA +79 -12
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/RECORD +15 -14
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/WHEEL +1 -1
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/entry_points.txt +0 -0
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/licenses/LICENSE +0 -0
- {speconsense-0.7.2.dist-info → speconsense-0.7.4.dist-info}/top_level.txt +0 -0
speconsense/__init__.py
CHANGED
speconsense/core/cli.py
CHANGED
|
@@ -66,6 +66,9 @@ def main():
|
|
|
66
66
|
help="Disable position-based variant phasing (enabled by default). "
|
|
67
67
|
"MCL graph clustering already separates most variants; this "
|
|
68
68
|
"second pass analyzes MSA positions to phase remaining variants.")
|
|
69
|
+
phasing_group.add_argument("--enable-position-phasing", action="store_false",
|
|
70
|
+
dest="disable_position_phasing",
|
|
71
|
+
help="Override --disable-position-phasing or profile setting")
|
|
69
72
|
phasing_group.add_argument("--min-variant-frequency", type=float, default=0.10,
|
|
70
73
|
help="Minimum alternative allele frequency to call variant (default: 0.10 for 10%%)")
|
|
71
74
|
phasing_group.add_argument("--min-variant-count", type=int, default=5,
|
|
@@ -75,6 +78,9 @@ def main():
|
|
|
75
78
|
ambiguity_group = parser.add_argument_group("Ambiguity Calling")
|
|
76
79
|
ambiguity_group.add_argument("--disable-ambiguity-calling", action="store_true",
|
|
77
80
|
help="Disable IUPAC ambiguity code calling for unphased variant positions")
|
|
81
|
+
ambiguity_group.add_argument("--enable-ambiguity-calling", action="store_false",
|
|
82
|
+
dest="disable_ambiguity_calling",
|
|
83
|
+
help="Override --disable-ambiguity-calling or profile setting")
|
|
78
84
|
ambiguity_group.add_argument("--min-ambiguity-frequency", type=float, default=0.10,
|
|
79
85
|
help="Minimum alternative allele frequency for IUPAC ambiguity calling (default: 0.10 for 10%%)")
|
|
80
86
|
ambiguity_group.add_argument("--min-ambiguity-count", type=int, default=3,
|
|
@@ -84,8 +90,14 @@ def main():
|
|
|
84
90
|
merging_group = parser.add_argument_group("Cluster Merging")
|
|
85
91
|
merging_group.add_argument("--disable-cluster-merging", action="store_true",
|
|
86
92
|
help="Disable merging of clusters with identical consensus sequences")
|
|
93
|
+
merging_group.add_argument("--enable-cluster-merging", action="store_false",
|
|
94
|
+
dest="disable_cluster_merging",
|
|
95
|
+
help="Override --disable-cluster-merging or profile setting")
|
|
87
96
|
merging_group.add_argument("--disable-homopolymer-equivalence", action="store_true",
|
|
88
97
|
help="Disable homopolymer equivalence in cluster merging (only merge identical sequences)")
|
|
98
|
+
merging_group.add_argument("--enable-homopolymer-equivalence", action="store_false",
|
|
99
|
+
dest="disable_homopolymer_equivalence",
|
|
100
|
+
help="Override --disable-homopolymer-equivalence or profile setting")
|
|
89
101
|
|
|
90
102
|
# Orientation group
|
|
91
103
|
orient_group = parser.add_argument_group("Orientation")
|
|
@@ -104,11 +116,17 @@ def main():
|
|
|
104
116
|
"0=auto-detect, default=1 (safe for parallel workflows).")
|
|
105
117
|
perf_group.add_argument("--enable-early-filter", action="store_true",
|
|
106
118
|
help="Enable early filtering to skip small clusters before variant phasing (improves performance for large datasets)")
|
|
119
|
+
perf_group.add_argument("--disable-early-filter", action="store_false",
|
|
120
|
+
dest="enable_early_filter",
|
|
121
|
+
help="Override --enable-early-filter or profile setting")
|
|
107
122
|
|
|
108
123
|
# Debugging group
|
|
109
124
|
debug_group = parser.add_argument_group("Debugging")
|
|
110
125
|
debug_group.add_argument("--collect-discards", action="store_true",
|
|
111
126
|
help="Write discarded reads (outliers and filtered clusters) to cluster_debug/{sample}-discards.fastq")
|
|
127
|
+
debug_group.add_argument("--no-collect-discards", action="store_false",
|
|
128
|
+
dest="collect_discards",
|
|
129
|
+
help="Override --collect-discards or profile setting")
|
|
112
130
|
debug_group.add_argument("--log-level", default="INFO",
|
|
113
131
|
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
|
|
114
132
|
|
speconsense/profiles/__init__.py
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Compress variants into minimal IUPAC consensus sequences
|
|
2
|
+
#
|
|
3
|
+
# Aggressively merges similar variants (including indels) into single
|
|
4
|
+
# IUPAC consensus sequences. Only truly dissimilar sequences remain
|
|
5
|
+
# separate. Uses 20% frequency thresholds throughout.
|
|
6
|
+
#
|
|
7
|
+
# Designed for workflows where reviewers want fewer sequences to
|
|
8
|
+
# examine, with all variation represented via IUPAC ambiguity codes.
|
|
9
|
+
# Partial overlap merging is disabled as a safety measure.
|
|
10
|
+
#
|
|
11
|
+
# Use with:
|
|
12
|
+
# speconsense input.fastq -p compressed
|
|
13
|
+
# speconsense-summarize -p compressed
|
|
14
|
+
|
|
15
|
+
speconsense-version: "0.7.*"
|
|
16
|
+
description: "Compress variants into minimal IUPAC consensus sequences"
|
|
17
|
+
|
|
18
|
+
speconsense:
|
|
19
|
+
min-ambiguity-frequency: 0.20 # 20% threshold for IUPAC ambiguity calling
|
|
20
|
+
min-variant-frequency: 0.20 # 20% threshold for variant phasing
|
|
21
|
+
|
|
22
|
+
speconsense-summarize:
|
|
23
|
+
merge-indel-length: 5 # Merge indels up to 5bp
|
|
24
|
+
merge-position-count: 10 # Allow up to 10 variant positions in a merge
|
|
25
|
+
merge-min-size-ratio: 0.2 # Match 20% calling threshold
|
|
26
|
+
select-min-size-ratio: 0.2 # Match 20% calling threshold
|
|
27
|
+
min-merge-overlap: 0 # Disable partial overlap merging
|
|
28
|
+
enable-full-consensus: true # Include full IUPAC consensus per group
|
|
@@ -91,6 +91,7 @@ speconsense-summarize:
|
|
|
91
91
|
# select-max-groups: -1 # Max groups to output (-1 = no limit)
|
|
92
92
|
# select-max-variants: -1 # Max variants per group (-1 = no limit)
|
|
93
93
|
# select-strategy: size # Selection strategy: size or diversity
|
|
94
|
+
# select-min-size-ratio: 0 # Min size ratio to include variant (0 = disabled)
|
|
94
95
|
|
|
95
96
|
# --- Processing ---
|
|
96
97
|
# threads: 0 # Max threads (0 = auto-detect)
|
speconsense/summarize/cli.py
CHANGED
|
@@ -54,8 +54,8 @@ from .io import (
|
|
|
54
54
|
write_output_files,
|
|
55
55
|
)
|
|
56
56
|
from .clustering import perform_hac_clustering, select_variants
|
|
57
|
-
from .merging import merge_group_with_msa
|
|
58
|
-
from .analysis import MAX_MSA_MERGE_VARIANTS, MIN_MERGE_BATCH, MAX_MERGE_BATCH
|
|
57
|
+
from .merging import merge_group_with_msa, create_full_consensus_from_msa
|
|
58
|
+
from .analysis import run_spoa_msa, MAX_MSA_MERGE_VARIANTS, MIN_MERGE_BATCH, MAX_MERGE_BATCH
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
# Merge effort configuration
|
|
@@ -132,6 +132,8 @@ def parse_arguments():
|
|
|
132
132
|
merging_group = parser.add_argument_group("Merging")
|
|
133
133
|
merging_group.add_argument("--disable-merging", action="store_true",
|
|
134
134
|
help="Disable all variant merging (skip MSA-based merge evaluation entirely)")
|
|
135
|
+
merging_group.add_argument("--enable-merging", action="store_false", dest="disable_merging",
|
|
136
|
+
help="Override --disable-merging or profile setting")
|
|
135
137
|
merging_group.add_argument("--merge-snp", action=argparse.BooleanOptionalAction, default=True,
|
|
136
138
|
help="Enable SNP-based merging (default: True, use --no-merge-snp to disable)")
|
|
137
139
|
merging_group.add_argument("--merge-indel-length", type=int, default=0,
|
|
@@ -144,6 +146,9 @@ def parse_arguments():
|
|
|
144
146
|
help="Minimum overlap in bp for merging sequences of different lengths (default: 200, 0 to disable)")
|
|
145
147
|
merging_group.add_argument("--disable-homopolymer-equivalence", action="store_true",
|
|
146
148
|
help="Disable homopolymer equivalence in merging (treat AAA vs AAAA as different)")
|
|
149
|
+
merging_group.add_argument("--enable-homopolymer-equivalence", action="store_false",
|
|
150
|
+
dest="disable_homopolymer_equivalence",
|
|
151
|
+
help="Override --disable-homopolymer-equivalence or profile setting")
|
|
147
152
|
merging_group.add_argument("--merge-effort", type=str, default="balanced", metavar="LEVEL",
|
|
148
153
|
help="Merging effort level: fast (8), balanced (10), thorough (12), "
|
|
149
154
|
"or numeric 6-14. Higher values allow larger batch sizes for "
|
|
@@ -164,6 +169,15 @@ def parse_arguments():
|
|
|
164
169
|
selection_group.add_argument("--select-strategy", "--variant-selection",
|
|
165
170
|
dest="select_strategy", choices=["size", "diversity"], default="size",
|
|
166
171
|
help="Variant selection strategy: size or diversity (default: size)")
|
|
172
|
+
selection_group.add_argument("--select-min-size-ratio", type=float, default=0,
|
|
173
|
+
help="Minimum size ratio (variant/largest) to include in output "
|
|
174
|
+
"(default: 0 = disabled, e.g. 0.2 for 20%% cutoff)")
|
|
175
|
+
selection_group.add_argument("--enable-full-consensus", action="store_true",
|
|
176
|
+
help="Generate a full consensus per variant group representing all variation "
|
|
177
|
+
"from pre-merge variants (gaps never win)")
|
|
178
|
+
selection_group.add_argument("--disable-full-consensus", action="store_false",
|
|
179
|
+
dest="enable_full_consensus",
|
|
180
|
+
help="Override --enable-full-consensus or profile setting")
|
|
167
181
|
|
|
168
182
|
# Performance group
|
|
169
183
|
perf_group = parser.add_argument_group("Performance")
|
|
@@ -345,9 +359,21 @@ def process_single_specimen(file_consensuses: List[ConsensusInfo],
|
|
|
345
359
|
key=lambda x: max(m.size for m in x[1]),
|
|
346
360
|
reverse=True)
|
|
347
361
|
|
|
348
|
-
for group_idx, (
|
|
362
|
+
for group_idx, (group_id, group_members) in enumerate(sorted_groups):
|
|
349
363
|
final_group_name = group_idx + 1
|
|
350
364
|
|
|
365
|
+
# Apply select-min-size-ratio filter
|
|
366
|
+
if args.select_min_size_ratio > 0 and len(group_members) > 1:
|
|
367
|
+
largest_size = max(v.size for v in group_members)
|
|
368
|
+
filtered = [v for v in group_members
|
|
369
|
+
if (v.size / largest_size) >= args.select_min_size_ratio]
|
|
370
|
+
if len(filtered) < len(group_members):
|
|
371
|
+
filtered_count = len(group_members) - len(filtered)
|
|
372
|
+
logging.debug(f"Group {group_idx + 1}: filtered out {filtered_count} "
|
|
373
|
+
f"variants with size ratio < {args.select_min_size_ratio} "
|
|
374
|
+
f"relative to largest (size={largest_size})")
|
|
375
|
+
group_members = filtered
|
|
376
|
+
|
|
351
377
|
# Select variants for this group
|
|
352
378
|
selected_variants = select_variants(group_members, args.select_max_variants, args.select_strategy, group_number=final_group_name)
|
|
353
379
|
|
|
@@ -366,6 +392,35 @@ def process_single_specimen(file_consensuses: List[ConsensusInfo],
|
|
|
366
392
|
final_consensus.append(renamed_variant)
|
|
367
393
|
group_naming.append((variant.sample_name, new_name))
|
|
368
394
|
|
|
395
|
+
# Generate full consensus from PRE-MERGE variants
|
|
396
|
+
if getattr(args, 'enable_full_consensus', False):
|
|
397
|
+
pre_merge_variants = variant_groups[group_id]
|
|
398
|
+
|
|
399
|
+
# Apply size-ratio filter (same as merge pipeline)
|
|
400
|
+
if args.merge_min_size_ratio > 0 and len(pre_merge_variants) > 1:
|
|
401
|
+
largest_size = max(v.size for v in pre_merge_variants)
|
|
402
|
+
filtered = [v for v in pre_merge_variants
|
|
403
|
+
if (v.size / largest_size) >= args.merge_min_size_ratio]
|
|
404
|
+
if len(filtered) < len(pre_merge_variants):
|
|
405
|
+
filtered_count = len(pre_merge_variants) - len(filtered)
|
|
406
|
+
logging.debug(f"Full consensus: filtered out {filtered_count} variants with size ratio < {args.merge_min_size_ratio} relative to largest (size={largest_size})")
|
|
407
|
+
pre_merge_variants = filtered
|
|
408
|
+
|
|
409
|
+
specimen_base = selected_variants[0].sample_name.rsplit('-c', 1)[0]
|
|
410
|
+
full_name = f"{specimen_base}-{group_idx + 1}.full"
|
|
411
|
+
|
|
412
|
+
if len(pre_merge_variants) == 1:
|
|
413
|
+
# Single variant — copy directly
|
|
414
|
+
full_consensus = pre_merge_variants[0]._replace(sample_name=full_name)
|
|
415
|
+
else:
|
|
416
|
+
# MSA on pre-merge variants, full consensus logic
|
|
417
|
+
sequences = [v.sequence for v in pre_merge_variants]
|
|
418
|
+
aligned_seqs = run_spoa_msa(sequences, alignment_mode=1)
|
|
419
|
+
full_consensus = create_full_consensus_from_msa(aligned_seqs, pre_merge_variants)
|
|
420
|
+
full_consensus = full_consensus._replace(sample_name=full_name)
|
|
421
|
+
|
|
422
|
+
final_consensus.append(full_consensus)
|
|
423
|
+
|
|
369
424
|
naming_info[group_idx + 1] = group_naming
|
|
370
425
|
|
|
371
426
|
logging.info(f"Processed {file_name}: {len(final_consensus)} final variants across {len(merged_groups)} groups")
|
|
@@ -421,6 +476,8 @@ def main():
|
|
|
421
476
|
logging.info(f" --select-max-variants: {args.select_max_variants}")
|
|
422
477
|
logging.info(f" --select-max-groups: {args.select_max_groups}")
|
|
423
478
|
logging.info(f" --select-strategy: {args.select_strategy}")
|
|
479
|
+
logging.info(f" --select-min-size-ratio: {args.select_min_size_ratio}")
|
|
480
|
+
logging.info(f" --enable-full-consensus: {args.enable_full_consensus}")
|
|
424
481
|
logging.info(f" --log-level: {args.log_level}")
|
|
425
482
|
logging.info("")
|
|
426
483
|
logging.info("Processing each specimen file independently to organize variants within specimens")
|
speconsense/summarize/fields.py
CHANGED
|
@@ -124,8 +124,8 @@ class GroupField(FastaField):
|
|
|
124
124
|
super().__init__('group', 'Variant group number')
|
|
125
125
|
|
|
126
126
|
def format_value(self, consensus: ConsensusInfo) -> Optional[str]:
|
|
127
|
-
# Extract from sample_name (e.g., "...-1.v1"
|
|
128
|
-
match = re.search(r'-(\d+)
|
|
127
|
+
# Extract from sample_name (e.g., "...-1.v1", "...-2.v1.raw1", or "...-1.full")
|
|
128
|
+
match = re.search(r'-(\d+)(?:\.v\d+(?:\.raw\d+)?|\.full)$', consensus.sample_name)
|
|
129
129
|
if match:
|
|
130
130
|
return f"group={match.group(1)}"
|
|
131
131
|
return None
|
|
@@ -136,8 +136,10 @@ class VariantField(FastaField):
|
|
|
136
136
|
super().__init__('variant', 'Variant identifier within group')
|
|
137
137
|
|
|
138
138
|
def format_value(self, consensus: ConsensusInfo) -> Optional[str]:
|
|
139
|
-
# Extract from sample_name (e.g., "...-1.v1" -> "v1"
|
|
139
|
+
# Extract from sample_name (e.g., "...-1.v1" -> "v1", "...-1.v1.raw1" -> "v1", "...-1.full" -> "full")
|
|
140
140
|
match = re.search(r'\.(v\d+)(?:\.raw\d+)?$', consensus.sample_name)
|
|
141
|
+
if not match:
|
|
142
|
+
match = re.search(r'\.(full)$', consensus.sample_name)
|
|
141
143
|
if match:
|
|
142
144
|
return f"variant={match.group(1)}"
|
|
143
145
|
return None
|
speconsense/summarize/io.py
CHANGED
|
@@ -358,6 +358,9 @@ def write_specimen_data_files(specimen_consensus: List[ConsensusInfo],
|
|
|
358
358
|
# Generate .raw file consensuses for merged variants
|
|
359
359
|
raw_file_consensuses = []
|
|
360
360
|
for consensus in specimen_consensus:
|
|
361
|
+
# Skip .raw generation for .full consensus (synthetic/derived)
|
|
362
|
+
if consensus.sample_name.endswith('.full'):
|
|
363
|
+
continue
|
|
361
364
|
# Only create .raw files if this consensus was actually merged
|
|
362
365
|
if consensus.raw_ric and len(consensus.raw_ric) > 1:
|
|
363
366
|
# Find the original cluster name from naming_info
|
|
@@ -412,6 +415,9 @@ def write_specimen_data_files(specimen_consensus: List[ConsensusInfo],
|
|
|
412
415
|
|
|
413
416
|
# Write FASTQ files for each final consensus containing all contributing reads
|
|
414
417
|
for consensus in specimen_consensus:
|
|
418
|
+
# Skip FASTQ for .full consensus (synthetic/derived, no traceable cluster reads)
|
|
419
|
+
if consensus.sample_name.endswith('.full'):
|
|
420
|
+
continue
|
|
415
421
|
write_consensus_fastq(consensus, merge_traceability, naming_info, fastq_dir, fastq_lookup, original_consensus_lookup)
|
|
416
422
|
|
|
417
423
|
# Write .raw files (individual FASTA and FASTQ for pre-merge variants)
|
|
@@ -704,7 +710,10 @@ def write_output_files(final_consensus: List[ConsensusInfo],
|
|
|
704
710
|
multiple_id = specimen_counters[base_name]
|
|
705
711
|
writer.writerow([consensus.sample_name, len(consensus.sequence), consensus.ric, multiple_id])
|
|
706
712
|
unique_samples.add(base_name)
|
|
707
|
-
|
|
713
|
+
# Exclude .full from total RiC to avoid double-counting
|
|
714
|
+
# (.full aggregates reads already counted in merged variants)
|
|
715
|
+
if not consensus.sample_name.endswith('.full'):
|
|
716
|
+
total_ric += consensus.ric
|
|
708
717
|
|
|
709
718
|
writer.writerow([])
|
|
710
719
|
writer.writerow(['Total Unique Samples', len(unique_samples)])
|
speconsense/summarize/merging.py
CHANGED
|
@@ -106,6 +106,63 @@ def is_compatible_subset(variant_stats: dict, args, prior_positions: dict = None
|
|
|
106
106
|
return True
|
|
107
107
|
|
|
108
108
|
|
|
109
|
+
def _build_merged_consensus_info(
|
|
110
|
+
consensus_seq: list, snp_count: int, variants: List[ConsensusInfo]
|
|
111
|
+
) -> ConsensusInfo:
|
|
112
|
+
"""Assemble a ConsensusInfo from column-voting results and source variants.
|
|
113
|
+
|
|
114
|
+
Handles joining the consensus sequence, aggregating size/ric totals,
|
|
115
|
+
flattening raw_ric/raw_len merge history, and selecting metadata
|
|
116
|
+
from the largest variant.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
consensus_seq: List of consensus characters from column voting
|
|
120
|
+
snp_count: Number of ambiguous (multi-base) positions
|
|
121
|
+
variants: Source ConsensusInfo objects that were merged
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
ConsensusInfo with merged metadata
|
|
125
|
+
"""
|
|
126
|
+
consensus_sequence = ''.join(consensus_seq)
|
|
127
|
+
total_size = sum(v.size for v in variants)
|
|
128
|
+
total_ric = sum(v.ric for v in variants)
|
|
129
|
+
|
|
130
|
+
# Collect RiC values, preserving any prior merge history
|
|
131
|
+
raw_ric_values = []
|
|
132
|
+
for v in variants:
|
|
133
|
+
if v.raw_ric:
|
|
134
|
+
raw_ric_values.extend(v.raw_ric)
|
|
135
|
+
else:
|
|
136
|
+
raw_ric_values.append(v.ric)
|
|
137
|
+
raw_ric_values = sorted(raw_ric_values, reverse=True) if len(variants) > 1 else None
|
|
138
|
+
|
|
139
|
+
# Collect lengths, preserving any prior merge history
|
|
140
|
+
raw_len_values = []
|
|
141
|
+
for v in variants:
|
|
142
|
+
if v.raw_len:
|
|
143
|
+
raw_len_values.extend(v.raw_len)
|
|
144
|
+
else:
|
|
145
|
+
raw_len_values.append(len(v.sequence))
|
|
146
|
+
raw_len_values = sorted(raw_len_values, reverse=True) if len(variants) > 1 else None
|
|
147
|
+
|
|
148
|
+
largest_variant = max(variants, key=lambda v: v.size)
|
|
149
|
+
|
|
150
|
+
return ConsensusInfo(
|
|
151
|
+
sample_name=largest_variant.sample_name,
|
|
152
|
+
cluster_id=largest_variant.cluster_id,
|
|
153
|
+
sequence=consensus_sequence,
|
|
154
|
+
ric=total_ric,
|
|
155
|
+
size=total_size,
|
|
156
|
+
file_path=largest_variant.file_path,
|
|
157
|
+
snp_count=snp_count if snp_count > 0 else None,
|
|
158
|
+
primers=largest_variant.primers,
|
|
159
|
+
raw_ric=raw_ric_values,
|
|
160
|
+
raw_len=raw_len_values,
|
|
161
|
+
rid=largest_variant.rid,
|
|
162
|
+
rid_min=largest_variant.rid_min,
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
|
|
109
166
|
def create_consensus_from_msa(aligned_seqs: List, variants: List[ConsensusInfo]) -> ConsensusInfo:
|
|
110
167
|
"""
|
|
111
168
|
Generate consensus from MSA using size-weighted majority voting.
|
|
@@ -160,46 +217,7 @@ def create_consensus_from_msa(aligned_seqs: List, variants: List[ConsensusInfo])
|
|
|
160
217
|
snp_count += 1
|
|
161
218
|
# else: majority wants gap, omit position
|
|
162
219
|
|
|
163
|
-
|
|
164
|
-
consensus_sequence = ''.join(consensus_seq)
|
|
165
|
-
total_size = sum(v.size for v in variants)
|
|
166
|
-
total_ric = sum(v.ric for v in variants)
|
|
167
|
-
|
|
168
|
-
# Collect RiC values, preserving any prior merge history
|
|
169
|
-
raw_ric_values = []
|
|
170
|
-
for v in variants:
|
|
171
|
-
if v.raw_ric:
|
|
172
|
-
raw_ric_values.extend(v.raw_ric) # Flatten prior merge history
|
|
173
|
-
else:
|
|
174
|
-
raw_ric_values.append(v.ric)
|
|
175
|
-
raw_ric_values = sorted(raw_ric_values, reverse=True) if len(variants) > 1 else None
|
|
176
|
-
|
|
177
|
-
# Collect lengths, preserving any prior merge history
|
|
178
|
-
raw_len_values = []
|
|
179
|
-
for v in variants:
|
|
180
|
-
if v.raw_len:
|
|
181
|
-
raw_len_values.extend(v.raw_len) # Flatten prior merge history
|
|
182
|
-
else:
|
|
183
|
-
raw_len_values.append(len(v.sequence))
|
|
184
|
-
raw_len_values = sorted(raw_len_values, reverse=True) if len(variants) > 1 else None
|
|
185
|
-
|
|
186
|
-
# Use name from largest variant
|
|
187
|
-
largest_variant = max(variants, key=lambda v: v.size)
|
|
188
|
-
|
|
189
|
-
return ConsensusInfo(
|
|
190
|
-
sample_name=largest_variant.sample_name,
|
|
191
|
-
cluster_id=largest_variant.cluster_id,
|
|
192
|
-
sequence=consensus_sequence,
|
|
193
|
-
ric=total_ric,
|
|
194
|
-
size=total_size,
|
|
195
|
-
file_path=largest_variant.file_path,
|
|
196
|
-
snp_count=snp_count if snp_count > 0 else None,
|
|
197
|
-
primers=largest_variant.primers,
|
|
198
|
-
raw_ric=raw_ric_values,
|
|
199
|
-
raw_len=raw_len_values,
|
|
200
|
-
rid=largest_variant.rid, # Preserve identity metrics from largest variant
|
|
201
|
-
rid_min=largest_variant.rid_min,
|
|
202
|
-
)
|
|
220
|
+
return _build_merged_consensus_info(consensus_seq, snp_count, variants)
|
|
203
221
|
|
|
204
222
|
|
|
205
223
|
def create_overlap_consensus_from_msa(aligned_seqs: List, variants: List[ConsensusInfo]) -> ConsensusInfo:
|
|
@@ -295,46 +313,49 @@ def create_overlap_consensus_from_msa(aligned_seqs: List, variants: List[Consens
|
|
|
295
313
|
consensus_seq.append(iupac_code)
|
|
296
314
|
snp_count += 1
|
|
297
315
|
|
|
298
|
-
|
|
299
|
-
consensus_sequence = ''.join(consensus_seq)
|
|
300
|
-
total_size = sum(v.size for v in variants)
|
|
301
|
-
total_ric = sum(v.ric for v in variants)
|
|
316
|
+
return _build_merged_consensus_info(consensus_seq, snp_count, variants)
|
|
302
317
|
|
|
303
|
-
# Collect RiC values, preserving any prior merge history
|
|
304
|
-
raw_ric_values = []
|
|
305
|
-
for v in variants:
|
|
306
|
-
if v.raw_ric:
|
|
307
|
-
raw_ric_values.extend(v.raw_ric) # Flatten prior merge history
|
|
308
|
-
else:
|
|
309
|
-
raw_ric_values.append(v.ric)
|
|
310
|
-
raw_ric_values = sorted(raw_ric_values, reverse=True) if len(variants) > 1 else None
|
|
311
318
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
if v.raw_len:
|
|
316
|
-
raw_len_values.extend(v.raw_len) # Flatten prior merge history
|
|
317
|
-
else:
|
|
318
|
-
raw_len_values.append(len(v.sequence))
|
|
319
|
-
raw_len_values = sorted(raw_len_values, reverse=True) if len(variants) > 1 else None
|
|
319
|
+
def create_full_consensus_from_msa(aligned_seqs: List, variants: List[ConsensusInfo]) -> ConsensusInfo:
|
|
320
|
+
"""
|
|
321
|
+
Generate full consensus from MSA where any non-gap base means inclusion.
|
|
320
322
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
+
Unlike create_consensus_from_msa where gaps can win by majority vote,
|
|
324
|
+
the full consensus includes a position if ANY variant has a base there.
|
|
325
|
+
This captures all variation from all contributing variants.
|
|
323
326
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
327
|
+
Args:
|
|
328
|
+
aligned_seqs: MSA sequences with gaps as '-'
|
|
329
|
+
variants: Original ConsensusInfo objects (for metadata)
|
|
330
|
+
|
|
331
|
+
Returns:
|
|
332
|
+
ConsensusInfo with full consensus sequence
|
|
333
|
+
"""
|
|
334
|
+
consensus_seq = []
|
|
335
|
+
snp_count = 0
|
|
336
|
+
alignment_length = len(aligned_seqs[0].seq)
|
|
337
|
+
|
|
338
|
+
for col_idx in range(alignment_length):
|
|
339
|
+
column = [str(seq.seq[col_idx]) for seq in aligned_seqs]
|
|
340
|
+
|
|
341
|
+
# Collect non-gap bases
|
|
342
|
+
base_votes = defaultdict(int)
|
|
343
|
+
for i, base in enumerate(column):
|
|
344
|
+
upper_base = base.upper()
|
|
345
|
+
if upper_base != '-':
|
|
346
|
+
base_votes[upper_base] += variants[i].size
|
|
347
|
+
|
|
348
|
+
# Include position if ANY variant has a base (gaps never win)
|
|
349
|
+
if base_votes:
|
|
350
|
+
if len(base_votes) == 1:
|
|
351
|
+
consensus_seq.append(list(base_votes.keys())[0])
|
|
352
|
+
else:
|
|
353
|
+
represented_bases = set(base_votes.keys())
|
|
354
|
+
iupac_code = merge_bases_to_iupac(represented_bases)
|
|
355
|
+
consensus_seq.append(iupac_code)
|
|
356
|
+
snp_count += 1
|
|
357
|
+
|
|
358
|
+
return _build_merged_consensus_info(consensus_seq, snp_count, variants)
|
|
338
359
|
|
|
339
360
|
|
|
340
361
|
def merge_group_with_msa(variants: List[ConsensusInfo], args) -> Tuple[List[ConsensusInfo], Dict, int, List[OverlapMergeInfo]]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: speconsense
|
|
3
|
-
Version: 0.7.2
|
|
3
|
+
Version: 0.7.4
|
|
4
4
|
Summary: High-quality clustering and consensus generation for Oxford Nanopore amplicon reads
|
|
5
5
|
Author-email: Josh Walker <joshowalker@yahoo.com>
|
|
6
6
|
License: BSD-3-Clause
|
|
@@ -171,6 +171,7 @@ speconsense input.fastq -p herbarium --min-size 10
|
|
|
171
171
|
```
|
|
172
172
|
|
|
173
173
|
**Bundled profiles:**
|
|
174
|
+
- `compressed` — Compress variants into minimal IUPAC consensus sequences (aggressive merging with indels, 20% thresholds, full consensus, 20% selection size ratio)
|
|
174
175
|
- `herbarium` — High-recall for degraded DNA/type specimens
|
|
175
176
|
- `largedata` — Experimental settings for large input files
|
|
176
177
|
- `nostalgia` — Simulate older bioinformatics pipelines
|
|
@@ -294,12 +295,14 @@ When using `speconsense-summarize` for post-processing, creates `__Summary__/` d
|
|
|
294
295
|
|---------------|-------------|------------|-------------|
|
|
295
296
|
| **Original** | Source `cluster_debug/` | `-c1`, `-c2`, `-c3` | Preserves speconsense clustering results |
|
|
296
297
|
| **Summarization** | `__Summary__/`, `FASTQ Files/`, `variants/` | `-1.v1`, `-1.v2`, `-2.v1`, `.raw1` | Post-processing groups and variants |
|
|
298
|
+
| **Full consensus** | `__Summary__/` | `-1.full` | IUPAC consensus from all pre-merge variants in a group |
|
|
297
299
|
|
|
298
300
|
### Example Directory Structure
|
|
299
301
|
```
|
|
300
302
|
__Summary__/
|
|
301
303
|
├── sample-1.v1-RiC45.fasta # Primary variant (group 1, merged)
|
|
302
304
|
├── sample-1.v2-RiC23.fasta # Additional variant (not merged)
|
|
305
|
+
├── sample-1.full-RiC68.fasta # Full IUPAC consensus for group 1 (all pre-merge variants)
|
|
303
306
|
├── sample-2.v1-RiC30.fasta # Second organism group, primary variant
|
|
304
307
|
├── summary.fasta # All final consensus sequences (excludes .raw)
|
|
305
308
|
├── summary.txt # Statistics
|
|
@@ -675,6 +678,18 @@ speconsense-summarize --select-strategy diversity --select-max-variants 2
|
|
|
675
678
|
- Output up to select_max_variants per group
|
|
676
679
|
3. Final output contains representatives from all groups, ensuring both biological diversity (between groups) and appropriate sampling within each biological entity (within groups)
|
|
677
680
|
|
|
681
|
+
**Selection Size Ratio Filtering:**
|
|
682
|
+
```bash
|
|
683
|
+
speconsense-summarize --select-min-size-ratio 0.2
|
|
684
|
+
```
|
|
685
|
+
- Filters out post-merge variants whose size is too small relative to the largest variant in their group
|
|
686
|
+
- Ratio calculated as `variant_size / largest_size` — must be ≥ threshold to keep
|
|
687
|
+
- Example: `--select-min-size-ratio 0.2` means a variant must have ≥20% of the reads of the largest variant in its group
|
|
688
|
+
- Default is 0 (disabled) — all post-merge variants pass through to selection
|
|
689
|
+
- Applied after merging but before variant selection
|
|
690
|
+
- Useful for suppressing noise variants that survived merging but are too small to be meaningful
|
|
691
|
+
- Set to 0.2 in the `compressed` profile to match the 20% calling threshold theme
|
|
692
|
+
|
|
678
693
|
This two-stage process ensures that distinct biological sequences are preserved as separate groups, while providing control over variant complexity within each group.
|
|
679
694
|
|
|
680
695
|
### Customizing FASTA Header Fields
|
|
@@ -810,6 +825,18 @@ For high-throughput workflows (e.g., 100K sequences/year), this prioritization e
|
|
|
810
825
|
|
|
811
826
|
### Additional Summarize Options
|
|
812
827
|
|
|
828
|
+
**Full Consensus:**
|
|
829
|
+
```bash
|
|
830
|
+
speconsense-summarize --enable-full-consensus
|
|
831
|
+
```
|
|
832
|
+
- Generates a full IUPAC consensus sequence per variant group from the group's pre-merge variants (after applying the `--merge-min-size-ratio` filter, when it is set above 0)
|
|
833
|
+
- Output named `{specimen}-{group}.full-RiC{reads}.fasta` in the `__Summary__/` directory
|
|
834
|
+
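- Built column by column from the multiple sequence alignment of those variants; **gaps never win** — a column is kept whenever any variant has a base there, a single observed base is used as-is, and when variants disagree all observed bases are combined into one IUPAC ambiguity code (no majority vote is taken)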
|
|
835
|
+
- Useful when you want a single representative sequence that captures all variation within a group as IUPAC ambiguity codes
|
|
836
|
+
- Included in `summary.fasta` (but excluded from total RiC to avoid double-counting)
|
|
837
|
+
- Enabled by default in the `compressed` profile
|
|
838
|
+
- Use `--disable-full-consensus` to override when set by a profile
|
|
839
|
+
|
|
813
840
|
**Quality Filtering:**
|
|
814
841
|
```bash
|
|
815
842
|
speconsense-summarize --min-ric 5
|
|
@@ -1044,8 +1071,10 @@ The complete speconsense-summarize workflow operates in this order:
|
|
|
1044
1071
|
2. **HAC variant grouping** by sequence identity to separate dissimilar sequences (`--group-identity`); uses single-linkage when overlap merging is enabled
|
|
1045
1072
|
3. **Group filtering** to limit output groups (`--select-max-groups`)
|
|
1046
1073
|
4. **Homopolymer-aware MSA-based variant merging** within each group, including **overlap merging** for different-length sequences (`--disable-merging`, `--merge-effort`, `--merge-position-count`, `--merge-indel-length`, `--min-merge-overlap`, `--merge-snp`, `--merge-min-size-ratio`, `--disable-homopolymer-equivalence`)
|
|
1047
|
-
5. **
|
|
1048
|
-
6. **
|
|
1074
|
+
5. **Selection size ratio filtering** to remove tiny post-merge variants (`--select-min-size-ratio`)
|
|
1075
|
+
6. **Variant selection** within each group (`--select-max-variants`, `--select-strategy`)
|
|
1076
|
+
7. **Full consensus generation** (optional) — IUPAC consensus from all pre-merge variants per group (`--enable-full-consensus`)
|
|
1077
|
+
8. **Output generation** with customizable header fields (`--fasta-fields`) and full traceability
|
|
1049
1078
|
|
|
1050
1079
|
**Key architectural features**:
|
|
1051
1080
|
- HAC grouping occurs BEFORE merging to prevent inappropriate merging of dissimilar sequences (e.g., contaminants with primary targets)
|
|
@@ -1098,17 +1127,20 @@ usage: speconsense [-h] [-O OUTPUT_DIR] [--primers PRIMERS]
|
|
|
1098
1127
|
[--min-cluster-ratio MIN_CLUSTER_RATIO]
|
|
1099
1128
|
[--max-sample-size MAX_SAMPLE_SIZE]
|
|
1100
1129
|
[--outlier-identity OUTLIER_IDENTITY]
|
|
1101
|
-
[--disable-position-phasing]
|
|
1130
|
+
[--disable-position-phasing] [--enable-position-phasing]
|
|
1102
1131
|
[--min-variant-frequency MIN_VARIANT_FREQUENCY]
|
|
1103
1132
|
[--min-variant-count MIN_VARIANT_COUNT]
|
|
1104
|
-
[--disable-ambiguity-calling]
|
|
1133
|
+
[--disable-ambiguity-calling] [--enable-ambiguity-calling]
|
|
1105
1134
|
[--min-ambiguity-frequency MIN_AMBIGUITY_FREQUENCY]
|
|
1106
1135
|
[--min-ambiguity-count MIN_AMBIGUITY_COUNT]
|
|
1107
|
-
[--disable-cluster-merging]
|
|
1136
|
+
[--disable-cluster-merging] [--enable-cluster-merging]
|
|
1108
1137
|
[--disable-homopolymer-equivalence]
|
|
1138
|
+
[--enable-homopolymer-equivalence]
|
|
1109
1139
|
[--orient-mode {skip,keep-all,filter-failed}]
|
|
1110
1140
|
[--presample PRESAMPLE] [--scale-threshold SCALE_THRESHOLD]
|
|
1111
|
-
[--threads N] [--enable-early-filter]
|
|
1141
|
+
[--threads N] [--enable-early-filter]
|
|
1142
|
+
[--disable-early-filter] [--collect-discards]
|
|
1143
|
+
[--no-collect-discards]
|
|
1112
1144
|
[--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
|
|
1113
1145
|
[--version] [-p NAME] [--list-profiles]
|
|
1114
1146
|
input_file
|
|
@@ -1167,6 +1199,8 @@ Variant Phasing:
|
|
|
1167
1199
|
default). MCL graph clustering already separates most
|
|
1168
1200
|
variants; this second pass analyzes MSA positions to
|
|
1169
1201
|
phase remaining variants.
|
|
1202
|
+
--enable-position-phasing
|
|
1203
|
+
Override --disable-position-phasing or profile setting
|
|
1170
1204
|
--min-variant-frequency MIN_VARIANT_FREQUENCY
|
|
1171
1205
|
Minimum alternative allele frequency to call variant
|
|
1172
1206
|
(default: 0.10 for 10%)
|
|
@@ -1178,6 +1212,9 @@ Ambiguity Calling:
|
|
|
1178
1212
|
--disable-ambiguity-calling
|
|
1179
1213
|
Disable IUPAC ambiguity code calling for unphased
|
|
1180
1214
|
variant positions
|
|
1215
|
+
--enable-ambiguity-calling
|
|
1216
|
+
Override --disable-ambiguity-calling or profile
|
|
1217
|
+
setting
|
|
1181
1218
|
--min-ambiguity-frequency MIN_AMBIGUITY_FREQUENCY
|
|
1182
1219
|
Minimum alternative allele frequency for IUPAC
|
|
1183
1220
|
ambiguity calling (default: 0.10 for 10%)
|
|
@@ -1189,9 +1226,14 @@ Cluster Merging:
|
|
|
1189
1226
|
--disable-cluster-merging
|
|
1190
1227
|
Disable merging of clusters with identical consensus
|
|
1191
1228
|
sequences
|
|
1229
|
+
--enable-cluster-merging
|
|
1230
|
+
Override --disable-cluster-merging or profile setting
|
|
1192
1231
|
--disable-homopolymer-equivalence
|
|
1193
1232
|
Disable homopolymer equivalence in cluster merging
|
|
1194
1233
|
(only merge identical sequences)
|
|
1234
|
+
--enable-homopolymer-equivalence
|
|
1235
|
+
Override --disable-homopolymer-equivalence or profile
|
|
1236
|
+
setting
|
|
1195
1237
|
|
|
1196
1238
|
Orientation:
|
|
1197
1239
|
--orient-mode {skip,keep-all,filter-failed}
|
|
@@ -1213,10 +1255,14 @@ Performance:
|
|
|
1213
1255
|
Enable early filtering to skip small clusters before
|
|
1214
1256
|
variant phasing (improves performance for large
|
|
1215
1257
|
datasets)
|
|
1258
|
+
--disable-early-filter
|
|
1259
|
+
Override --enable-early-filter or profile setting
|
|
1216
1260
|
|
|
1217
1261
|
Debugging:
|
|
1218
1262
|
--collect-discards Write discarded reads (outliers and filtered clusters)
|
|
1219
1263
|
to cluster_debug/{sample}-discards.fastq
|
|
1264
|
+
--no-collect-discards
|
|
1265
|
+
Override --collect-discards or profile setting
|
|
1220
1266
|
--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
|
|
1221
1267
|
```
|
|
1222
1268
|
|
|
@@ -1227,15 +1273,22 @@ usage: speconsense-summarize [-h] [--source SOURCE]
|
|
|
1227
1273
|
[--summary-dir SUMMARY_DIR]
|
|
1228
1274
|
[--fasta-fields FASTA_FIELDS] [--min-ric MIN_RIC]
|
|
1229
1275
|
[--min-len MIN_LEN] [--max-len MAX_LEN]
|
|
1230
|
-
[--group-identity GROUP_IDENTITY]
|
|
1276
|
+
[--group-identity GROUP_IDENTITY]
|
|
1277
|
+
[--disable-merging] [--enable-merging]
|
|
1278
|
+
[--merge-snp | --no-merge-snp]
|
|
1231
1279
|
[--merge-indel-length MERGE_INDEL_LENGTH]
|
|
1232
1280
|
[--merge-position-count MERGE_POSITION_COUNT]
|
|
1233
1281
|
[--merge-min-size-ratio MERGE_MIN_SIZE_RATIO]
|
|
1234
1282
|
[--min-merge-overlap MIN_MERGE_OVERLAP]
|
|
1235
1283
|
[--disable-homopolymer-equivalence]
|
|
1284
|
+
[--enable-homopolymer-equivalence]
|
|
1285
|
+
[--merge-effort LEVEL]
|
|
1236
1286
|
[--select-max-groups SELECT_MAX_GROUPS]
|
|
1237
1287
|
[--select-max-variants SELECT_MAX_VARIANTS]
|
|
1238
1288
|
[--select-strategy {size,diversity}]
|
|
1289
|
+
[--select-min-size-ratio SELECT_MIN_SIZE_RATIO]
|
|
1290
|
+
[--enable-full-consensus]
|
|
1291
|
+
[--disable-full-consensus]
|
|
1239
1292
|
[--scale-threshold SCALE_THRESHOLD] [--threads N]
|
|
1240
1293
|
[--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
|
|
1241
1294
|
[--version] [-p NAME] [--list-profiles]
|
|
@@ -1281,10 +1334,7 @@ Grouping:
|
|
|
1281
1334
|
Merging:
|
|
1282
1335
|
--disable-merging Disable all variant merging (skip MSA-based merge
|
|
1283
1336
|
evaluation entirely)
|
|
1284
|
-
--merge-effort LEVEL Merging effort level: fast (8), balanced (10),
|
|
1285
|
-
thorough (12), or numeric 6-14. Higher values allow
|
|
1286
|
-
larger batch sizes for exhaustive subset search.
|
|
1287
|
-
Default: balanced
|
|
1337
|
+
--enable-merging Override --disable-merging or profile setting
|
|
1288
1338
|
--merge-snp, --no-merge-snp
|
|
1289
1339
|
Enable SNP-based merging (default: True, use --no-
|
|
1290
1340
|
merge-snp to disable)
|
|
@@ -1303,6 +1353,13 @@ Merging:
|
|
|
1303
1353
|
--disable-homopolymer-equivalence
|
|
1304
1354
|
Disable homopolymer equivalence in merging (treat AAA
|
|
1305
1355
|
vs AAAA as different)
|
|
1356
|
+
--enable-homopolymer-equivalence
|
|
1357
|
+
Override --disable-homopolymer-equivalence or profile
|
|
1358
|
+
setting
|
|
1359
|
+
--merge-effort LEVEL Merging effort level: fast (8), balanced (10),
|
|
1360
|
+
thorough (12), or numeric 6-14. Higher values allow
|
|
1361
|
+
larger batch sizes for exhaustive subset search.
|
|
1362
|
+
Default: balanced
|
|
1306
1363
|
|
|
1307
1364
|
Selection:
|
|
1308
1365
|
--select-max-groups SELECT_MAX_GROUPS, --max-groups SELECT_MAX_GROUPS
|
|
@@ -1314,6 +1371,16 @@ Selection:
|
|
|
1314
1371
|
--select-strategy {size,diversity}, --variant-selection {size,diversity}
|
|
1315
1372
|
Variant selection strategy: size or diversity
|
|
1316
1373
|
(default: size)
|
|
1374
|
+
--select-min-size-ratio SELECT_MIN_SIZE_RATIO
|
|
1375
|
+
Minimum size ratio (variant/largest) to include in
|
|
1376
|
+
output (default: 0 = disabled, e.g. 0.2 for 20%
|
|
1377
|
+
cutoff)
|
|
1378
|
+
--enable-full-consensus
|
|
1379
|
+
Generate a full consensus per variant group
|
|
1380
|
+
representing all variation from pre-merge variants
|
|
1381
|
+
(gaps never win)
|
|
1382
|
+
--disable-full-consensus
|
|
1383
|
+
Override --enable-full-consensus or profile setting
|
|
1317
1384
|
|
|
1318
1385
|
Performance:
|
|
1319
1386
|
--scale-threshold SCALE_THRESHOLD
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
speconsense/__init__.py,sha256=
|
|
1
|
+
speconsense/__init__.py,sha256=uLSZG2n0xobwuNT2PwZbytUg1DcyOr2aJlsbc52iKs0,537
|
|
2
2
|
speconsense/cli.py,sha256=Kqb2da0IuazocAz72iqTnw71jI7UaQgxsHfb9CwiolU,85
|
|
3
3
|
speconsense/msa.py,sha256=t1uDb-Tj5tDnB17QnNZPslpAiLXgAMIlnmMKBbwBKzs,31661
|
|
4
4
|
speconsense/quality_report.py,sha256=Byrc115T03ybi7mpA0Bw8-gc83nhKPzDY0tyH1IIAMQ,19803
|
|
@@ -6,11 +6,12 @@ speconsense/synth.py,sha256=7kbifR9XZDcsB0wxo2PCHD8vLGEkVMTH3SQ724hTFGw,9892
|
|
|
6
6
|
speconsense/types.py,sha256=_16nMMbfALEW212LDwTCan9u-gjvnS1ZQKpMK3y3zCE,1669
|
|
7
7
|
speconsense/core/__init__.py,sha256=3AWfnmw1FTzzf-BRdGo1vRHjVJq7d-Wugsw50GJQY_0,694
|
|
8
8
|
speconsense/core/__main__.py,sha256=dCfyQkVxxwlP6QqcWw9y5zp5iLzkG-fQsLmFHHEUlbI,112
|
|
9
|
-
speconsense/core/cli.py,sha256=
|
|
9
|
+
speconsense/core/cli.py,sha256=iepQMK0ZUhZvQShVZY_6WaHneR8ZIRKZ_b6NvVwaRwU,17186
|
|
10
10
|
speconsense/core/clusterer.py,sha256=UFK5Ec0oMQ7l3GsFJOAhTFk7r90eOOdOBXRskm79Fwk,72093
|
|
11
11
|
speconsense/core/workers.py,sha256=6pUyt-W9KxkillJ6TU1RjRh-_L-zRIwWqzIcBSeiOSc,25811
|
|
12
|
-
speconsense/profiles/__init__.py,sha256=
|
|
13
|
-
speconsense/profiles/
|
|
12
|
+
speconsense/profiles/__init__.py,sha256=5UWj6VyUIXTzQ1kBZ4mJ2olZ_ADMK85rwr7KEmRfZfk,16382
|
|
13
|
+
speconsense/profiles/compressed.yaml,sha256=LKtBm6nj8cpF2xeFcA7vzzNzaXdEo0JknnmcDDmdFj8,1227
|
|
14
|
+
speconsense/profiles/example.yaml,sha256=UGHoVvFiB6iQ-lUU4rwInL6oE1eAd7Fo5qp14vfXJvA,4546
|
|
14
15
|
speconsense/profiles/herbarium.yaml,sha256=1OyAPvBZmJ0eWHejfTU_NLd1_08F9n5WbeE686mzYGE,1125
|
|
15
16
|
speconsense/profiles/largedata.yaml,sha256=7qwl5CHA7BiFcznycUoprOX_A-qrsZzV5fBLnA3QmcE,884
|
|
16
17
|
speconsense/profiles/nostalgia.yaml,sha256=Hy20M88FiCmDvscyIKbwfNSusiHptmBm4pIWPiSFmp0,661
|
|
@@ -22,15 +23,15 @@ speconsense/scalability/vsearch.py,sha256=I1IzTeRzEFn9bi8mNbBRvtcHvUBzBFdE7D5yf-
|
|
|
22
23
|
speconsense/summarize/__init__.py,sha256=PE6W9hytDxhkw7W6Fz8X3jd92N2VdhuxiQ72Nqm1xC0,3181
|
|
23
24
|
speconsense/summarize/__main__.py,sha256=_hzLNqNtv4PirL1oMic37GW2QmjWquoznzNtld_3FiQ,117
|
|
24
25
|
speconsense/summarize/analysis.py,sha256=1MXtKMpX1bgKEtI-JN6BwTQj99qyt1eQLqNg51EgPiE,31560
|
|
25
|
-
speconsense/summarize/cli.py,sha256=
|
|
26
|
+
speconsense/summarize/cli.py,sha256=uSeY7__KpdQVXqJcQ0Zpn6ePeyJDVGdml7rZgHFr3W8,27124
|
|
26
27
|
speconsense/summarize/clustering.py,sha256=kk-FdFCea8KRocowN_4dt_aoqZNVJMmEu7CVKPfYgK8,28346
|
|
27
|
-
speconsense/summarize/fields.py,sha256=
|
|
28
|
-
speconsense/summarize/io.py,sha256=
|
|
28
|
+
speconsense/summarize/fields.py,sha256=a6aK9hkPJ-sDRRSqM_7IkyqCki99KSMnsQMV-U7r2zY,8687
|
|
29
|
+
speconsense/summarize/io.py,sha256=FdHLbcj0NOL3WE1e5OL85DRdJaHpyXPMcmlNg9mG3tM,32732
|
|
29
30
|
speconsense/summarize/iupac.py,sha256=Y6KqELmnGy4Eya4C_4ldXY8uek0ReuSUgITLI3NW0-A,11042
|
|
30
|
-
speconsense/summarize/merging.py,sha256=
|
|
31
|
-
speconsense-0.7.
|
|
32
|
-
speconsense-0.7.
|
|
33
|
-
speconsense-0.7.
|
|
34
|
-
speconsense-0.7.
|
|
35
|
-
speconsense-0.7.
|
|
36
|
-
speconsense-0.7.
|
|
31
|
+
speconsense/summarize/merging.py,sha256=FakBey3qpu7ULPIsc2GDo9WG8jNU1L6q2pgQ2HrOKXk,28454
|
|
32
|
+
speconsense-0.7.4.dist-info/licenses/LICENSE,sha256=T_VYPNbu9NSWjdQunfk4jqUGND_kYWe_An18s6N492o,1498
|
|
33
|
+
speconsense-0.7.4.dist-info/METADATA,sha256=2vFyM5rqFEwMPIcsSAH32Dwh_bDA-bOk15D7El6MO7Y,79957
|
|
34
|
+
speconsense-0.7.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
35
|
+
speconsense-0.7.4.dist-info/entry_points.txt,sha256=C0zFp5EYA8_KCb04uOyb4JNkxNH7bli1eU-XYrSX3BU,147
|
|
36
|
+
speconsense-0.7.4.dist-info/top_level.txt,sha256=nYUJOHrqeX-OOxOYQKvpp7Iv8-Bed18wN1DBwWfJKnQ,12
|
|
37
|
+
speconsense-0.7.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|