speconsense 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,514 @@
1
+ """Profile system for speconsense parameter presets.
2
+
3
+ Profiles allow users to save and reuse parameter configurations for different
4
+ workflows (e.g., herbarium specimens vs. fresh specimens).
5
+
6
+ Profile resolution order:
7
+ 1. User profiles in ~/.config/speconsense/profiles/
8
+ 2. Bundled profiles in package
9
+
10
+ Override order: defaults -> profile -> explicit CLI arguments
11
+ """
12
+
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Dict, Any, Optional, List, Set
16
+ import logging
17
+ import os
18
+ import re
19
+ import sys
20
+
21
+ try:
22
+ import yaml
23
+ except ImportError:
24
+ yaml = None # type: ignore
25
+
26
+ try:
27
+ from speconsense import __version__
28
+ except ImportError:
29
+ __version__ = "dev"
30
+
31
+ # Use importlib.resources for Python 3.9+, fall back to pkg_resources
32
+ HAS_IMPORTLIB_RESOURCES = False
33
+ HAS_PKG_RESOURCES = False
34
+
35
+ try:
36
+ from importlib.resources import files as importlib_files
37
+ from importlib.resources import as_file
38
+ HAS_IMPORTLIB_RESOURCES = True
39
+ except ImportError:
40
+ try:
41
+ import pkg_resources
42
+ HAS_PKG_RESOURCES = True
43
+ except ImportError:
44
+ pass
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
# Per-user configuration root, following the XDG default layout (~/.config).
# NOTE: the XDG_CONFIG_HOME environment variable is not consulted here; the
# location is always derived from the user's home directory.
XDG_CONFIG_HOME = Path.home().joinpath(".config")
# Directory that holds user-defined profile YAML files.
PROFILES_DIR = XDG_CONFIG_HOME.joinpath("speconsense", "profiles")
51
+
52
+ # Valid keys for each tool (for strict validation)
53
# Option names accepted in the ``speconsense:`` section of a profile.
# Keys use the dashed spelling found in the YAML files.
VALID_SPECONSENSE_KEYS = {
    # Clustering algorithm
    "algorithm", "min-identity", "inflation", "k-nearest-neighbors",
    # Cluster filtering
    "min-size", "min-cluster-ratio", "outlier-identity",
    # Sampling
    "max-sample-size", "presample",
    # Variant calling
    "min-variant-frequency", "min-variant-count", "disable-position-phasing",
    # Ambiguity calling
    "min-ambiguity-frequency", "min-ambiguity-count", "disable-ambiguity-calling",
    # Merging
    "disable-cluster-merging", "disable-homopolymer-equivalence",
    # Orientation
    "orient-mode",
    # Processing
    "scale-threshold", "threads", "enable-early-filter", "collect-discards",
}
85
+
86
# Option names accepted in the ``speconsense-summarize:`` section of a profile.
# Keys use the dashed spelling found in the YAML files.
VALID_SUMMARIZE_KEYS = {
    # Filtering
    "min-ric", "min-len", "max-len",
    # Grouping
    "group-identity",
    # Merging
    "disable-merging", "merge-effort", "merge-snp", "merge-indel-length",
    "merge-position-count", "merge-min-size-ratio", "min-merge-overlap",
    "disable-homopolymer-equivalence",
    # Selection
    "select-max-groups", "select-max-variants", "select-strategy",
    # Processing
    "scale-threshold", "threads",
}
110
+
111
+
112
class ProfileError(Exception):
    """Base class for failures while loading or applying a profile."""


class ProfileVersionError(ProfileError):
    """The profile's version pattern does not match the running speconsense."""


class ProfileValidationError(ProfileError):
    """The profile contains option names that are not recognised."""
125
+
126
+
127
@dataclass
class Profile:
    """A named set of parameter presets for the speconsense tools.

    Instances are normally created through :meth:`load`, which looks the
    profile up by name, parses its YAML file and validates it.
    """
    # Name the profile was requested under (file stem, no ".yaml").
    name: str
    # Version pattern taken from the "speconsense-version" key, e.g. "0.7.*".
    version: str
    # Free-text value of the "description" key.
    description: str
    # Contents of the "speconsense" section (dashed option name -> value).
    speconsense: Dict[str, Any] = field(default_factory=dict)
    # Contents of the "speconsense-summarize" section.
    speconsense_summarize: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def load(cls, name: str, check_version: bool = True) -> 'Profile':
        """Load profile by name from user dir or bundled.

        A user profile in ``PROFILES_DIR`` wins over a bundled profile of
        the same name.

        Args:
            name: Profile name (without .yaml extension)
            check_version: If True, validate version compatibility

        Returns:
            Loaded Profile instance

        Raises:
            ProfileError: If PyYAML is missing, or the profile is not found
                or cannot be read/parsed
            ProfileVersionError: If version incompatible
            ProfileValidationError: If profile contains invalid keys
        """
        if yaml is None:
            raise ProfileError(
                "PyYAML is required for profile support. "
                "Install with: pip install pyyaml"
            )

        # Initialize user profiles directory with example on first use
        ensure_user_profiles_dir()

        # Try user profile first
        user_path = PROFILES_DIR / f"{name}.yaml"
        if user_path.exists():
            return cls._load_from_path(user_path, name, check_version)

        # Fall back to bundled profile
        bundled_path = get_bundled_profile_path(name)
        if bundled_path is not None:
            return cls._load_from_path(bundled_path, name, check_version)

        # Profile not found - provide helpful error
        available = list_profiles()
        if available:
            raise ProfileError(
                f"Profile '{name}' not found. Available profiles: {', '.join(available)}"
            )
        raise ProfileError(
            f"Profile '{name}' not found and no profiles are available. "
            f"Check that profiles are installed in {PROFILES_DIR}"
        )

    @classmethod
    def _load_from_path(cls, path: Path, name: str, check_version: bool) -> 'Profile':
        """Parse, validate and build a profile from one YAML file.

        Args:
            path: Location of the YAML file to read
            name: Profile name, used for the instance and in error messages
            check_version: If True, reject profiles whose version pattern
                does not match the running speconsense version

        Raises:
            ProfileError: If the file cannot be read or decoded, is not
                valid YAML, or does not have the expected mapping structure
            ProfileVersionError: If the version check fails
            ProfileValidationError: If a section contains unknown keys
        """
        try:
            # Read as UTF-8 explicitly so parsing does not depend on the
            # platform's default text encoding.
            with open(path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            raise ProfileError(f"Invalid YAML in profile '{name}': {e}") from e
        except (OSError, UnicodeDecodeError) as e:
            raise ProfileError(f"Cannot read profile '{name}': {e}") from e

        if not isinstance(data, dict):
            raise ProfileError(f"Profile '{name}' must be a YAML mapping")

        # Extract fields. An unquoted version such as 0.7 is loaded by YAML
        # as a float, so normalise to str; an empty value means "any".
        raw_version = data.get('speconsense-version', '*')
        version = '*' if raw_version is None else str(raw_version)
        description = data.get('description', '')

        # An empty section ("speconsense:" with nothing below it) loads as
        # None and is treated as "no overrides"; anything else must be a
        # mapping, otherwise key validation below could not work.
        sections = {}
        for section_key in ('speconsense', 'speconsense-summarize'):
            section = data.get(section_key) or {}
            if not isinstance(section, dict):
                raise ProfileError(
                    f"Profile '{name}': section '{section_key}' must be a YAML mapping"
                )
            sections[section_key] = section
        speconsense = sections['speconsense']
        speconsense_summarize = sections['speconsense-summarize']

        # Validate version compatibility
        if check_version and not check_version_compatible(version, __version__):
            raise ProfileVersionError(
                f"Profile '{name}' requires speconsense version {version}, "
                f"but you have {__version__}.\n\n"
                f"This profile may use parameters that have changed or been removed.\n"
                f"Please update the profile for your version, or copy the bundled\n"
                f"'{name}' profile which is compatible with your version:\n\n"
                f" cp {PROFILES_DIR}/{name}.yaml {PROFILES_DIR}/{name}.yaml.bak\n"
                f" speconsense --list-profiles # Will show available profiles"
            )

        # Validate keys (strict validation)
        _validate_profile_keys(name, speconsense, speconsense_summarize)

        return cls(
            name=name,
            version=version,
            description=description,
            speconsense=speconsense,
            speconsense_summarize=speconsense_summarize,
        )
225
+
226
+
227
def check_version_compatible(profile_version: str, current_version: str) -> bool:
    """Check if profile version pattern matches current version.

    The only wildcard is ``*``, which matches any run of characters
    (including none). Every other character is matched literally:
    - "0.7.*" matches "0.7.0", "0.7.1", etc.
    - "0.7.0" matches only "0.7.0"
    - "0.*" matches any 0.x release
    - "*" matches any version

    Args:
        profile_version: Version pattern from profile (e.g., "0.7.*").
            Non-string values (an unquoted YAML ``0.7`` is a float) are
            converted with ``str``.
        current_version: Current speconsense version (e.g., "0.7.2")

    Returns:
        True if versions are compatible
    """
    wanted = str(profile_version)
    if wanted == '*':
        return True

    # Escape the whole pattern first so characters such as '+', '(' or '['
    # are compared literally, then turn the escaped '*' back into a wildcard.
    pattern = re.escape(wanted).replace(r'\*', '.*')
    return re.fullmatch(pattern, str(current_version)) is not None
256
+
257
+
258
def _validate_profile_keys(
    name: str,
    speconsense: Dict[str, Any],
    speconsense_summarize: Dict[str, Any]
) -> None:
    """Validate that profile only contains known keys.

    Args:
        name: Profile name, used in the error message
        speconsense: Contents of the profile's "speconsense" section
        speconsense_summarize: Contents of the "speconsense-summarize" section

    Raises:
        ProfileValidationError: If either section contains a key that is not
            in the corresponding VALID_*_KEYS set. All offending keys from
            both sections are reported in a single message.
    """
    checks = (
        ("speconsense", speconsense, VALID_SPECONSENSE_KEYS),
        ("speconsense-summarize", speconsense_summarize, VALID_SUMMARIZE_KEYS),
    )

    errors = []
    for label, values, allowed in checks:
        unknown = set(values) - allowed
        if unknown:
            # YAML keys are not necessarily strings (e.g. "1:" loads as an
            # int); stringify before sorting/joining so such a key is
            # reported as unknown instead of raising TypeError here.
            listing = ', '.join(sorted(str(key) for key in unknown))
            errors.append(f" {label}: {listing}")

    if errors:
        raise ProfileValidationError(
            f"Profile '{name}' contains unknown keys:\n" + '\n'.join(errors) + "\n\n"
            f"This may indicate a typo or an option that has been removed.\n"
            f"Please check the profile and fix or remove the invalid keys."
        )
283
+
284
+
285
def get_bundled_profile_path(name: str) -> Optional[Path]:
    """Get path to bundled profile using importlib.resources.

    Lookup order: importlib.resources, then pkg_resources (only imported
    when importlib.resources.files is unavailable), then the directory
    containing this module.

    Args:
        name: Profile name (without .yaml extension)

    Returns:
        Path to bundled profile file, or None if not found
    """
    filename = f'{name}.yaml'

    if HAS_IMPORTLIB_RESOURCES:
        try:
            # Python 3.9+ style
            resource = importlib_files('speconsense.profiles').joinpath(filename)
            if isinstance(resource, Path):
                # Already a real file on disk; nothing to extract.
                if resource.exists():
                    return resource
            elif resource.is_file():
                # The resource is not a plain file (e.g. it lives inside a
                # zip archive). as_file() extracts it to a temporary file
                # that is removed when the context exits, so the context is
                # kept open until interpreter shutdown; otherwise the
                # returned path would already be gone when the caller
                # opens it.
                import atexit
                from contextlib import ExitStack

                keeper = ExitStack()
                atexit.register(keeper.close)
                return keeper.enter_context(as_file(resource))
        except (TypeError, FileNotFoundError, ModuleNotFoundError):
            pass

    if HAS_PKG_RESOURCES:
        try:
            # Fall back to pkg_resources for Python 3.8
            candidate = Path(
                pkg_resources.resource_filename('speconsense.profiles', filename)
            )
            if candidate.exists():
                return candidate
        except (FileNotFoundError, ModuleNotFoundError):
            pass

    # Last resort: check relative to this file (profiles/__init__.py)
    local_candidate = Path(__file__).parent / filename
    if local_candidate.exists():
        return local_candidate

    return None
325
+
326
+
327
def list_bundled_profiles() -> List[str]:
    """Return the sorted names of the profiles shipped with the package.

    The package resources are scanned through importlib.resources when it is
    available. If that scan yields nothing (or fails), the directory that
    contains this module is globbed for ``*.yaml`` instead.
    """
    found: List[str] = []

    # Preferred source: package resources.
    if HAS_IMPORTLIB_RESOURCES:
        try:
            for entry in importlib_files('speconsense.profiles').iterdir():
                entry_text = str(entry)
                if entry_text.endswith('.yaml'):
                    found.append(Path(entry_text).stem)
        except (TypeError, FileNotFoundError, ModuleNotFoundError):
            pass
        else:
            if found:
                return sorted(found)

    # Fallback source: YAML files sitting next to this module.
    module_dir = Path(__file__).parent
    if module_dir.exists():
        found.extend(candidate.stem for candidate in module_dir.glob('*.yaml'))

    return sorted(found)
351
+
352
+
353
def list_profiles() -> List[str]:
    """Return the sorted names of all available profiles.

    Names come from the bundled profiles and from ``*.yaml`` files in the
    user profile directory, without the .yaml extension. A name present in
    both places is listed once.
    """
    names: Set[str] = set(list_bundled_profiles())

    # Add user profiles, if the directory has been created.
    if PROFILES_DIR.exists():
        names.update(entry.stem for entry in PROFILES_DIR.glob('*.yaml'))

    return sorted(names)
370
+
371
+
372
def ensure_user_profiles_dir() -> Path:
    """Ensure user profiles directory exists with example profile.

    On first use, creates the directory and copies the bundled example
    profile into it to help users create their own profiles. Nothing is
    copied when example.yaml or any other ``*.yaml`` file is already there.

    The example is written to a temporary file in the same directory and
    then renamed into place, so concurrent callers never observe a
    half-written example.yaml. Failure to create the example is non-fatal
    and only logged at debug level; failure to create the directory itself
    propagates.

    Returns:
        Path to user profiles directory
    """
    import shutil
    import tempfile

    PROFILES_DIR.mkdir(parents=True, exist_ok=True)

    example_path = PROFILES_DIR / "example.yaml"

    # Skip if example already exists (common case, avoid extra work)
    if example_path.exists():
        return PROFILES_DIR

    # Skip if user already has other profiles (they don't need the example)
    if any(PROFILES_DIR.glob('*.yaml')):
        return PROFILES_DIR

    bundled_example = get_bundled_profile_path('example')
    if bundled_example is None:
        return PROFILES_DIR

    # Path of the temp file while it still needs cleaning up; reset to None
    # once it has been renamed into place.
    temp_path = None
    try:
        # Write to temp file in same directory, then rename. The ".tmp"
        # suffix keeps it out of the '*.yaml' globs used elsewhere.
        fd, temp_path = tempfile.mkstemp(
            dir=PROFILES_DIR,
            prefix='.example.',
            suffix='.yaml.tmp'
        )
        # File objects guarantee the whole content is written (a bare
        # os.write may write fewer bytes than requested) and close the
        # descriptor even when the copy fails.
        with os.fdopen(fd, 'wb') as dst, open(bundled_example, 'rb') as src:
            shutil.copyfileobj(src, dst)

        # On POSIX: atomic, last writer wins (all have same content, so fine)
        # On Windows: may raise if file exists, which we catch
        try:
            os.rename(temp_path, example_path)
        except OSError:
            # Another process won the race - that's fine; our temp file is
            # removed in the finally block below.
            pass
        else:
            temp_path = None
            logger.info("Created example profile at %s", example_path)
    except Exception as e:
        # Non-fatal - profile system works without example in user dir
        logger.debug("Could not create example profile: %s", e)
    finally:
        # Never leave a stray temp file behind, whichever step failed.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    return PROFILES_DIR
429
+
430
+
431
def apply_profile_to_args(
    args,
    profile: Profile,
    tool: str,
    explicit_args: Set[str]
) -> None:
    """Copy a profile's values for one tool onto an argparse namespace.

    Values the user passed explicitly on the command line are left alone,
    so the effective order is: defaults -> profile -> explicit CLI arguments.

    Args:
        args: argparse Namespace to modify in place
        profile: Profile to apply
        tool: Either 'speconsense' or 'speconsense-summarize'
        explicit_args: Attribute names (underscore form) that were explicitly
            provided on CLI

    Raises:
        ValueError: If ``tool`` is not one of the two known tool names
    """
    section_attr = {
        'speconsense': 'speconsense',
        'speconsense-summarize': 'speconsense_summarize',
    }
    if tool not in section_attr:
        raise ValueError(f"Unknown tool: {tool}")
    profile_values = getattr(profile, section_attr[tool])

    for key, value in profile_values.items():
        # YAML keys use dashes; argparse attributes use underscores.
        attr_name = key.replace('-', '_')

        # Explicit command-line values always win over the profile.
        if attr_name in explicit_args:
            continue

        if not hasattr(args, attr_name):
            # This shouldn't happen if validation passed, but log it
            logger.warning(
                f"Profile '{profile.name}': unknown attribute '{attr_name}'"
            )
            continue

        logger.debug(f"Profile '{profile.name}': setting {attr_name}={value}")
        setattr(args, attr_name, value)
466
+
467
+
468
def print_profiles_list(tool: str = 'speconsense') -> None:
    """Write a human-readable listing of all available profiles to stdout.

    Each profile is shown with its origin (user or bundled), an
    incompatibility marker when its version pattern does not match the
    running version, its description and its version pattern. Profiles that
    fail to load are listed with the error instead.

    Args:
        tool: Either 'speconsense' or 'speconsense-summarize'; only used in
            the usage hint printed at the end
    """
    if yaml is None:
        print("Profile support requires PyYAML. Install with: pip install pyyaml")
        return

    # Initialize user profiles directory with example on first use
    ensure_user_profiles_dir()

    available = list_profiles()
    if not available:
        print("No profiles found.")
        print(f"\nProfiles are stored in: {PROFILES_DIR}")
        return

    print("Available profiles:\n")

    for name in available:
        try:
            # Version check is skipped so incompatible profiles still show up.
            profile = Profile.load(name, check_version=False)
        except ProfileError as e:
            print(f" {name} [ERROR: {e}]")
            print()
            continue

        # A file in the user directory shadows a bundled profile.
        if (PROFILES_DIR / f"{name}.yaml").exists():
            source = "user"
        else:
            source = "bundled"

        if check_version_compatible(profile.version, __version__):
            compat_str = ""
        else:
            compat_str = " [INCOMPATIBLE]"

        print(f" {name} ({source}){compat_str}")
        if profile.description:
            print(f" {profile.description}")
        print(f" Version: {profile.version}")
        print()

    print(f"Usage: {tool} -p <profile> [other options]")
    print(f"Profile directory: {PROFILES_DIR}")
@@ -0,0 +1,97 @@
1
+ # Example Speconsense Profile
2
+ #
3
+ # Copy this file to create your own profiles:
4
+ # cp example.yaml my-workflow.yaml
5
+ #
6
+ # Then use it with:
7
+ # speconsense input.fastq -p my-workflow
8
+ # speconsense-summarize -p my-workflow
9
+ #
10
+ # HOW TO USE THIS FILE:
11
+ # - Lines starting with # are comments (ignored)
12
+ # - To enable an option, remove the # at the start of the line
13
+ # - Only include parameters you want to change from defaults
14
+ # - CLI arguments always override profile values
15
+ #
16
+ # Example: To set min-size to 10, change:
17
+ # # min-size: 5
18
+ # to:
19
+ # min-size: 10
20
+
21
+ speconsense-version: "0.7.*"
22
+ description: "My custom workflow"
23
+
24
+ # =============================================================================
25
+ # Parameters for speconsense (clustering and consensus)
26
+ # =============================================================================
27
+ speconsense:
28
+
29
+ # --- Clustering Algorithm ---
30
+ # algorithm: graph # Clustering algorithm: graph (MCL) or greedy
31
+ # min-identity: 0.9 # Similarity threshold for clustering (0.0-1.0)
32
+ # inflation: 2.0 # MCL inflation parameter (higher = more clusters)
33
+ # k-nearest-neighbors: 10 # K-NN graph construction parameter
34
+
35
+ # --- Cluster Filtering ---
36
+ # min-size: 5 # Minimum reads per cluster (0 to disable)
37
+ # min-cluster-ratio: 0.01 # Minimum cluster size as ratio of total reads
38
+ # outlier-identity: 0.85 # Identity threshold for outlier detection
39
+
40
+ # --- Read Sampling ---
41
+ # presample: 1000 # Initial random sampling of input reads
42
+ # max-sample-size: 100 # Max reads used for consensus generation
43
+
44
+ # --- Variant Calling (position-based phasing) ---
45
+ # min-variant-frequency: 0.10 # Min frequency to call a variant (0.0-1.0)
46
+ # min-variant-count: 3 # Min read count to call a variant
47
+ # disable-position-phasing: false # Set true to disable variant detection
48
+
49
+ # --- Ambiguity Calling (IUPAC codes) ---
50
+ # min-ambiguity-frequency: 0.25 # Min frequency for IUPAC ambiguity codes
51
+ # min-ambiguity-count: 3 # Min read count for ambiguity codes
52
+ # disable-ambiguity-calling: false # Set true to disable IUPAC codes
53
+
54
+ # --- Cluster Merging ---
55
+ # disable-cluster-merging: false # Set true to skip merging similar clusters
56
+ # disable-homopolymer-equivalence: false # Set true for strict homopolymer comparison
57
+
58
+ # --- Primer Orientation ---
59
+ # orient-mode: filter-failed # skip, keep-all, or filter-failed
60
+
61
+ # --- Processing ---
62
+ # threads: 0 # Max threads (0 = auto-detect)
63
+ # scale-threshold: 500 # Read count threshold for scaled processing
64
+ # enable-early-filter: false # Enable early filtering optimization
65
+ # collect-discards: false # Write discarded reads to separate file
66
+
67
+ # =============================================================================
68
+ # Parameters for speconsense-summarize (post-processing)
69
+ # =============================================================================
70
+ speconsense-summarize:
71
+
72
+ # --- Input Filtering ---
73
+ # min-ric: 3 # Minimum Reads in Consensus threshold
74
+ # min-len: 0 # Minimum sequence length (0 = disabled)
75
+ # max-len: 0 # Maximum sequence length (0 = disabled)
76
+
77
+ # --- Variant Grouping (HAC clustering) ---
78
+ # group-identity: 0.95 # Identity threshold for grouping variants
79
+
80
+ # --- MSA-based Merging ---
81
+ # disable-merging: false # Set true to skip all merging (fastest)
82
+ # merge-effort: balanced # Effort level: fast, balanced, thorough, or 6-14
83
+ # merge-snp: true # Enable SNP-based merging
84
+ # merge-indel-length: 3 # Max indel length to merge (0 = disabled)
85
+ # merge-position-count: 1 # Max variant positions to merge
86
+ # merge-min-size-ratio: 0.1 # Min size ratio for merge candidate
87
+ # min-merge-overlap: 0.9 # Min alignment overlap for merging
88
+ # disable-homopolymer-equivalence: false # Set true for strict comparison
89
+
90
+ # --- Variant Selection ---
91
+ # select-max-groups: -1 # Max groups to output (-1 = no limit)
92
+ # select-max-variants: -1 # Max variants per group (-1 = no limit)
93
+ # select-strategy: size # Selection strategy: size or diversity
94
+
95
+ # --- Processing ---
96
+ # threads: 0 # Max threads (0 = auto-detect)
97
+ # scale-threshold: 500 # Read count threshold for scaled processing
@@ -0,0 +1,25 @@
1
+ # High-recall settings for herbarium specimens with degraded DNA
2
+ #
3
+ # Use this profile when working with:
4
+ # - Herbarium specimens where DNA may be degraded
5
+ # - Type specimens that cannot be resampled
6
+ # - Samples where contamination may dominate the target
7
+ # - Any situation where false negatives are more costly than false positives
8
+ #
9
+ # The settings prioritize keeping all potential biological signal for
10
+ # manual review, at the cost of more output variants.
11
+
12
+ speconsense-version: "0.7.*"
13
+ description: "High-recall settings for herbarium specimens with degraded DNA"
14
+
15
+ speconsense:
16
+ min-identity: 0.85 # Lower threshold for degraded DNA
17
+ # outlier-identity: auto # Auto-calculated as 0.925 from min-identity
18
+ min-cluster-ratio: 0 # Keep all clusters
19
+ min-size: 3 # Lower minimum for degraded samples
20
+ min-variant-frequency: 0.05 # More sensitive variant detection (5%)
21
+ presample: 0 # Use all reads
22
+ max-sample-size: 100 # 100 reads for high-quality consensus
23
+
24
+ speconsense-summarize:
25
+ min-ric: 3 # Accept lower-support consensus
@@ -0,0 +1,19 @@
1
+ # Speconsense profile for a single large input file
2
+
3
+ speconsense-version: "0.7.*"
4
+ description: "Large Input (experimental)"
5
+
6
+ # =============================================================================
7
+ # Parameters for speconsense (clustering and consensus)
8
+ # =============================================================================
9
+ speconsense:
10
+ min-cluster-ratio: 0 # Do not filter by relative size
11
+ presample: 0 # Process all reads
12
+ threads: 0 # Max threads (0 = auto-detect)
13
+
14
+ # =============================================================================
15
+ # Parameters for speconsense-summarize (post-processing)
16
+ # =============================================================================
17
+ speconsense-summarize:
18
+ merge-effort: fast
19
+ group-identity: 0.95 # Avoid spurious groupings with many sequences
@@ -0,0 +1,22 @@
1
+ # Speconsense Profile to simulate results of older bioinformatics pipelines.
2
+ # Included for comparison only - not recommended for production usage
3
+
4
+ speconsense-version: "0.7.*"
5
+ description: "Simulate older bioinformatics"
6
+
7
+ # Parameters for speconsense (clustering and consensus)
8
+ speconsense:
9
+ algorithm: greedy
10
+ min-identity: 0.85
11
+ outlier-identity: 0.85
12
+ disable-position-phasing: True
13
+ disable-ambiguity-calling: True
14
+ min-cluster-ratio: 0.2
15
+ min-size: 5
16
+ max-sample-size: 500
17
+ presample: 500
18
+
19
+ # Parameters for speconsense-summarize (post-processing)
20
+ speconsense-summarize:
21
+ min-ric: 5
22
+ disable-merging: true # Skip merging entirely