varcode 2.3.0__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {varcode-2.3.0/varcode.egg-info → varcode-2.4.0}/PKG-INFO +17 -2
  2. {varcode-2.3.0 → varcode-2.4.0}/README.md +16 -1
  3. varcode-2.4.0/tests/test_splice_outcomes.py +605 -0
  4. {varcode-2.3.0 → varcode-2.4.0}/varcode/__init__.py +12 -0
  5. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/__init__.py +2 -0
  6. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_classes.py +26 -0
  7. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_collection.py +6 -3
  8. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_ordering.py +14 -1
  9. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction.py +16 -2
  10. varcode-2.4.0/varcode/splice_outcomes.py +722 -0
  11. {varcode-2.3.0 → varcode-2.4.0}/varcode/variant.py +21 -2
  12. {varcode-2.3.0 → varcode-2.4.0}/varcode/variant_collection.py +10 -2
  13. varcode-2.4.0/varcode/version.py +1 -0
  14. {varcode-2.3.0 → varcode-2.4.0/varcode.egg-info}/PKG-INFO +17 -2
  15. {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/SOURCES.txt +2 -0
  16. varcode-2.3.0/varcode/version.py +0 -1
  17. {varcode-2.3.0 → varcode-2.4.0}/LICENSE +0 -0
  18. {varcode-2.3.0 → varcode-2.4.0}/MANIFEST.in +0 -0
  19. {varcode-2.3.0 → varcode-2.4.0}/pyproject.toml +0 -0
  20. {varcode-2.3.0 → varcode-2.4.0}/requirements.txt +0 -0
  21. {varcode-2.3.0 → varcode-2.4.0}/setup.cfg +0 -0
  22. {varcode-2.3.0 → varcode-2.4.0}/tests/__init__.py +0 -0
  23. {varcode-2.3.0 → varcode-2.4.0}/tests/benchmark_vcf_load.py +0 -0
  24. {varcode-2.3.0 → varcode-2.4.0}/tests/common.py +0 -0
  25. {varcode-2.3.0 → varcode-2.4.0}/tests/data.py +0 -0
  26. {varcode-2.3.0 → varcode-2.4.0}/tests/test_cli_effects.py +0 -0
  27. {varcode-2.3.0 → varcode-2.4.0}/tests/test_cli_genes.py +0 -0
  28. {varcode-2.3.0 → varcode-2.4.0}/tests/test_collection_filtering.py +0 -0
  29. {varcode-2.3.0 → varcode-2.4.0}/tests/test_collection_variants_attr_consistency.py +0 -0
  30. {varcode-2.3.0 → varcode-2.4.0}/tests/test_common.py +0 -0
  31. {varcode-2.3.0 → varcode-2.4.0}/tests/test_cosmic_mutations.py +0 -0
  32. {varcode-2.3.0 → varcode-2.4.0}/tests/test_csv_roundtrip.py +0 -0
  33. {varcode-2.3.0 → varcode-2.4.0}/tests/test_dbnsfp_validation.py +0 -0
  34. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_annotation_errors.py +0 -0
  35. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_classes.py +0 -0
  36. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection.py +0 -0
  37. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection_serialization.py +0 -0
  38. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection_sort_order.py +0 -0
  39. {varcode-2.3.0 → varcode-2.4.0}/tests/test_effects_from_mutagenix_variants.py +0 -0
  40. {varcode-2.3.0 → varcode-2.4.0}/tests/test_exonic_splice_site.py +0 -0
  41. {varcode-2.3.0 → varcode-2.4.0}/tests/test_frameshift_helpers.py +0 -0
  42. {varcode-2.3.0 → varcode-2.4.0}/tests/test_genotype.py +0 -0
  43. {varcode-2.3.0 → varcode-2.4.0}/tests/test_genotype_from_vcf.py +0 -0
  44. {varcode-2.3.0 → varcode-2.4.0}/tests/test_maf.py +0 -0
  45. {varcode-2.3.0 → varcode-2.4.0}/tests/test_mm10_klf6_frameshift.py +0 -0
  46. {varcode-2.3.0 → varcode-2.4.0}/tests/test_mouse.py +0 -0
  47. {varcode-2.3.0 → varcode-2.4.0}/tests/test_mutate.py +0 -0
  48. {varcode-2.3.0 → varcode-2.4.0}/tests/test_no_duplicate_variants.py +0 -0
  49. {varcode-2.3.0 → varcode-2.4.0}/tests/test_premature_stop_short_description.py +0 -0
  50. {varcode-2.3.0 → varcode-2.4.0}/tests/test_problematic_variants.py +0 -0
  51. {varcode-2.3.0 → varcode-2.4.0}/tests/test_reference.py +0 -0
  52. {varcode-2.3.0 → varcode-2.4.0}/tests/test_reference_mismatch_error.py +0 -0
  53. {varcode-2.3.0 → varcode-2.4.0}/tests/test_silent_aa_pos.py +0 -0
  54. {varcode-2.3.0 → varcode-2.4.0}/tests/test_silent_hgvs_description.py +0 -0
  55. {varcode-2.3.0 → varcode-2.4.0}/tests/test_splice_site_effects.py +0 -0
  56. {varcode-2.3.0 → varcode-2.4.0}/tests/test_stop_codon_classification_bugs.py +0 -0
  57. {varcode-2.3.0 → varcode-2.4.0}/tests/test_string_helpers.py +0 -0
  58. {varcode-2.3.0 → varcode-2.4.0}/tests/test_symbolic_alleles.py +0 -0
  59. {varcode-2.3.0 → varcode-2.4.0}/tests/test_timings.py +0 -0
  60. {varcode-2.3.0 → varcode-2.4.0}/tests/test_variant.py +0 -0
  61. {varcode-2.3.0 → varcode-2.4.0}/tests/test_variant_collection.py +0 -0
  62. {varcode-2.3.0 → varcode-2.4.0}/tests/test_vcf.py +0 -0
  63. {varcode-2.3.0 → varcode-2.4.0}/tests/test_vcf_output.py +0 -0
  64. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/__init__.py +0 -0
  65. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/effects_script.py +0 -0
  66. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/genes_script.py +0 -0
  67. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/logging.conf +0 -0
  68. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/variant_args.py +0 -0
  69. {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/version_info.py +0 -0
  70. {varcode-2.3.0 → varcode-2.4.0}/varcode/common.py +0 -0
  71. {varcode-2.3.0 → varcode-2.4.0}/varcode/csv_helpers.py +0 -0
  72. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/common.py +0 -0
  73. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_helpers.py +0 -0
  74. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding.py +0 -0
  75. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding_frameshift.py +0 -0
  76. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding_in_frame.py +0 -0
  77. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/mutate.py +0 -0
  78. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/transcript_helpers.py +0 -0
  79. {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/translate.py +0 -0
  80. {varcode-2.3.0 → varcode-2.4.0}/varcode/errors.py +0 -0
  81. {varcode-2.3.0 → varcode-2.4.0}/varcode/genotype.py +0 -0
  82. {varcode-2.3.0 → varcode-2.4.0}/varcode/maf.py +0 -0
  83. {varcode-2.3.0 → varcode-2.4.0}/varcode/nucleotides.py +0 -0
  84. {varcode-2.3.0 → varcode-2.4.0}/varcode/reference.py +0 -0
  85. {varcode-2.3.0 → varcode-2.4.0}/varcode/string_helpers.py +0 -0
  86. {varcode-2.3.0 → varcode-2.4.0}/varcode/ucsc_reference_names.py +0 -0
  87. {varcode-2.3.0 → varcode-2.4.0}/varcode/util.py +0 -0
  88. {varcode-2.3.0 → varcode-2.4.0}/varcode/vcf.py +0 -0
  89. {varcode-2.3.0 → varcode-2.4.0}/varcode/vcf_output.py +0 -0
  90. {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/dependency_links.txt +0 -0
  91. {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/entry_points.txt +0 -0
  92. {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/requires.txt +0 -0
  93. {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: varcode
3
- Version: 2.3.0
3
+ Version: 2.4.0
4
4
  Summary: Variant annotation in Python
5
5
  Author-email: Alex Rubinsteyn <alex.rubinsteyn@unc.edu>
6
6
  Project-URL: Homepage, https://github.com/openvax/varcode
@@ -106,7 +106,22 @@ print(premature_stop_effect.gene.name)
106
106
  ### 'TP53'
107
107
  ```
108
108
 
109
- If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode
109
+ If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode.
110
+
111
+ ## Further reading
112
+
113
+ Feature guides live in [`docs/`](./docs/):
114
+
115
+ - [**Genotypes and sample-aware queries**](./docs/genotype.md) — per-sample
116
+ zygosity on multi-sample VCFs (`Genotype`, `Zygosity`, `VariantCollection.for_sample`,
117
+ `.heterozygous_in`, `.homozygous_alt_in`). New in 2.3.
118
+ - [**CSV round-trip and metadata headers**](./docs/csv.md) — `to_csv` /
119
+ `from_csv` on both collection types, with `#`-prefixed provenance
120
+ headers. New in 2.1, refined in 2.2.
121
+ - [**Error handling**](./docs/errors.md) — `ReferenceMismatchError`,
122
+ `SampleNotFoundError`, and the `raise_on_error=False` escape hatch.
123
+
124
+ See [`CHANGELOG.md`](./CHANGELOG.md) for the release history.
110
125
 
111
126
  ## Effect Types
112
127
 
@@ -79,7 +79,22 @@ print(premature_stop_effect.gene.name)
79
79
  ### 'TP53'
80
80
  ```
81
81
 
82
- If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode
82
+ If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode.
83
+
84
+ ## Further reading
85
+
86
+ Feature guides live in [`docs/`](./docs/):
87
+
88
+ - [**Genotypes and sample-aware queries**](./docs/genotype.md) — per-sample
89
+ zygosity on multi-sample VCFs (`Genotype`, `Zygosity`, `VariantCollection.for_sample`,
90
+ `.heterozygous_in`, `.homozygous_alt_in`). New in 2.3.
91
+ - [**CSV round-trip and metadata headers**](./docs/csv.md) — `to_csv` /
92
+ `from_csv` on both collection types, with `#`-prefixed provenance
93
+ headers. New in 2.1, refined in 2.2.
94
+ - [**Error handling**](./docs/errors.md) — `ReferenceMismatchError`,
95
+ `SampleNotFoundError`, and the `raise_on_error=False` escape hatch.
96
+
97
+ See [`CHANGELOG.md`](./CHANGELOG.md) for the release history.
83
98
 
84
99
  ## Effect Types
85
100
 
@@ -0,0 +1,605 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ Tests for the splice outcome possibility-set prototype
15
+ (openvax/varcode#262).
16
+
17
+ Coverage:
18
+ - Default behavior unchanged (back-compat)
19
+ - Opt-in wraps splice effects in SpliceOutcomeSet
20
+ - Each canonical splice signal class produces the expected outcome set
21
+ - Plausibility ordering is stable
22
+ - Per-outcome candidate construction (normal splicing, exon
23
+ skipping, intron retention stubs, cryptic splice stubs)
24
+ - SpliceOutcomeSet integrates with EffectCollection
25
+ - Multi-allelic and reverse-strand variants work too
26
+ """
27
+
28
+ import pytest
29
+ from pyensembl import cached_release
30
+
31
+ import varcode
32
+ from varcode import (
33
+ SpliceCandidate,
34
+ SpliceOutcome,
35
+ SpliceOutcomeSet,
36
+ Variant,
37
+ )
38
+ from varcode.effects import (
39
+ ExonicSpliceSite,
40
+ IntronicSpliceSite,
41
+ SpliceAcceptor,
42
+ SpliceDonor,
43
+ )
44
+ from varcode.splice_outcomes import enumerate_splice_outcomes
45
+
46
+
47
# Shared fixtures: one cached Ensembl release (number 81) used as the
# genome for every Variant below, plus the IDs of the two transcripts
# (CFTR and BRCA1) the tests look up.
ensembl_grch38 = cached_release(81)
CFTR_TRANSCRIPT_ID = "ENST00000003084"
BRCA1_TRANSCRIPT_ID = "ENST00000357654"
50
+
51
+
52
+ # --------------------------------------------------------------------
53
+ # Back-compat
54
+ # --------------------------------------------------------------------
55
+
56
+
57
def test_default_behavior_unchanged():
    """Without the opt-in kwarg the per-transcript effect is a bare SpliceDonor."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    result = donor_variant.effect_on_transcript(cftr)
    # Exact class match: no wrapping is expected on the default path.
    assert type(result) is SpliceDonor
    assert not isinstance(result, SpliceOutcomeSet)
64
+
65
+
66
def test_default_for_collection_unchanged():
    """`Variant.effects()` with default arguments yields no SpliceOutcomeSet."""
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    # No kwarg passed, i.e. the default splice_outcomes=False path.
    effect_types = set(map(type, donor_variant.effects()))
    assert SpliceOutcomeSet not in effect_types
71
+
72
+
73
+ # --------------------------------------------------------------------
74
+ # Opt-in wraps splice effects
75
+ # --------------------------------------------------------------------
76
+
77
+
78
def test_opt_in_wraps_splice_donor():
    """Opting in turns the CFTR donor effect into a SpliceOutcomeSet."""
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)

    def on_cftr(effect):
        # Tolerate effects that carry no transcript attribute (or None).
        if getattr(effect, "transcript", None) is None:
            return False
        return effect.transcript.id == CFTR_TRANSCRIPT_ID

    wrapped = next(filter(on_cftr, donor_variant.effects(splice_outcomes=True)))
    assert isinstance(wrapped, SpliceOutcomeSet)
    assert wrapped.disrupted_signal_class is SpliceDonor
88
+
89
+
90
def test_opt_in_wraps_splice_acceptor():
    """Opt-in wraps the CFTR exon 4 acceptor -1 variant (canonical ref G)."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    acceptor_variant = Variant("7", 117530898, "G", "A", ensembl_grch38)
    all_effects = acceptor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    assert isinstance(wrapped, SpliceOutcomeSet)
    assert wrapped.disrupted_signal_class is SpliceAcceptor
98
+
99
+
100
def test_opt_in_wraps_exonic_splice_site():
    """Opt-in wraps an exonic splice-site variant.

    CFTR exon 4 ends with AAG; G->T at -1 disrupts the MAG signal.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    exonic_variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
    wrapped = next(
        filter(
            lambda eff: eff.transcript is cftr,
            exonic_variant.effects(splice_outcomes=True),
        )
    )
    assert isinstance(wrapped, SpliceOutcomeSet)
    assert wrapped.disrupted_signal_class is ExonicSpliceSite
108
+
109
+
110
def test_opt_in_wraps_intronic_splice_site():
    """Opt-in wraps a variant classified as IntronicSpliceSite.

    CFTR exon 4 +1 with a NON-canonical ref A is downgraded to
    IntronicSpliceSite (post-2.0.0 sequence-aware classification).
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    noncanonical = Variant("7", 117531115, "A", "G", ensembl_grch38)
    all_effects = noncanonical.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    assert isinstance(wrapped, SpliceOutcomeSet)
    assert wrapped.disrupted_signal_class is IntronicSpliceSite
119
+
120
+
121
def test_opt_in_passes_through_non_splice_effects():
    """A substitution that touches no splice signal is left unwrapped."""
    coding_variant = Variant("17", 43082575 - 5, "CCT", "GGG", ensembl_grch38)
    # At least one effect must still be reported as a plain Substitution.
    effect_names = {
        type(eff).__name__
        for eff in coding_variant.effects(splice_outcomes=True)
    }
    assert "Substitution" in effect_names
129
+
130
+
131
+ # --------------------------------------------------------------------
132
+ # Plausibility ordering and candidate composition
133
+ # --------------------------------------------------------------------
134
+
135
+
136
def test_splice_donor_candidate_set_has_expected_outcomes():
    """A wrapped SpliceDonor offers exactly the four donor-side outcomes."""
    expected = {
        SpliceOutcome.EXON_SKIPPING,
        SpliceOutcome.INTRON_RETENTION,
        SpliceOutcome.CRYPTIC_DONOR,
        SpliceOutcome.NORMAL_SPLICING,
    }
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    observed = {cand.outcome for cand in wrapped.candidates}
    assert observed == expected
148
+
149
+
150
def test_splice_acceptor_candidate_set_uses_cryptic_acceptor():
    """SpliceAcceptor disruption lists CRYPTIC_ACCEPTOR, never CRYPTIC_DONOR."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    acceptor_variant = Variant("7", 117530898, "G", "A", ensembl_grch38)
    all_effects = acceptor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    observed = set(cand.outcome for cand in wrapped.candidates)
    assert SpliceOutcome.CRYPTIC_ACCEPTOR in observed
    assert SpliceOutcome.CRYPTIC_DONOR not in observed
159
+
160
+
161
def test_candidates_sorted_by_plausibility_descending():
    """Candidates come back most-plausible-first."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    scores = [cand.plausibility for cand in wrapped.candidates]
    descending = sorted(scores, reverse=True)
    assert scores == descending, (
        "Candidates should be ordered most-plausible-first, got %r"
        % scores)
170
+
171
+
172
def test_most_likely_for_splice_donor_is_exon_skipping():
    """Per the plausibility table, EXON_SKIPPING dominates for SpliceDonor."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    wrapped = next(
        eff
        for eff in donor_variant.effects(splice_outcomes=True)
        if eff.transcript is cftr
    )
    top_candidate = wrapped.most_likely
    assert top_candidate.outcome is SpliceOutcome.EXON_SKIPPING
179
+
180
+
181
def test_most_likely_for_exonic_splice_site_is_normal_splicing():
    """ExonicSpliceSite ranks NORMAL_SPLICING as its most likely outcome.

    The disruption is on the exon side and is often tolerated.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    exonic_variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
    wrapped = next(
        eff
        for eff in exonic_variant.effects(splice_outcomes=True)
        if eff.transcript is cftr
    )
    top_candidate = wrapped.most_likely
    assert top_candidate.outcome is SpliceOutcome.NORMAL_SPLICING
189
+
190
+
191
def test_normal_splicing_carries_underlying_coding_effect():
    """The NORMAL_SPLICING candidate exposes the underlying coding change.

    ExonicSpliceSite has an alternate_effect (the coding change if
    splicing proceeds); the candidate should surface it.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    exonic_variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
    all_effects = exonic_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)

    def is_normal(cand):
        return cand.outcome is SpliceOutcome.NORMAL_SPLICING

    normal_candidate = next(filter(is_normal, wrapped.candidates))
    coding = normal_candidate.coding_effect
    assert coding is not None
    assert "p." in normal_candidate.coding_effect.short_description
204
+
205
+
206
+ # --------------------------------------------------------------------
207
+ # Per-outcome detail
208
+ # --------------------------------------------------------------------
209
+
210
+
211
def test_intron_retention_candidate_predicts_premature_stop():
    """INTRON_RETENTION is a stub predicting PrematureStop.

    No exact protein is attached because the intronic genomic sequence
    is not available, so coding_effect is None.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)

    def is_retention(cand):
        return cand.outcome is SpliceOutcome.INTRON_RETENTION

    retention = next(filter(is_retention, wrapped.candidates))
    assert retention.predicted_class_name == "PrematureStop"
    assert retention.coding_effect is None
224
+
225
+
226
def test_cryptic_donor_candidate_is_a_stub():
    """CRYPTIC_DONOR has no coding effect and mentions 'cryptic' in its text."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)

    def is_cryptic_donor(cand):
        return cand.outcome is SpliceOutcome.CRYPTIC_DONOR

    stub = next(filter(is_cryptic_donor, wrapped.candidates))
    assert stub.coding_effect is None
    # Case-insensitive match on the human-readable description.
    assert "cryptic" in stub.description.lower()
237
+
238
+
239
def test_exon_skipping_for_in_frame_exon_emits_deletion():
    """EXON_SKIPPING on an in-frame exon should report a Deletion.

    CFTR exon 4 is 216 nucleotides = 72 codons (216 % 3 == 0), so
    skipping it is in-frame. The candidate may carry a concrete Deletion
    of the exon's amino acids, or fall back to no coding effect with
    only predicted_class_name set; both are accepted.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)

    def is_skip(cand):
        return cand.outcome is SpliceOutcome.EXON_SKIPPING

    skipped = next(filter(is_skip, wrapped.candidates))
    if skipped.coding_effect is None:
        # Fallback path: only the predicted class name is available.
        assert skipped.predicted_class_name in ("Deletion", "FrameShift", "ExonLoss")
    else:
        assert skipped.predicted_class_name == "Deletion"
        assert skipped.coding_effect.aa_ref  # non-empty AA range
258
+
259
+
260
+ # --------------------------------------------------------------------
261
+ # EffectCollection integration
262
+ # --------------------------------------------------------------------
263
+
264
+
265
def test_collection_iteration_after_wrapping():
    """The opted-in collection iterates and contains SpliceOutcomeSet items."""
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    materialized = list(donor_variant.effects(splice_outcomes=True))
    assert len(materialized) > 0
    # The original splice effects should have been replaced by wrappers.
    wrappers = [eff for eff in materialized if isinstance(eff, SpliceOutcomeSet)]
    assert len(wrappers) >= 1
275
+
276
+
277
def test_short_description_uses_most_likely():
    """short_description is 'splice-set:'-prefixed and names the top outcome."""
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    summary = wrapped.short_description
    assert summary.startswith("splice-set:")
    top_outcome_label = wrapped.most_likely.outcome.value
    assert top_outcome_label in summary
285
+
286
+
287
+ # --------------------------------------------------------------------
288
+ # Reverse-strand
289
+ # --------------------------------------------------------------------
290
+
291
+
292
def test_opt_in_works_on_reverse_strand_donor():
    """Opt-in also wraps a reverse-strand donor.

    BRCA1 exon 12 reverse-strand donor at 43082403 with canonical ref C.
    """
    brca1 = ensembl_grch38.transcript_by_id(BRCA1_TRANSCRIPT_ID)
    reverse_donor = Variant("17", 43082403, "C", "T", ensembl_grch38)
    wrapped = next(
        eff
        for eff in reverse_donor.effects(splice_outcomes=True)
        if eff.transcript is brca1
    )
    assert isinstance(wrapped, SpliceOutcomeSet)
    assert wrapped.disrupted_signal_class is SpliceDonor
300
+
301
+
302
+ # --------------------------------------------------------------------
303
+ # Direct enumerate_splice_outcomes tests
304
+ # --------------------------------------------------------------------
305
+
306
+
307
def test_enumerate_passes_through_non_splice():
    """enumerate_splice_outcomes returns a non-splice effect unchanged."""
    brca1 = ensembl_grch38.transcript_by_id(BRCA1_TRANSCRIPT_ID)
    coding_variant = Variant("17", 43082575 - 5, "CCT", "GGG", ensembl_grch38)
    substitution = coding_variant.effect_on_transcript(brca1)
    assert type(substitution).__name__ == "Substitution"
    # Identity, not equality: the very same object must come back.
    assert enumerate_splice_outcomes(substitution) is substitution
315
+
316
+
317
+ # --------------------------------------------------------------------
318
+ # SpliceCandidate dataclass ergonomics
319
+ # --------------------------------------------------------------------
320
+
321
+
322
def test_splice_candidate_is_frozen():
    """Assigning to a field of a SpliceCandidate must be rejected.

    Only AttributeError is accepted, so an unrelated failure (for example
    an error raised while constructing the candidate) cannot be mistaken
    for proof of immutability.
    """
    candidate = SpliceCandidate(
        outcome=SpliceOutcome.EXON_SKIPPING,
        plausibility=0.5,
        description="test",
    )
    # NOTE(review): assumes SpliceCandidate is a frozen dataclass (this
    # section is headed "dataclass ergonomics"); its FrozenInstanceError
    # subclasses AttributeError. Confirm against varcode/splice_outcomes.py.
    with pytest.raises(AttributeError):
        candidate.plausibility = 0.9  # type: ignore
334
+
335
+
336
def test_splice_candidate_equality():
    """Two SpliceCandidates built from identical fields compare equal."""
    shared_fields = dict(
        outcome=SpliceOutcome.EXON_SKIPPING,
        plausibility=0.5,
        description="d",
    )
    first = SpliceCandidate(**shared_fields)
    second = SpliceCandidate(**shared_fields)
    assert first == second
348
+
349
+
350
def test_package_level_exports():
    """The splice-outcome names on `varcode` are the imported objects."""
    exported = {
        "SpliceCandidate": SpliceCandidate,
        "SpliceOutcome": SpliceOutcome,
        "SpliceOutcomeSet": SpliceOutcomeSet,
    }
    for attr_name, imported_obj in exported.items():
        assert getattr(varcode, attr_name) is imported_obj
354
+
355
+
356
+ # --------------------------------------------------------------------
357
+ # MultiOutcomeEffect protocol (see #299 for the planned generalization).
358
+ # --------------------------------------------------------------------
359
+
360
+
361
def test_splice_outcome_set_is_a_multi_outcome_effect():
    """A wrapped splice effect satisfies the MultiOutcomeEffect protocol.

    Downstream consumers filter multi-outcome results with
    isinstance(e, MultiOutcomeEffect), so future wrappers (RNA evidence
    #259, germline-aware #268, etc.) don't force churn.
    """
    from varcode import MultiOutcomeEffect

    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    all_effects = donor_variant.effects(splice_outcomes=True)
    wrapped = next(eff for eff in all_effects if eff.transcript is cftr)
    assert isinstance(wrapped, MultiOutcomeEffect)
    # Protocol surface: candidates, most_likely, priority_class.
    assert hasattr(wrapped, "candidates")
    assert len(wrapped.candidates) > 0
    assert wrapped.most_likely is wrapped.candidates[0]
    assert wrapped.priority_class is wrapped.disrupted_signal_class
375
+
376
+
377
def test_multi_outcome_effect_exported_at_package_level():
    """MultiOutcomeEffect is one object whether imported from varcode or effects."""
    from varcode import MultiOutcomeEffect
    from varcode import effects as effects_module

    assert MultiOutcomeEffect is effects_module.MultiOutcomeEffect
    # SpliceOutcomeSet must be a real subclass, not merely duck-typed.
    assert issubclass(SpliceOutcomeSet, MultiOutcomeEffect)
383
+
384
+
385
def test_non_splice_effects_are_not_multi_outcome():
    """Deterministic effect classes must not inherit from MultiOutcomeEffect.

    Guards against class-hierarchy rearrangements that would accidentally
    mark them as multi-outcome.
    """
    from varcode import MultiOutcomeEffect
    from varcode.effects import Substitution, Silent, Intronic, MutationEffect

    deterministic_classes = (Substitution, Silent, Intronic, MutationEffect)
    for effect_cls in deterministic_classes:
        is_multi = issubclass(effect_cls, MultiOutcomeEffect)
        assert not is_multi, (
            "%s should not be a MultiOutcomeEffect" % effect_cls.__name__)
393
+
394
+
395
+ # --------------------------------------------------------------------
396
+ # Priority integration: SpliceOutcomeSet sorts as if it were the
397
+ # disrupted-signal class (review feedback on PR #292).
398
+ # --------------------------------------------------------------------
399
+
400
+
401
def test_splice_outcome_set_sorts_as_disrupted_signal_class():
    """A wrapped SpliceDonor has the same priority as a bare SpliceDonor.

    If priority delegation is broken, SpliceOutcomeSet gets priority -1
    and sorts to the bottom instead of ranking like the class it wraps.
    """
    from varcode.effects import effect_priority

    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)

    # Baseline: the unwrapped effect on the default path.
    bare_effect = donor_variant.effect_on_transcript(cftr)
    assert isinstance(bare_effect, SpliceDonor)
    bare_priority = effect_priority(bare_effect)

    # Same variant and transcript through the opt-in path.
    wrapped = next(
        eff
        for eff in donor_variant.effects(splice_outcomes=True)
        if eff.transcript is cftr
    )
    assert isinstance(wrapped, SpliceOutcomeSet)
    wrapped_priority = effect_priority(wrapped)

    assert wrapped_priority == bare_priority, (
        "SpliceOutcomeSet priority (%d) must match the disrupted-"
        "signal class priority (%d); otherwise sorting and "
        "top_priority_effect() behave wrongly." % (
            wrapped_priority, bare_priority))
424
+
425
+
426
def test_splice_outcome_set_top_priority_works():
    """top_priority_effect() picks a splice-related effect when opted in.

    The wrapped SpliceDonor outranks Intronic/NoncodingTranscript effects
    from other overlapping transcripts, so it should not lose to them.
    """
    donor_variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
    collection = donor_variant.effects(splice_outcomes=True)
    winner_name = type(collection.top_priority_effect()).__name__
    acceptable = ("SpliceOutcomeSet", "SpliceDonor")
    assert winner_name in acceptable, (
        "Expected a splice-related effect at top priority, got %s"
        % winner_name)
440
+
441
+
442
+ # --------------------------------------------------------------------
443
+ # Acceptor-side IntronicSpliceSite emits CRYPTIC_ACCEPTOR, not DONOR.
444
+ # --------------------------------------------------------------------
445
+
446
+
447
def test_acceptor_side_intronic_splice_site_uses_cryptic_acceptor():
    """Acceptor-side IntronicSpliceSite lists CRYPTIC_ACCEPTOR, not DONOR.

    chr7:117530896 is 3bp before CFTR exon 4 (forward strand), outside
    the -1/-2 positions covered by SpliceAcceptor. The bare effect is
    first confirmed to be a plain IntronicSpliceSite.
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    minus3_variant = Variant("7", 117530896, "G", "T", ensembl_grch38)

    bare_effect = minus3_variant.effect_on_transcript(cftr)
    assert isinstance(bare_effect, IntronicSpliceSite)
    assert not isinstance(bare_effect, (SpliceDonor, SpliceAcceptor))

    wrapped = next(
        eff
        for eff in minus3_variant.effects(splice_outcomes=True)
        if eff.transcript is cftr
    )
    observed = {cand.outcome for cand in wrapped.candidates}
    assert SpliceOutcome.CRYPTIC_ACCEPTOR in observed, \
        "Acceptor-side IntronicSpliceSite should use CRYPTIC_ACCEPTOR"
    assert SpliceOutcome.CRYPTIC_DONOR not in observed, \
        "Acceptor-side IntronicSpliceSite should not use CRYPTIC_DONOR"
471
+
472
+
473
def test_donor_side_intronic_splice_site_uses_cryptic_donor():
    """Donor-side IntronicSpliceSite lists CRYPTIC_DONOR, not ACCEPTOR.

    Mirror of the acceptor-side test, at +3 after the end of CFTR exon 4
    (117531117 = exon.end + 3).
    """
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    plus3_variant = Variant("7", 117531117, "G", "T", ensembl_grch38)

    bare_effect = plus3_variant.effect_on_transcript(cftr)
    assert isinstance(bare_effect, IntronicSpliceSite)
    assert not isinstance(bare_effect, (SpliceDonor, SpliceAcceptor))

    wrapped = next(
        eff
        for eff in plus3_variant.effects(splice_outcomes=True)
        if eff.transcript is cftr
    )
    observed = {cand.outcome for cand in wrapped.candidates}
    assert SpliceOutcome.CRYPTIC_DONOR in observed
    assert SpliceOutcome.CRYPTIC_ACCEPTOR not in observed
487
+
488
+
489
+ # --------------------------------------------------------------------
490
+ # Multi-protein surface: candidate_proteins and mutant_protein_sequences
491
+ # --------------------------------------------------------------------
492
+
493
+
494
+ def test_candidate_proteins_maps_each_outcome_to_a_protein():
495
+ variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
496
+ transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
497
+ effects = variant.effects(splice_outcomes=True)
498
+ target = next(e for e in effects if e.transcript is transcript)
499
+ proteins = target.candidate_proteins
500
+ # Every candidate outcome appears as a key.
501
+ outcomes = {c.outcome for c in target.candidates}
502
+ assert set(proteins.keys()) == outcomes
503
+ # EXON_SKIPPING for an in-frame exon should have a non-empty
504
+ # protein (reference minus the skipped AAs). CFTR exon 4 is 216
505
+ # nucleotides = 72 codons = in-frame.
506
+ assert proteins[SpliceOutcome.EXON_SKIPPING], \
507
+ "Expected a concrete mutant protein for in-frame exon skipping"
508
+ # INTRON_RETENTION and CRYPTIC_DONOR are stubs → empty string for now.
509
+ assert proteins[SpliceOutcome.INTRON_RETENTION] == ""
510
+ assert proteins[SpliceOutcome.CRYPTIC_DONOR] == ""
511
+
512
+
513
+ def test_mutant_protein_sequences_collects_distinct_proteins():
514
+ variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
515
+ transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
516
+ effects = variant.effects(splice_outcomes=True)
517
+ target = next(e for e in effects if e.transcript is transcript)
518
+ proteins = target.mutant_protein_sequences
519
+ assert isinstance(proteins, set)
520
+ assert len(proteins) >= 1
521
+ # The set holds the distinct candidate proteins; at least one of
521
+ # them (the exon-skipped version) must be shorter than the reference.
523
+ ref = str(transcript.protein_sequence)
524
+ # The in-frame exon skip removes exon 4 AAs; resulting protein
525
+ # should be shorter than reference.
526
+ shortest = min(proteins, key=len)
527
+ assert len(shortest) < len(ref)
528
+
529
+
530
+ # --------------------------------------------------------------------
531
+ # Out-of-frame exon skip now produces a real mutant protein
532
+ # --------------------------------------------------------------------
533
+
534
+
535
+ def test_out_of_frame_exon_skip_produces_mutant_protein():
536
+ # CFTR exon 5 is 90 nucleotides = 30 codons, so skipping it stays
537
+ # in frame and it cannot be used here. Rather than hard-coding a
538
+ # variant, find an out-of-frame exon empirically: take the first
539
+ # exon (from index 2 on) whose length is not divisible by 3.
540
+ transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
541
+ target_exon = None
542
+ for exon in transcript.exons[2:]:
543
+ length = exon.end - exon.start + 1
544
+ if length % 3 != 0:
545
+ target_exon = exon
546
+ break
547
+ if target_exon is None:
548
+ pytest.skip("No out-of-frame exon found in CFTR beyond exon 2")
549
+
550
+ # Construct a donor-side disrupting variant at this exon's end
551
+ # (+1 position after the exon in + strand coords).
552
+ donor_plus_1 = target_exon.end + 1
553
+ # Use a canonical-ref SNV to ensure SpliceDonor classification.
554
+ variant = Variant("7", donor_plus_1, "G", "A", ensembl_grch38)
555
+ bare = variant.effect_on_transcript(transcript)
556
+ if not isinstance(bare, SpliceDonor):
557
+ pytest.skip(
558
+ "Canonical donor G not present at %d; classifier emitted %s "
559
+ "rather than SpliceDonor." % (donor_plus_1, type(bare).__name__))
560
+ effects = variant.effects(splice_outcomes=True)
561
+ splice_set = next(e for e in effects if e.transcript is transcript)
562
+ skip_candidate = next(
563
+ c for c in splice_set.candidates
564
+ if c.outcome is SpliceOutcome.EXON_SKIPPING
565
+ )
566
+ # Out-of-frame skip should now carry a mutant protein.
567
+ assert skip_candidate.coding_effect is not None, (
568
+ "Out-of-frame exon skip should produce a concrete mutant "
569
+ "protein, not a stub")
570
+ protein = skip_candidate.coding_effect.mutant_protein_sequence
571
+ assert isinstance(protein, str)
572
+ assert len(protein) > 0
573
+ # The frameshifted protein should differ from the reference
574
+ # after the skip point.
575
+ assert protein != str(transcript.protein_sequence)
576
+
577
+
578
+ # --------------------------------------------------------------------
579
+ # has_protein property on SpliceCandidate
580
+ # --------------------------------------------------------------------
581
+
582
+
583
+ def test_has_protein_is_true_for_candidates_with_coding_effect():
584
+ variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
585
+ transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
586
+ effects = variant.effects(splice_outcomes=True)
587
+ target = next(e for e in effects if e.transcript is transcript)
588
+ # NORMAL_SPLICING has a Substitution coding_effect with a protein.
589
+ normal = next(
590
+ c for c in target.candidates
591
+ if c.outcome is SpliceOutcome.NORMAL_SPLICING
592
+ )
593
+ assert normal.has_protein is True
594
+
595
+
596
+ def test_has_protein_is_false_for_stub_candidates():
597
+ variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
598
+ transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
599
+ effects = variant.effects(splice_outcomes=True)
600
+ target = next(e for e in effects if e.transcript is transcript)
601
+ intron = next(
602
+ c for c in target.candidates
603
+ if c.outcome is SpliceOutcome.INTRON_RETENTION
604
+ )
605
+ assert intron.has_protein is False