varcode 2.3.0__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {varcode-2.3.0/varcode.egg-info → varcode-2.4.0}/PKG-INFO +17 -2
- {varcode-2.3.0 → varcode-2.4.0}/README.md +16 -1
- varcode-2.4.0/tests/test_splice_outcomes.py +605 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/__init__.py +12 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/__init__.py +2 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_classes.py +26 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_collection.py +6 -3
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_ordering.py +14 -1
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction.py +16 -2
- varcode-2.4.0/varcode/splice_outcomes.py +722 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/variant.py +21 -2
- {varcode-2.3.0 → varcode-2.4.0}/varcode/variant_collection.py +10 -2
- varcode-2.4.0/varcode/version.py +1 -0
- {varcode-2.3.0 → varcode-2.4.0/varcode.egg-info}/PKG-INFO +17 -2
- {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/SOURCES.txt +2 -0
- varcode-2.3.0/varcode/version.py +0 -1
- {varcode-2.3.0 → varcode-2.4.0}/LICENSE +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/MANIFEST.in +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/pyproject.toml +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/requirements.txt +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/setup.cfg +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/__init__.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/benchmark_vcf_load.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/common.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/data.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_cli_effects.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_cli_genes.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_collection_filtering.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_collection_variants_attr_consistency.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_common.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_cosmic_mutations.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_csv_roundtrip.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_dbnsfp_validation.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_annotation_errors.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_classes.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection_serialization.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effect_collection_sort_order.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_effects_from_mutagenix_variants.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_exonic_splice_site.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_frameshift_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_genotype.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_genotype_from_vcf.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_maf.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_mm10_klf6_frameshift.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_mouse.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_mutate.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_no_duplicate_variants.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_premature_stop_short_description.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_problematic_variants.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_reference.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_reference_mismatch_error.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_silent_aa_pos.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_silent_hgvs_description.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_splice_site_effects.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_stop_codon_classification_bugs.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_string_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_symbolic_alleles.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_timings.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_variant.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_variant_collection.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_vcf.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/tests/test_vcf_output.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/__init__.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/effects_script.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/genes_script.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/logging.conf +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/variant_args.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/cli/version_info.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/common.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/csv_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/common.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding_frameshift.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/effect_prediction_coding_in_frame.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/mutate.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/transcript_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/effects/translate.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/errors.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/genotype.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/maf.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/nucleotides.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/reference.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/string_helpers.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/ucsc_reference_names.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/util.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/vcf.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode/vcf_output.py +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/dependency_links.txt +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/entry_points.txt +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/requires.txt +0 -0
- {varcode-2.3.0 → varcode-2.4.0}/varcode.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: varcode
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: Variant annotation in Python
|
|
5
5
|
Author-email: Alex Rubinsteyn <alex.rubinsteyn@unc.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/openvax/varcode
|
|
@@ -106,7 +106,22 @@ print(premature_stop_effect.gene.name)
|
|
|
106
106
|
### 'TP53'
|
|
107
107
|
```
|
|
108
108
|
|
|
109
|
-
If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode
|
|
109
|
+
If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode.
|
|
110
|
+
|
|
111
|
+
## Further reading
|
|
112
|
+
|
|
113
|
+
Feature guides live in [`docs/`](./docs/):
|
|
114
|
+
|
|
115
|
+
- [**Genotypes and sample-aware queries**](./docs/genotype.md) — per-sample
|
|
116
|
+
zygosity on multi-sample VCFs (`Genotype`, `Zygosity`, `VariantCollection.for_sample`,
|
|
117
|
+
`.heterozygous_in`, `.homozygous_alt_in`). New in 2.3.
|
|
118
|
+
- [**CSV round-trip and metadata headers**](./docs/csv.md) — `to_csv` /
|
|
119
|
+
`from_csv` on both collection types, with `#`-prefixed provenance
|
|
120
|
+
headers. New in 2.1, refined in 2.2.
|
|
121
|
+
- [**Error handling**](./docs/errors.md) — `ReferenceMismatchError`,
|
|
122
|
+
`SampleNotFoundError`, and the `raise_on_error=False` escape hatch.
|
|
123
|
+
|
|
124
|
+
See [`CHANGELOG.md`](./CHANGELOG.md) for the release history.
|
|
110
125
|
|
|
111
126
|
## Effect Types
|
|
112
127
|
|
|
@@ -79,7 +79,22 @@ print(premature_stop_effect.gene.name)
|
|
|
79
79
|
### 'TP53'
|
|
80
80
|
```
|
|
81
81
|
|
|
82
|
-
If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode
|
|
82
|
+
If you are looking for a quick start guide, you can check out [this iPython book](./examples/varcode-quick_start.ipynb) that demonstrates simple use cases of Varcode.
|
|
83
|
+
|
|
84
|
+
## Further reading
|
|
85
|
+
|
|
86
|
+
Feature guides live in [`docs/`](./docs/):
|
|
87
|
+
|
|
88
|
+
- [**Genotypes and sample-aware queries**](./docs/genotype.md) — per-sample
|
|
89
|
+
zygosity on multi-sample VCFs (`Genotype`, `Zygosity`, `VariantCollection.for_sample`,
|
|
90
|
+
`.heterozygous_in`, `.homozygous_alt_in`). New in 2.3.
|
|
91
|
+
- [**CSV round-trip and metadata headers**](./docs/csv.md) — `to_csv` /
|
|
92
|
+
`from_csv` on both collection types, with `#`-prefixed provenance
|
|
93
|
+
headers. New in 2.1, refined in 2.2.
|
|
94
|
+
- [**Error handling**](./docs/errors.md) — `ReferenceMismatchError`,
|
|
95
|
+
`SampleNotFoundError`, and the `raise_on_error=False` escape hatch.
|
|
96
|
+
|
|
97
|
+
See [`CHANGELOG.md`](./CHANGELOG.md) for the release history.
|
|
83
98
|
|
|
84
99
|
## Effect Types
|
|
85
100
|
|
|
@@ -0,0 +1,605 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
Tests for the splice outcome possibility-set prototype
|
|
15
|
+
(openvax/varcode#262).
|
|
16
|
+
|
|
17
|
+
Coverage:
|
|
18
|
+
- Default behavior unchanged (back-compat)
|
|
19
|
+
- Opt-in wraps splice effects in SpliceOutcomeSet
|
|
20
|
+
- Each canonical splice signal class produces the expected outcome set
|
|
21
|
+
- Plausibility ordering is stable
|
|
22
|
+
- Per-outcome candidate construction (normal splicing, exon
|
|
23
|
+
skipping, intron retention stubs, cryptic splice stubs)
|
|
24
|
+
- SpliceOutcomeSet integrates with EffectCollection
|
|
25
|
+
- Multi-allelic and reverse-strand variants work too
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import pytest
|
|
29
|
+
from pyensembl import cached_release
|
|
30
|
+
|
|
31
|
+
import varcode
|
|
32
|
+
from varcode import (
|
|
33
|
+
SpliceCandidate,
|
|
34
|
+
SpliceOutcome,
|
|
35
|
+
SpliceOutcomeSet,
|
|
36
|
+
Variant,
|
|
37
|
+
)
|
|
38
|
+
from varcode.effects import (
|
|
39
|
+
ExonicSpliceSite,
|
|
40
|
+
IntronicSpliceSite,
|
|
41
|
+
SpliceAcceptor,
|
|
42
|
+
SpliceDonor,
|
|
43
|
+
)
|
|
44
|
+
from varcode.splice_outcomes import enumerate_splice_outcomes
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
ensembl_grch38 = cached_release(81)
|
|
48
|
+
CFTR_TRANSCRIPT_ID = "ENST00000003084"
|
|
49
|
+
BRCA1_TRANSCRIPT_ID = "ENST00000357654"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# --------------------------------------------------------------------
|
|
53
|
+
# Back-compat
|
|
54
|
+
# --------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_default_behavior_unchanged():
|
|
58
|
+
# No kwarg -> same as today: SpliceDonor effect, no wrapping.
|
|
59
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
60
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
61
|
+
effect = variant.effect_on_transcript(transcript)
|
|
62
|
+
assert effect.__class__ is SpliceDonor
|
|
63
|
+
assert not isinstance(effect, SpliceOutcomeSet)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_default_for_collection_unchanged():
|
|
67
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
68
|
+
effects = variant.effects() # default: splice_outcomes=False
|
|
69
|
+
classes = {type(e) for e in effects}
|
|
70
|
+
assert SpliceOutcomeSet not in classes
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# --------------------------------------------------------------------
|
|
74
|
+
# Opt-in wraps splice effects
|
|
75
|
+
# --------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_opt_in_wraps_splice_donor():
|
|
79
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
80
|
+
effects = variant.effects(splice_outcomes=True)
|
|
81
|
+
cftr_effect = next(
|
|
82
|
+
e for e in effects
|
|
83
|
+
if getattr(e, "transcript", None) is not None
|
|
84
|
+
and e.transcript.id == CFTR_TRANSCRIPT_ID
|
|
85
|
+
)
|
|
86
|
+
assert isinstance(cftr_effect, SpliceOutcomeSet)
|
|
87
|
+
assert cftr_effect.disrupted_signal_class is SpliceDonor
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_opt_in_wraps_splice_acceptor():
|
|
91
|
+
# CFTR exon 4 acceptor -1 with canonical ref G.
|
|
92
|
+
variant = Variant("7", 117530898, "G", "A", ensembl_grch38)
|
|
93
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
94
|
+
effect = variant.effects(splice_outcomes=True)
|
|
95
|
+
target = next(e for e in effect if e.transcript is transcript)
|
|
96
|
+
assert isinstance(target, SpliceOutcomeSet)
|
|
97
|
+
assert target.disrupted_signal_class is SpliceAcceptor
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_opt_in_wraps_exonic_splice_site():
|
|
101
|
+
# CFTR exon 4 ends with AAG. G->T at -1 disrupts the MAG signal.
|
|
102
|
+
variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
|
|
103
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
104
|
+
effects = variant.effects(splice_outcomes=True)
|
|
105
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
106
|
+
assert isinstance(target, SpliceOutcomeSet)
|
|
107
|
+
assert target.disrupted_signal_class is ExonicSpliceSite
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_opt_in_wraps_intronic_splice_site():
|
|
111
|
+
# CFTR exon 4 +1 with NON-canonical ref A is downgraded to
|
|
112
|
+
# IntronicSpliceSite (post-2.0.0 sequence-aware classification).
|
|
113
|
+
variant = Variant("7", 117531115, "A", "G", ensembl_grch38)
|
|
114
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
115
|
+
effects = variant.effects(splice_outcomes=True)
|
|
116
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
117
|
+
assert isinstance(target, SpliceOutcomeSet)
|
|
118
|
+
assert target.disrupted_signal_class is IntronicSpliceSite
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_opt_in_passes_through_non_splice_effects():
|
|
122
|
+
# Pure substitution that doesn't touch any splice signal: should
|
|
123
|
+
# not be wrapped.
|
|
124
|
+
variant = Variant("17", 43082575 - 5, "CCT", "GGG", ensembl_grch38)
|
|
125
|
+
effects = variant.effects(splice_outcomes=True)
|
|
126
|
+
# At least one effect should be a Substitution, not wrapped.
|
|
127
|
+
classes = [type(e).__name__ for e in effects]
|
|
128
|
+
assert "Substitution" in classes
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --------------------------------------------------------------------
|
|
132
|
+
# Plausibility ordering and candidate composition
|
|
133
|
+
# --------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_splice_donor_candidate_set_has_expected_outcomes():
|
|
137
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
138
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
139
|
+
effect = variant.effects(splice_outcomes=True)
|
|
140
|
+
target = next(e for e in effect if e.transcript is transcript)
|
|
141
|
+
outcomes = {c.outcome for c in target.candidates}
|
|
142
|
+
assert outcomes == {
|
|
143
|
+
SpliceOutcome.EXON_SKIPPING,
|
|
144
|
+
SpliceOutcome.INTRON_RETENTION,
|
|
145
|
+
SpliceOutcome.CRYPTIC_DONOR,
|
|
146
|
+
SpliceOutcome.NORMAL_SPLICING,
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def test_splice_acceptor_candidate_set_uses_cryptic_acceptor():
|
|
151
|
+
variant = Variant("7", 117530898, "G", "A", ensembl_grch38)
|
|
152
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
153
|
+
effects = variant.effects(splice_outcomes=True)
|
|
154
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
155
|
+
outcomes = {c.outcome for c in target.candidates}
|
|
156
|
+
# SpliceAcceptor disruption uses CRYPTIC_ACCEPTOR not CRYPTIC_DONOR.
|
|
157
|
+
assert SpliceOutcome.CRYPTIC_ACCEPTOR in outcomes
|
|
158
|
+
assert SpliceOutcome.CRYPTIC_DONOR not in outcomes
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def test_candidates_sorted_by_plausibility_descending():
|
|
162
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
163
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
164
|
+
effects = variant.effects(splice_outcomes=True)
|
|
165
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
166
|
+
plaus = [c.plausibility for c in target.candidates]
|
|
167
|
+
assert plaus == sorted(plaus, reverse=True), (
|
|
168
|
+
"Candidates should be ordered most-plausible-first, got %r"
|
|
169
|
+
% plaus)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def test_most_likely_for_splice_donor_is_exon_skipping():
|
|
173
|
+
# Per the plausibility table, EXON_SKIPPING dominates SpliceDonor.
|
|
174
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
175
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
176
|
+
effects = variant.effects(splice_outcomes=True)
|
|
177
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
178
|
+
assert target.most_likely.outcome is SpliceOutcome.EXON_SKIPPING
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_most_likely_for_exonic_splice_site_is_normal_splicing():
|
|
182
|
+
# ExonicSpliceSite gets NORMAL_SPLICING as the most-likely
|
|
183
|
+
# outcome (the disruption is on the exon side and often tolerated).
|
|
184
|
+
variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
|
|
185
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
186
|
+
effects = variant.effects(splice_outcomes=True)
|
|
187
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
188
|
+
assert target.most_likely.outcome is SpliceOutcome.NORMAL_SPLICING
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def test_normal_splicing_carries_underlying_coding_effect():
|
|
192
|
+
# ExonicSpliceSite has an alternate_effect (the coding change if
|
|
193
|
+
# splicing proceeds). NORMAL_SPLICING candidate exposes it.
|
|
194
|
+
variant = Variant("7", 117531114, "G", "T", ensembl_grch38)
|
|
195
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
196
|
+
effects = variant.effects(splice_outcomes=True)
|
|
197
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
198
|
+
normal = next(
|
|
199
|
+
c for c in target.candidates
|
|
200
|
+
if c.outcome is SpliceOutcome.NORMAL_SPLICING
|
|
201
|
+
)
|
|
202
|
+
assert normal.coding_effect is not None
|
|
203
|
+
assert "p." in normal.coding_effect.short_description
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# --------------------------------------------------------------------
|
|
207
|
+
# Per-outcome detail
|
|
208
|
+
# --------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def test_intron_retention_candidate_predicts_premature_stop():
|
|
212
|
+
# Intron retention typically produces a PrematureStop. Stub
|
|
213
|
+
# without exact protein since we don't have intronic genomic seq.
|
|
214
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
215
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
216
|
+
effects = variant.effects(splice_outcomes=True)
|
|
217
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
218
|
+
intron = next(
|
|
219
|
+
c for c in target.candidates
|
|
220
|
+
if c.outcome is SpliceOutcome.INTRON_RETENTION
|
|
221
|
+
)
|
|
222
|
+
assert intron.predicted_class_name == "PrematureStop"
|
|
223
|
+
assert intron.coding_effect is None
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_cryptic_donor_candidate_is_a_stub():
|
|
227
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
228
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
229
|
+
effects = variant.effects(splice_outcomes=True)
|
|
230
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
231
|
+
cryptic = next(
|
|
232
|
+
c for c in target.candidates
|
|
233
|
+
if c.outcome is SpliceOutcome.CRYPTIC_DONOR
|
|
234
|
+
)
|
|
235
|
+
assert cryptic.coding_effect is None
|
|
236
|
+
assert "cryptic" in cryptic.description.lower()
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def test_exon_skipping_for_in_frame_exon_emits_deletion():
|
|
240
|
+
# CFTR exon 4 is 216 nucleotides = 72 codons (216 % 3 == 0), so
|
|
241
|
+
# skipping it is in-frame. The candidate should report Deletion
|
|
242
|
+
# of the exon's amino acids.
|
|
243
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
244
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
245
|
+
effects = variant.effects(splice_outcomes=True)
|
|
246
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
247
|
+
skip = next(
|
|
248
|
+
c for c in target.candidates
|
|
249
|
+
if c.outcome is SpliceOutcome.EXON_SKIPPING
|
|
250
|
+
)
|
|
251
|
+
# Either a Deletion was constructed, or the candidate falls back
|
|
252
|
+
# to None with predicted_class_name still set. Both are valid.
|
|
253
|
+
if skip.coding_effect is not None:
|
|
254
|
+
assert skip.predicted_class_name == "Deletion"
|
|
255
|
+
assert skip.coding_effect.aa_ref # non-empty AA range
|
|
256
|
+
else:
|
|
257
|
+
assert skip.predicted_class_name in ("Deletion", "FrameShift", "ExonLoss")
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# --------------------------------------------------------------------
|
|
261
|
+
# EffectCollection integration
|
|
262
|
+
# --------------------------------------------------------------------
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def test_collection_iteration_after_wrapping():
|
|
266
|
+
# The wrapped collection should still be iterable, indexable, and
|
|
267
|
+
# produce SpliceOutcomeSet objects in place of the original splice
|
|
268
|
+
# effects.
|
|
269
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
270
|
+
effects = variant.effects(splice_outcomes=True)
|
|
271
|
+
items = list(effects)
|
|
272
|
+
assert len(items) > 0
|
|
273
|
+
splice_set_count = sum(1 for e in items if isinstance(e, SpliceOutcomeSet))
|
|
274
|
+
assert splice_set_count >= 1
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def test_short_description_uses_most_likely():
|
|
278
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
279
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
280
|
+
effects = variant.effects(splice_outcomes=True)
|
|
281
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
282
|
+
desc = target.short_description
|
|
283
|
+
assert desc.startswith("splice-set:")
|
|
284
|
+
assert target.most_likely.outcome.value in desc
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# --------------------------------------------------------------------
|
|
288
|
+
# Reverse-strand
|
|
289
|
+
# --------------------------------------------------------------------
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def test_opt_in_works_on_reverse_strand_donor():
|
|
293
|
+
# BRCA1 exon 12 reverse-strand donor at 43082403 with canonical ref C.
|
|
294
|
+
variant = Variant("17", 43082403, "C", "T", ensembl_grch38)
|
|
295
|
+
transcript = ensembl_grch38.transcript_by_id(BRCA1_TRANSCRIPT_ID)
|
|
296
|
+
effects = variant.effects(splice_outcomes=True)
|
|
297
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
298
|
+
assert isinstance(target, SpliceOutcomeSet)
|
|
299
|
+
assert target.disrupted_signal_class is SpliceDonor
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# --------------------------------------------------------------------
|
|
303
|
+
# Direct enumerate_splice_outcomes tests
|
|
304
|
+
# --------------------------------------------------------------------
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def test_enumerate_passes_through_non_splice():
|
|
308
|
+
# Non-splice effect should pass through unchanged.
|
|
309
|
+
variant = Variant("17", 43082575 - 5, "CCT", "GGG", ensembl_grch38)
|
|
310
|
+
transcript = ensembl_grch38.transcript_by_id(BRCA1_TRANSCRIPT_ID)
|
|
311
|
+
sub_effect = variant.effect_on_transcript(transcript)
|
|
312
|
+
assert type(sub_effect).__name__ == "Substitution"
|
|
313
|
+
wrapped = enumerate_splice_outcomes(sub_effect)
|
|
314
|
+
assert wrapped is sub_effect
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# --------------------------------------------------------------------
|
|
318
|
+
# SpliceCandidate dataclass ergonomics
|
|
319
|
+
# --------------------------------------------------------------------
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def test_splice_candidate_is_frozen():
|
|
323
|
+
c = SpliceCandidate(
|
|
324
|
+
outcome=SpliceOutcome.EXON_SKIPPING,
|
|
325
|
+
plausibility=0.5,
|
|
326
|
+
description="test",
|
|
327
|
+
)
|
|
328
|
+
try:
|
|
329
|
+
c.plausibility = 0.9 # type: ignore
|
|
330
|
+
except Exception:
|
|
331
|
+
pass
|
|
332
|
+
else:
|
|
333
|
+
raise AssertionError("SpliceCandidate should be frozen")
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def test_splice_candidate_equality():
|
|
337
|
+
a = SpliceCandidate(
|
|
338
|
+
outcome=SpliceOutcome.EXON_SKIPPING,
|
|
339
|
+
plausibility=0.5,
|
|
340
|
+
description="d",
|
|
341
|
+
)
|
|
342
|
+
b = SpliceCandidate(
|
|
343
|
+
outcome=SpliceOutcome.EXON_SKIPPING,
|
|
344
|
+
plausibility=0.5,
|
|
345
|
+
description="d",
|
|
346
|
+
)
|
|
347
|
+
assert a == b
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def test_package_level_exports():
|
|
351
|
+
assert varcode.SpliceCandidate is SpliceCandidate
|
|
352
|
+
assert varcode.SpliceOutcome is SpliceOutcome
|
|
353
|
+
assert varcode.SpliceOutcomeSet is SpliceOutcomeSet
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
# --------------------------------------------------------------------
|
|
357
|
+
# MultiOutcomeEffect protocol (see #299 for the planned generalization).
|
|
358
|
+
# --------------------------------------------------------------------
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def test_splice_outcome_set_is_a_multi_outcome_effect():
|
|
362
|
+
# Downstream consumers filter multi-outcome results with
|
|
363
|
+
# isinstance(e, MultiOutcomeEffect) so future wrappers (RNA
|
|
364
|
+
# evidence #259, germline-aware #268, etc.) don't force churn.
|
|
365
|
+
from varcode import MultiOutcomeEffect
|
|
366
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
367
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
368
|
+
effects = variant.effects(splice_outcomes=True)
|
|
369
|
+
target = next(e for e in effects if e.transcript is transcript)
|
|
370
|
+
assert isinstance(target, MultiOutcomeEffect)
|
|
371
|
+
# Protocol surface: candidates, most_likely, priority_class.
|
|
372
|
+
assert hasattr(target, "candidates") and len(target.candidates) > 0
|
|
373
|
+
assert target.most_likely is target.candidates[0]
|
|
374
|
+
assert target.priority_class is target.disrupted_signal_class
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def test_multi_outcome_effect_exported_at_package_level():
|
|
378
|
+
from varcode import MultiOutcomeEffect
|
|
379
|
+
from varcode import effects
|
|
380
|
+
assert MultiOutcomeEffect is effects.MultiOutcomeEffect
|
|
381
|
+
# Confirm SpliceOutcomeSet is a subclass, not a duck.
|
|
382
|
+
assert issubclass(SpliceOutcomeSet, MultiOutcomeEffect)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def test_non_splice_effects_are_not_multi_outcome():
|
|
386
|
+
# Guard against future class-hierarchy rearrangements that might
|
|
387
|
+
# accidentally mark deterministic effects as multi-outcome.
|
|
388
|
+
from varcode import MultiOutcomeEffect
|
|
389
|
+
from varcode.effects import Substitution, Silent, Intronic, MutationEffect
|
|
390
|
+
for cls in (Substitution, Silent, Intronic, MutationEffect):
|
|
391
|
+
assert not issubclass(cls, MultiOutcomeEffect), (
|
|
392
|
+
"%s should not be a MultiOutcomeEffect" % cls.__name__)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# --------------------------------------------------------------------
|
|
396
|
+
# Priority integration: SpliceOutcomeSet sorts as if it were the
|
|
397
|
+
# disrupted-signal class (review feedback on PR #292).
|
|
398
|
+
# --------------------------------------------------------------------
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def test_splice_outcome_set_sorts_as_disrupted_signal_class():
|
|
402
|
+
# When wrapped, a SpliceDonor-backed SpliceOutcomeSet should have
|
|
403
|
+
# the same priority as a bare SpliceDonor — higher than Intronic,
|
|
404
|
+
# lower than Substitution. If the priority delegation is broken,
|
|
405
|
+
# SpliceOutcomeSet gets priority -1 and sorts to the bottom.
|
|
406
|
+
from varcode.effects import effect_priority
|
|
407
|
+
|
|
408
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
409
|
+
transcript = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
|
|
410
|
+
bare_effect = variant.effect_on_transcript(transcript)
|
|
411
|
+
assert isinstance(bare_effect, SpliceDonor)
|
|
412
|
+
bare_priority = effect_priority(bare_effect)
|
|
413
|
+
|
|
414
|
+
wrapped_effects = variant.effects(splice_outcomes=True)
|
|
415
|
+
wrapped = next(e for e in wrapped_effects if e.transcript is transcript)
|
|
416
|
+
assert isinstance(wrapped, SpliceOutcomeSet)
|
|
417
|
+
wrapped_priority = effect_priority(wrapped)
|
|
418
|
+
|
|
419
|
+
assert wrapped_priority == bare_priority, (
|
|
420
|
+
"SpliceOutcomeSet priority (%d) must match the disrupted-"
|
|
421
|
+
"signal class priority (%d); otherwise sorting and "
|
|
422
|
+
"top_priority_effect() behave wrongly." % (
|
|
423
|
+
wrapped_priority, bare_priority))
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def test_splice_outcome_set_top_priority_works():
|
|
427
|
+
# top_priority_effect on a collection containing SpliceOutcomeSet
|
|
428
|
+
# should not pick a lower-priority non-splice effect.
|
|
429
|
+
variant = Variant("7", 117531115, "G", "A", ensembl_grch38)
|
|
430
|
+
effects = variant.effects(splice_outcomes=True)
|
|
431
|
+
top = effects.top_priority_effect()
|
|
432
|
+
# The wrapped SpliceDonor (or one of the splice-set variants) is
|
|
433
|
+
# higher priority than Intronic/NoncodingTranscript from other
|
|
434
|
+
# overlapping transcripts, so the top should be a splice-related
|
|
435
|
+
# effect.
|
|
436
|
+
top_class_name = type(top).__name__
|
|
437
|
+
assert top_class_name in ("SpliceOutcomeSet", "SpliceDonor"), (
|
|
438
|
+
"Expected a splice-related effect at top priority, got %s"
|
|
439
|
+
% top_class_name)
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
# --------------------------------------------------------------------
|
|
443
|
+
# Acceptor-side IntronicSpliceSite emits CRYPTIC_ACCEPTOR, not DONOR.
|
|
444
|
+
# --------------------------------------------------------------------
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def test_acceptor_side_intronic_splice_site_uses_cryptic_acceptor():
    # An IntronicSpliceSite on the acceptor side of an exon should be
    # expanded into a splice set that offers CRYPTIC_ACCEPTOR (the
    # cryptic direction relevant to that side) and never CRYPTIC_DONOR.
    from varcode.effects import IntronicSpliceSite

    # chr7:117530896 sits 3bp before the start of CFTR exon 4 (forward
    # strand). That is outside the -1/-2 window covered by
    # SpliceAcceptor, so the classifier is expected to report a plain
    # IntronicSpliceSite here.
    snv = Variant("7", 117530896, "G", "T", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)

    unwrapped = snv.effect_on_transcript(cftr)
    # Must be the generic intronic splice-site class, not one of the
    # more specific donor/acceptor classes.
    assert isinstance(unwrapped, IntronicSpliceSite)
    assert not isinstance(unwrapped, (SpliceDonor, SpliceAcceptor))

    wrapped = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )
    seen_outcomes = {candidate.outcome for candidate in wrapped.candidates}
    assert SpliceOutcome.CRYPTIC_ACCEPTOR in seen_outcomes, \
        "Acceptor-side IntronicSpliceSite should use CRYPTIC_ACCEPTOR"
    assert SpliceOutcome.CRYPTIC_DONOR not in seen_outcomes, \
        "Acceptor-side IntronicSpliceSite should not use CRYPTIC_DONOR"
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def test_donor_side_intronic_splice_site_uses_cryptic_donor():
    # Donor-side counterpart of the acceptor-side test: an
    # IntronicSpliceSite 3bp past the end of CFTR exon 4
    # (117531117 = exon.end + 3) should yield CRYPTIC_DONOR and not
    # CRYPTIC_ACCEPTOR.
    from varcode.effects import IntronicSpliceSite

    snv = Variant("7", 117531117, "G", "T", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)

    unwrapped = snv.effect_on_transcript(cftr)
    # Generic intronic splice-site class only; the specific
    # donor/acceptor subclasses would invalidate the premise.
    assert isinstance(unwrapped, IntronicSpliceSite)
    assert not isinstance(unwrapped, (SpliceDonor, SpliceAcceptor))

    wrapped = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )
    seen_outcomes = {candidate.outcome for candidate in wrapped.candidates}
    assert SpliceOutcome.CRYPTIC_DONOR in seen_outcomes
    assert SpliceOutcome.CRYPTIC_ACCEPTOR not in seen_outcomes
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
# --------------------------------------------------------------------
|
|
490
|
+
# Multi-protein surface: candidate_proteins and mutant_protein_sequences
|
|
491
|
+
# --------------------------------------------------------------------
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def test_candidate_proteins_maps_each_outcome_to_a_protein():
    # candidate_proteins should expose exactly one entry per candidate
    # outcome of the splice set.
    snv = Variant("7", 117531115, "G", "A", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    splice_set = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )

    protein_by_outcome = splice_set.candidate_proteins
    expected_keys = {candidate.outcome for candidate in splice_set.candidates}
    assert set(protein_by_outcome.keys()) == expected_keys

    # CFTR exon 4 is 216 nucleotides (72 codons), so skipping it stays
    # in frame and must give a non-empty protein: the reference minus
    # the skipped amino acids.
    assert protein_by_outcome[SpliceOutcome.EXON_SKIPPING], \
        "Expected a concrete mutant protein for in-frame exon skipping"

    # Intron retention and cryptic-site outcomes are still stubs and
    # are reported as the empty string for now.
    for stub_outcome in (
            SpliceOutcome.INTRON_RETENTION,
            SpliceOutcome.CRYPTIC_DONOR):
        assert protein_by_outcome[stub_outcome] == ""
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def test_mutant_protein_sequences_collects_distinct_proteins():
    # mutant_protein_sequences should be a non-empty set, and its
    # shortest member should be shorter than the reference protein
    # (the in-frame skip of exon 4 removes that exon's amino acids).
    snv = Variant("7", 117531115, "G", "A", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    splice_set = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )

    sequences = splice_set.mutant_protein_sequences
    assert isinstance(sequences, set)
    assert len(sequences) > 0

    reference_length = len(str(cftr.protein_sequence))
    # NOTE(review): if this set can contain the empty-string stub used
    # for unimplemented outcomes, the comparison below passes
    # vacuously -- confirm that stubs are excluded from the set.
    shortest_length = min(len(sequence) for sequence in sequences)
    assert shortest_length < reference_length
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
# --------------------------------------------------------------------
|
|
531
|
+
# Out-of-frame exon skip now produces a real mutant protein
|
|
532
|
+
# --------------------------------------------------------------------
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def test_out_of_frame_exon_skip_produces_mutant_protein():
    # Rather than hard-coding an exon, pick the first CFTR exon (from
    # the third exon onward) whose length is not a multiple of 3, so
    # that skipping it shifts the reading frame. The test is skipped
    # when no such exon exists.
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    target_exon = next(
        (
            candidate_exon
            for candidate_exon in cftr.exons[2:]
            if (candidate_exon.end - candidate_exon.start + 1) % 3
        ),
        None,
    )
    if target_exon is None:
        pytest.skip("No out-of-frame exon found in CFTR beyond exon 2")

    # Build a G>A SNV at the first position past the exon end in
    # forward-strand coordinates, i.e. the donor +1 position. If the
    # classifier does not report SpliceDonor for it, the premise does
    # not hold and the test is skipped rather than failed.
    donor_plus_1 = target_exon.end + 1
    snv = Variant("7", donor_plus_1, "G", "A", ensembl_grch38)
    unwrapped = snv.effect_on_transcript(cftr)
    if not isinstance(unwrapped, SpliceDonor):
        pytest.skip(
            "Canonical donor G not present at %d; classifier emitted %s "
            "rather than SpliceDonor." % (
                donor_plus_1, type(unwrapped).__name__))

    splice_set = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )
    exon_skip = next(
        candidate
        for candidate in splice_set.candidates
        if candidate.outcome is SpliceOutcome.EXON_SKIPPING
    )

    # The out-of-frame skip must carry a real coding effect, not a stub.
    skip_effect = exon_skip.coding_effect
    assert skip_effect is not None, (
        "Out-of-frame exon skip should produce a concrete mutant "
        "protein, not a stub")

    mutant_protein = skip_effect.mutant_protein_sequence
    assert isinstance(mutant_protein, str)
    assert len(mutant_protein) > 0
    # The frameshift must make the protein differ from the reference.
    assert mutant_protein != str(cftr.protein_sequence)
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
# --------------------------------------------------------------------
|
|
579
|
+
# has_protein property on SpliceCandidate
|
|
580
|
+
# --------------------------------------------------------------------
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def test_has_protein_is_true_for_candidates_with_coding_effect():
    # The NORMAL_SPLICING candidate of this variant is expected to
    # carry a Substitution coding effect with a protein, so its
    # has_protein flag must be exactly True.
    snv = Variant("7", 117531114, "G", "T", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    splice_set = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )
    normal_candidate = next(
        candidate
        for candidate in splice_set.candidates
        if candidate.outcome is SpliceOutcome.NORMAL_SPLICING
    )
    assert normal_candidate.has_protein is True
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def test_has_protein_is_false_for_stub_candidates():
    # The INTRON_RETENTION candidate is the stub case named by this
    # test, so its has_protein flag must be exactly False.
    snv = Variant("7", 117531115, "G", "A", ensembl_grch38)
    cftr = ensembl_grch38.transcript_by_id(CFTR_TRANSCRIPT_ID)
    splice_set = next(
        effect
        for effect in snv.effects(splice_outcomes=True)
        if effect.transcript is cftr
    )
    retention_candidate = next(
        candidate
        for candidate in splice_set.candidates
        if candidate.outcome is SpliceOutcome.INTRON_RETENTION
    )
    assert retention_candidate.has_protein is False
|