varcode 2.2.0__tar.gz → 2.2.1__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (88)
  1. {varcode-2.2.0/varcode.egg-info → varcode-2.2.1}/PKG-INFO +1 -1
  2. varcode-2.2.1/tests/test_reference_mismatch_error.py +103 -0
  3. {varcode-2.2.0 → varcode-2.2.1}/varcode/__init__.py +6 -2
  4. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_prediction.py +10 -11
  5. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_prediction_coding.py +9 -7
  6. varcode-2.2.1/varcode/errors.py +69 -0
  7. varcode-2.2.1/varcode/version.py +1 -0
  8. {varcode-2.2.0 → varcode-2.2.1/varcode.egg-info}/PKG-INFO +1 -1
  9. {varcode-2.2.0 → varcode-2.2.1}/varcode.egg-info/SOURCES.txt +2 -0
  10. varcode-2.2.0/varcode/version.py +0 -1
  11. {varcode-2.2.0 → varcode-2.2.1}/LICENSE +0 -0
  12. {varcode-2.2.0 → varcode-2.2.1}/MANIFEST.in +0 -0
  13. {varcode-2.2.0 → varcode-2.2.1}/README.md +0 -0
  14. {varcode-2.2.0 → varcode-2.2.1}/pyproject.toml +0 -0
  15. {varcode-2.2.0 → varcode-2.2.1}/requirements.txt +0 -0
  16. {varcode-2.2.0 → varcode-2.2.1}/setup.cfg +0 -0
  17. {varcode-2.2.0 → varcode-2.2.1}/tests/__init__.py +0 -0
  18. {varcode-2.2.0 → varcode-2.2.1}/tests/benchmark_vcf_load.py +0 -0
  19. {varcode-2.2.0 → varcode-2.2.1}/tests/common.py +0 -0
  20. {varcode-2.2.0 → varcode-2.2.1}/tests/data.py +0 -0
  21. {varcode-2.2.0 → varcode-2.2.1}/tests/test_cli_effects.py +0 -0
  22. {varcode-2.2.0 → varcode-2.2.1}/tests/test_cli_genes.py +0 -0
  23. {varcode-2.2.0 → varcode-2.2.1}/tests/test_collection_filtering.py +0 -0
  24. {varcode-2.2.0 → varcode-2.2.1}/tests/test_collection_variants_attr_consistency.py +0 -0
  25. {varcode-2.2.0 → varcode-2.2.1}/tests/test_common.py +0 -0
  26. {varcode-2.2.0 → varcode-2.2.1}/tests/test_cosmic_mutations.py +0 -0
  27. {varcode-2.2.0 → varcode-2.2.1}/tests/test_csv_roundtrip.py +0 -0
  28. {varcode-2.2.0 → varcode-2.2.1}/tests/test_dbnsfp_validation.py +0 -0
  29. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effect_annotation_errors.py +0 -0
  30. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effect_classes.py +0 -0
  31. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effect_collection.py +0 -0
  32. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effect_collection_serialization.py +0 -0
  33. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effect_collection_sort_order.py +0 -0
  34. {varcode-2.2.0 → varcode-2.2.1}/tests/test_effects_from_mutagenix_variants.py +0 -0
  35. {varcode-2.2.0 → varcode-2.2.1}/tests/test_exonic_splice_site.py +0 -0
  36. {varcode-2.2.0 → varcode-2.2.1}/tests/test_frameshift_helpers.py +0 -0
  37. {varcode-2.2.0 → varcode-2.2.1}/tests/test_maf.py +0 -0
  38. {varcode-2.2.0 → varcode-2.2.1}/tests/test_mm10_klf6_frameshift.py +0 -0
  39. {varcode-2.2.0 → varcode-2.2.1}/tests/test_mouse.py +0 -0
  40. {varcode-2.2.0 → varcode-2.2.1}/tests/test_mutate.py +0 -0
  41. {varcode-2.2.0 → varcode-2.2.1}/tests/test_no_duplicate_variants.py +0 -0
  42. {varcode-2.2.0 → varcode-2.2.1}/tests/test_premature_stop_short_description.py +0 -0
  43. {varcode-2.2.0 → varcode-2.2.1}/tests/test_problematic_variants.py +0 -0
  44. {varcode-2.2.0 → varcode-2.2.1}/tests/test_reference.py +0 -0
  45. {varcode-2.2.0 → varcode-2.2.1}/tests/test_silent_aa_pos.py +0 -0
  46. {varcode-2.2.0 → varcode-2.2.1}/tests/test_silent_hgvs_description.py +0 -0
  47. {varcode-2.2.0 → varcode-2.2.1}/tests/test_splice_site_effects.py +0 -0
  48. {varcode-2.2.0 → varcode-2.2.1}/tests/test_stop_codon_classification_bugs.py +0 -0
  49. {varcode-2.2.0 → varcode-2.2.1}/tests/test_string_helpers.py +0 -0
  50. {varcode-2.2.0 → varcode-2.2.1}/tests/test_symbolic_alleles.py +0 -0
  51. {varcode-2.2.0 → varcode-2.2.1}/tests/test_timings.py +0 -0
  52. {varcode-2.2.0 → varcode-2.2.1}/tests/test_variant.py +0 -0
  53. {varcode-2.2.0 → varcode-2.2.1}/tests/test_variant_collection.py +0 -0
  54. {varcode-2.2.0 → varcode-2.2.1}/tests/test_vcf.py +0 -0
  55. {varcode-2.2.0 → varcode-2.2.1}/tests/test_vcf_output.py +0 -0
  56. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/__init__.py +0 -0
  57. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/effects_script.py +0 -0
  58. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/genes_script.py +0 -0
  59. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/logging.conf +0 -0
  60. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/variant_args.py +0 -0
  61. {varcode-2.2.0 → varcode-2.2.1}/varcode/cli/version_info.py +0 -0
  62. {varcode-2.2.0 → varcode-2.2.1}/varcode/common.py +0 -0
  63. {varcode-2.2.0 → varcode-2.2.1}/varcode/csv_helpers.py +0 -0
  64. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/__init__.py +0 -0
  65. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/common.py +0 -0
  66. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_classes.py +0 -0
  67. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_collection.py +0 -0
  68. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_helpers.py +0 -0
  69. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_ordering.py +0 -0
  70. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_prediction_coding_frameshift.py +0 -0
  71. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/effect_prediction_coding_in_frame.py +0 -0
  72. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/mutate.py +0 -0
  73. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/transcript_helpers.py +0 -0
  74. {varcode-2.2.0 → varcode-2.2.1}/varcode/effects/translate.py +0 -0
  75. {varcode-2.2.0 → varcode-2.2.1}/varcode/maf.py +0 -0
  76. {varcode-2.2.0 → varcode-2.2.1}/varcode/nucleotides.py +0 -0
  77. {varcode-2.2.0 → varcode-2.2.1}/varcode/reference.py +0 -0
  78. {varcode-2.2.0 → varcode-2.2.1}/varcode/string_helpers.py +0 -0
  79. {varcode-2.2.0 → varcode-2.2.1}/varcode/ucsc_reference_names.py +0 -0
  80. {varcode-2.2.0 → varcode-2.2.1}/varcode/util.py +0 -0
  81. {varcode-2.2.0 → varcode-2.2.1}/varcode/variant.py +0 -0
  82. {varcode-2.2.0 → varcode-2.2.1}/varcode/variant_collection.py +0 -0
  83. {varcode-2.2.0 → varcode-2.2.1}/varcode/vcf.py +0 -0
  84. {varcode-2.2.0 → varcode-2.2.1}/varcode/vcf_output.py +0 -0
  85. {varcode-2.2.0 → varcode-2.2.1}/varcode.egg-info/dependency_links.txt +0 -0
  86. {varcode-2.2.0 → varcode-2.2.1}/varcode.egg-info/entry_points.txt +0 -0
  87. {varcode-2.2.0 → varcode-2.2.1}/varcode.egg-info/requires.txt +0 -0
  88. {varcode-2.2.0 → varcode-2.2.1}/varcode.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: varcode
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Variant annotation in Python
5
5
  Author-email: Alex Rubinsteyn <alex.rubinsteyn@unc.edu>
6
6
  Project-URL: Homepage, https://github.com/openvax/varcode
@@ -0,0 +1,103 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ Regression tests for https://github.com/openvax/varcode/issues/215
15
+ (and the duplicate symptom in #246).
16
+
17
+ When a variant's ref allele doesn't match the reference genome at the
18
+ variant's position — typically because the variant was called against
19
+ a different build, the ref field was populated with the patient's
20
+ germline allele, or there's strand confusion — varcode should raise a
21
+ dedicated ``ReferenceMismatchError`` with an actionable message, not
22
+ a generic ``ValueError`` or ``AssertionError``.
23
+ """
24
+
25
+ import pytest
26
+
27
+ import varcode
28
+ from varcode import Variant, ReferenceMismatchError
29
+
30
+
31
+ def _construct_mismatching_variant():
32
+ """Construct a variant whose ref doesn't match the GRCh38 genome.
33
+
34
+ Uses CFTR exon 4 (chr7:117530899-117531114 on GRCh38, + strand)
35
+ where the real genome has specific bases. We claim ref='Z'... well,
36
+ varcode rejects unknown nucleotides, so instead we use a valid
37
+ base that doesn't match the genome at that position.
38
+ """
39
+ # chr7:117531114 on GRCh38 is G (last base of CFTR exon 4). Claim
40
+ # the variant has ref='T' (which is wrong). This will fail the
41
+ # transcript-vs-variant ref check.
42
+ return Variant("7", 117531114, "T", "A", "GRCh38")
43
+
44
+
45
+ def test_ref_mismatch_raises_reference_mismatch_error():
46
+ variant = _construct_mismatching_variant()
47
+ with pytest.raises(ReferenceMismatchError):
48
+ variant.effects()
49
+
50
+
51
+ def test_reference_mismatch_error_is_value_error_subclass():
52
+ # Keep the existing contract: callers that catch ValueError still
53
+ # see this. (predict_variant_effect_on_transcript_or_failure relies
54
+ # on this for the Failure-effect fallback path.)
55
+ assert issubclass(ReferenceMismatchError, ValueError)
56
+
57
+
58
+ def test_reference_mismatch_error_message_is_actionable():
59
+ variant = _construct_mismatching_variant()
60
+ try:
61
+ variant.effects()
62
+ except ReferenceMismatchError as e:
63
+ msg = str(e)
64
+ # Names the variant so the user can find it.
65
+ assert "117531114" in msg
66
+ # Shows both the expected (genome) and observed (variant) bases.
67
+ assert "'T'" in msg # variant's claimed ref
68
+ assert "'G'" in msg # actual genome base
69
+ # Suggests the most common causes.
70
+ assert "genome build" in msg or "germline" in msg or "strand" in msg
71
+ # Points at the escape hatch.
72
+ assert "raise_on_error=False" in msg
73
+ else:
74
+ raise AssertionError("Expected ReferenceMismatchError")
75
+
76
+
77
+ def test_reference_mismatch_error_carries_structured_fields():
78
+ variant = _construct_mismatching_variant()
79
+ try:
80
+ variant.effects()
81
+ except ReferenceMismatchError as e:
82
+ assert e.variant == variant
83
+ assert e.transcript is not None
84
+ assert e.expected_ref == "G"
85
+ assert e.observed_ref == "T"
86
+ else:
87
+ raise AssertionError("Expected ReferenceMismatchError")
88
+
89
+
90
+ def test_ref_mismatch_with_raise_on_error_false_returns_failure():
91
+ # When the user opts into error suppression, the mismatch should
92
+ # collapse into a Failure effect (the existing contract).
93
+ from varcode.effects import Failure
94
+ variant = _construct_mismatching_variant()
95
+ effects = variant.effects(raise_on_error=False)
96
+ assert any(isinstance(e, Failure) for e in effects), \
97
+ "Expected at least one Failure effect when raise_on_error=False"
98
+
99
+
100
+ def test_reference_mismatch_error_exposed_at_package_level():
101
+ # Users should be able to catch varcode.ReferenceMismatchError
102
+ # without importing from a submodule.
103
+ assert varcode.ReferenceMismatchError is ReferenceMismatchError
@@ -11,6 +11,7 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
 
14
+ from .errors import ReferenceMismatchError
14
15
  from .variant import Variant
15
16
  from .variant_collection import VariantCollection
16
17
  from .maf import load_maf, load_maf_dataframe
@@ -22,10 +23,10 @@ from .effects import (
22
23
  MutationEffect,
23
24
  NonsilentCodingMutation,
24
25
  )
25
- from .version import __version__
26
+ from .version import __version__
26
27
 
27
28
  __all__ = [
28
- "__version__",
29
+ "__version__",
29
30
 
30
31
  # basic classes
31
32
  "Variant",
@@ -38,6 +39,9 @@ __all__ = [
38
39
  "MutationEffect",
39
40
  "NonsilentCodingMutation",
40
41
 
42
+ # exceptions
43
+ "ReferenceMismatchError",
44
+
41
45
  # file loading
42
46
  "load_maf",
43
47
  "load_maf_dataframe",
@@ -16,6 +16,7 @@ from Bio.Seq import reverse_complement
16
16
  from pyensembl import Transcript
17
17
 
18
18
  from ..common import groupby_field
19
+ from ..errors import ReferenceMismatchError
19
20
 
20
21
  from .transcript_helpers import interval_offset_on_transcript
21
22
  from .effect_helpers import changes_exonic_splice_site
@@ -399,17 +400,15 @@ def exonic_transcript_effect(variant, exon, exon_number, transcript):
399
400
  transcript.sequence[transcript_offset:transcript_offset + n_ref])
400
401
 
401
402
  if cdna_ref != expected_ref:
402
- raise ValueError(
403
- ("Found ref nucleotides '%s' in sequence"
404
- " of %s at offset %d (chromosome positions %d:%d)"
405
- " but variant %s has '%s'") % (
406
- expected_ref,
407
- transcript,
408
- transcript_offset,
409
- genome_start,
410
- genome_end,
411
- variant,
412
- cdna_ref))
403
+ raise ReferenceMismatchError(
404
+ variant=variant,
405
+ transcript=transcript,
406
+ expected_ref=expected_ref,
407
+ observed_ref=cdna_ref,
408
+ transcript_offset=transcript_offset,
409
+ genome_start=genome_start,
410
+ genome_end=genome_end,
411
+ )
413
412
 
414
413
  utr5_length = min(transcript.start_codon_spliced_offsets)
415
414
 
@@ -10,6 +10,7 @@
10
10
  # See the License for the specific language governing permissions and
11
11
  # limitations under the License.
12
12
 
13
+ from ..errors import ReferenceMismatchError
13
14
  from .effect_prediction_coding_frameshift import predict_frameshift_coding_effect
14
15
  from .effect_prediction_coding_in_frame import predict_in_frame_coding_effect
15
16
 
@@ -57,13 +58,14 @@ def predict_variant_coding_effect_on_transcript(
57
58
 
58
59
  # Make sure that the reference sequence agrees with what we expected
59
60
  # from the VCF
60
- assert ref_nucleotides_from_transcript == trimmed_cdna_ref, \
61
- "%s: expected ref '%s' at offset %d of %s, transcript has '%s'" % (
62
- variant,
63
- trimmed_cdna_ref,
64
- transcript_offset,
65
- transcript,
66
- ref_nucleotides_from_transcript)
61
+ if ref_nucleotides_from_transcript != trimmed_cdna_ref:
62
+ raise ReferenceMismatchError(
63
+ variant=variant,
64
+ transcript=transcript,
65
+ expected_ref=ref_nucleotides_from_transcript,
66
+ observed_ref=trimmed_cdna_ref,
67
+ transcript_offset=transcript_offset,
68
+ )
67
69
 
68
70
  start_codon_offset = transcript.first_start_codon_spliced_offset
69
71
  stop_codon_offset = transcript.last_stop_codon_spliced_offset
@@ -0,0 +1,69 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ Exception types raised by varcode. ``ReferenceMismatchError`` subclasses
15
+ ``ValueError`` for backwards compatibility with callers that catch
16
+ ``ValueError`` already (including the internal
17
+ ``predict_variant_effect_on_transcript_or_failure`` fallback).
18
+ """
19
+
20
+
21
+ class ReferenceMismatchError(ValueError):
22
+ """Raised when a variant's reported ref allele does not match the
23
+ reference genome at the variant's position.
24
+
25
+ This most often means one of:
26
+
27
+ * The variant was called against a different reference build than
28
+ the one being used for annotation (e.g. GRCh37 vs GRCh38).
29
+ * The variant's ref field was populated with the patient's germline
30
+ allele rather than the canonical reference. VCF requires the
31
+ ref field to match the reference genome; germline variants at
32
+ the same position should be encoded as separate variants.
33
+ * Strand confusion: the variant is specified on the negative
34
+ strand but varcode expects positive-strand coordinates.
35
+
36
+ Callers who would rather continue past this error can pass
37
+ ``raise_on_error=False`` to :meth:`Variant.effects` to receive
38
+ ``Failure`` effects instead.
39
+ """
40
+
41
+ def __init__(self, variant, transcript, expected_ref, observed_ref,
42
+ transcript_offset=None, genome_start=None, genome_end=None):
43
+ self.variant = variant
44
+ self.transcript = transcript
45
+ self.expected_ref = expected_ref
46
+ self.observed_ref = observed_ref
47
+ self.transcript_offset = transcript_offset
48
+ self.genome_start = genome_start
49
+ self.genome_end = genome_end
50
+
51
+ location = ""
52
+ if transcript_offset is not None:
53
+ location = " at transcript offset %d" % transcript_offset
54
+ if genome_start is not None and genome_end is not None:
55
+ location += " (chromosome positions %d:%d)" % (
56
+ genome_start, genome_end)
57
+
58
+ message = (
59
+ "Reference allele mismatch for %s on %s%s: variant reports "
60
+ "ref=%r but the reference genome has %r at this position.\n"
61
+ "This usually means the variant was called against a "
62
+ "different genome build, the ref field was filled in with "
63
+ "the patient's germline allele rather than the reference, "
64
+ "or the variant is on the wrong strand. Pass "
65
+ "raise_on_error=False to .effects() to receive a Failure "
66
+ "effect instead of raising." % (
67
+ variant, transcript, location, observed_ref, expected_ref)
68
+ )
69
+ super().__init__(message)
@@ -0,0 +1 @@
1
+ __version__ = "2.2.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: varcode
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Variant annotation in Python
5
5
  Author-email: Alex Rubinsteyn <alex.rubinsteyn@unc.edu>
6
6
  Project-URL: Homepage, https://github.com/openvax/varcode
@@ -31,6 +31,7 @@ tests/test_no_duplicate_variants.py
31
31
  tests/test_premature_stop_short_description.py
32
32
  tests/test_problematic_variants.py
33
33
  tests/test_reference.py
34
+ tests/test_reference_mismatch_error.py
34
35
  tests/test_silent_aa_pos.py
35
36
  tests/test_silent_hgvs_description.py
36
37
  tests/test_splice_site_effects.py
@@ -45,6 +46,7 @@ tests/test_vcf_output.py
45
46
  varcode/__init__.py
46
47
  varcode/common.py
47
48
  varcode/csv_helpers.py
49
+ varcode/errors.py
48
50
  varcode/maf.py
49
51
  varcode/nucleotides.py
50
52
  varcode/reference.py
@@ -1 +0,0 @@
1
- __version__ = "2.2.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes