topiary 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tests/__init__.py ADDED
File without changes
tests/common.py ADDED
@@ -0,0 +1,13 @@
1
+ from contextlib import contextmanager
2
+
3
+ import pytest
4
+
5
+
6
def eq_(x, y):
    """
    Assert that ``x == y``.

    On failure the message shows the ``repr`` of both operands, so values
    that print identically but differ in type (e.g. ``1`` and ``"1"``) can
    still be told apart.
    """
    assert x == y, "Expected %r == %r" % (x, y)
8
+
9
+
10
@contextmanager
def assert_raises(e_expected):
    """
    Context manager which requires its body to raise ``e_expected``.

    Thin wrapper that delegates the actual check to ``pytest.raises``.
    """
    expectation = pytest.raises(e_expected)
    with expectation:
        yield
tests/data.py ADDED
@@ -0,0 +1,56 @@
1
+ # Copyright (c) 2015. Mount Sinai School of Medicine
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Helper functions and shared datasets for tests
17
+ """
18
+
19
+
20
+ from __future__ import print_function, division, absolute_import
21
+ import os
22
+
23
+ from varcode import Variant, VariantCollection
24
+ from pyensembl import ensembl_grch38
25
+
26
def data_path(name):
    """
    Return the path of a test fixture stored in the ``data`` directory that
    sits next to this module.

    ``name`` is the file name (or sub-path) relative to that directory.
    """
    tests_dir = os.path.dirname(__file__)
    return os.path.join(tests_dir, "data", name)
32
+
33
# BRAF V600E, coordinates taken from the COSMIC entry:
# http://cancer.sanger.ac.uk/cosmic/mutation/overview?id=476
braf_V600E_variant = Variant(7, 140753336, "A", "T", ensembl_grch38)

# TP53 R248W, coordinates taken from the COSMIC entry:
# http://cancer.sanger.ac.uk/cosmic/mutation/overview?id=10656
tp53_R248W_variant = Variant(17, 7674221, "G", "A", ensembl_grch38)

# Shared collection used by several test modules.
cancer_test_variants = VariantCollection([
    braf_V600E_variant,
    tp53_R248W_variant
])

# Every gene ID reported by any of the test variants.
# NOTE(review): evaluated at import time; presumably needs the GRCh38
# annotation data to be available locally — confirm.
cancer_test_variant_gene_ids = {
    gene_id
    for variant in cancer_test_variants
    for gene_id in variant.gene_ids
}

# Every transcript ID reported by any of the test variants.
cancer_test_variant_transcript_ids = {
    transcript_id
    for variant in cancer_test_variants
    for transcript_id in variant.transcript_ids
}
@@ -0,0 +1,38 @@
1
+ from topiary.cli.args import arg_parser
2
+ from topiary.cli.outputs import write_outputs
3
+ import tempfile
4
+ import pandas as pd
5
+
6
+ from .common import eq_
7
+
8
+
9
def test_write_outputs():
    """
    Write a small DataFrame through ``write_outputs`` with
    ``--subset-output-columns x`` and ``--rename-output-column x X`` and
    expect the CSV read back to hold only the values of ``x`` under the
    name ``X``.
    """
    # Local import: only needed to clean up the temporary file.
    import os

    # Reserve a file name and close the handle right away, so the path can be
    # reopened for writing and reading without a second open handle.
    with tempfile.NamedTemporaryFile(mode="r+", delete=False) as f:
        output_path = f.name

    try:
        df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        args = arg_parser.parse_args(
            [
                "--output-csv",
                output_path,
                "--subset-output-columns",
                "x",
                "--rename-output-column",
                "x",
                "X",
                "--mhc-predictor",
                "random",
                "--mhc-alleles",
                "A0201",
            ]
        )

        write_outputs(
            df, args, print_df_before_filtering=True, print_df_after_filtering=True
        )
        print("File: %s" % output_path)
        df_from_file = pd.read_csv(output_path, index_col="#")
    finally:
        # delete=False means nothing else removes the file; do it here so
        # repeated test runs do not accumulate temporary files.
        os.remove(output_path)

    df_expected = pd.DataFrame({"X": [1, 2, 3]})
    print(df_from_file)
    eq_(len(df_expected), len(df_from_file))
    assert (df_expected == df_from_file).all().all()
@@ -0,0 +1,64 @@
1
+ from topiary.cli.protein_changes import protein_change_effects_from_args
2
+ from topiary.cli.args import create_arg_parser
3
+
4
+ from .common import eq_
5
+
6
# Parser shared by all tests in this module; created with the mhc, rna and
# output options switched off (presumably omitting those argument groups —
# confirm against create_arg_parser).
arg_parser = create_arg_parser(mhc=False, rna=False, output=False)
7
+
8
+
9
def test_protein_change_effects_from_args_substitutions():
    """A well-formed substitution (EGFR T790M, grch37) yields one effect."""
    cli_args = ["--protein-change", "EGFR", "T790M", "--genome", "grch37"]
    args = arg_parser.parse_args(cli_args)

    effects = protein_change_effects_from_args(args)
    eq_(len(effects), 1)

    effect = effects[0]
    expected_attributes = (
        ("aa_ref", "T"),
        # offset asserted here is 789 for residue number 790
        ("aa_mutation_start_offset", 789),
        ("aa_alt", "M"),
    )
    for attribute_name, expected_value in expected_attributes:
        eq_(getattr(effect, attribute_name), expected_value)

    eq_(effect.transcript.name, "EGFR-001")
29
+
30
+
31
def test_protein_change_effects_from_args_malformed_missing_ref():
    """A change string without a reference residue ("790M") yields no effects."""
    cli_args = ["--protein-change", "EGFR", "790M", "--genome", "grch37"]
    args = arg_parser.parse_args(cli_args)

    effects = protein_change_effects_from_args(args)
    eq_(len(effects), 0)
39
+
40
+
41
def test_protein_change_effects_from_args_malformed_missing_alt():
    """A change string without an alternate residue ("T790") yields no effects."""
    cli_args = ["--protein-change", "EGFR", "T790", "--genome", "grch37"]
    args = arg_parser.parse_args(cli_args)

    effects = protein_change_effects_from_args(args)
    eq_(len(effects), 0)
47
+
48
+
49
def test_protein_change_effects_from_args_multiple_effects():
    """Two ``--protein-change`` options produce two effects."""
    cli_args = []
    for gene_name, change in (("EGFR", "T790M"), ("KRAS", "G10D")):
        cli_args += ["--protein-change", gene_name, change]
    cli_args += ["--genome", "grch37"]

    args = arg_parser.parse_args(cli_args)
    effects = protein_change_effects_from_args(args)
    print(effects)
    eq_(len(effects), 2)
@@ -0,0 +1,95 @@
1
+ from topiary import contains_mutant_residues
2
+
3
+ from .common import eq_
4
+
5
+
6
def test_contains_mutant_residues_before():
    """Mutation (5 to 6) located before the peptide (start 10, length 9)."""
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=5,
        mutation_end_in_protein=6,
    )
    eq_(has_mutant_residues, False)
16
+
17
+
18
def test_contains_mutant_residues_after():
    """Mutation (25 to 26) located after the peptide (start 10, length 9)."""
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=25,
        mutation_end_in_protein=26,
    )
    eq_(has_mutant_residues, False)
28
+
29
+
30
def test_contains_mutant_residues_inside():
    """Mutation (12 to 13) located inside the peptide (start 10, length 9)."""
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=12,
        mutation_end_in_protein=13,
    )
    eq_(has_mutant_residues, True)
40
+
41
+
42
def test_contains_mutant_residues_deletion_before_beginning():
    """Deletion (start == end == 10) right at the peptide's first position."""
    # The peptide only contains the residue *after* the deletion, so it
    # still looks like wildtype sequence.
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=10,
        mutation_end_in_protein=10,
    )
    eq_(has_mutant_residues, False)
54
+
55
+
56
def test_contains_mutant_residues_deletion_at_beginning():
    """Deletion (start == end == 11) one position into the peptide."""
    # The peptide contains residues before *and* after the deletion, so it
    # counts as having a mutant juxtaposition of residues.
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=11,
        mutation_end_in_protein=11,
    )
    eq_(has_mutant_residues, True)
68
+
69
+
70
def test_contains_mutant_residues_deletion_after_end():
    """Deletion (start == end == 19) just past the peptide (start 10, length 9)."""
    # The peptide only contains the residue *before* the deletion, so it
    # still looks like wildtype sequence.
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=19,
        mutation_end_in_protein=19,
    )
    eq_(has_mutant_residues, False)
82
+
83
+
84
def test_contains_mutant_residues_deletion_at_end():
    """Deletion (start == end == 18) before the peptide's last residue."""
    # The peptide contains residues before *and* after the deletion, so it
    # counts as having a mutant juxtaposition of residues.
    has_mutant_residues = contains_mutant_residues(
        peptide_start_in_protein=10,
        peptide_length=9,
        mutation_start_in_protein=18,
        mutation_end_in_protein=18,
    )
    eq_(has_mutant_residues, True)
@@ -0,0 +1,52 @@
1
+ from mhctools import NetMHC
2
+ from topiary import TopiaryPredictor
3
+ from .data import cancer_test_variants
4
+
5
# The three alleles are written in three different notations (bare,
# with "*", and with the "HLA-" prefix).
alleles = [
    "A02:01",
    "B*07:02",
    "HLA-C*07:02",
]

# NOTE(review): the predictor is built at import time, so importing this
# module presumably fails when NetMHC is not available — confirm.
mhc_model = NetMHC(alleles=alleles, default_peptide_lengths=[8, 9, 10])

# Expression value assigned to every transcript / gene in the tests below.
DEFAULT_FPKM = 1.0
14
+
15
+
16
def test_epitopes_to_dataframe_transcript_expression():
    """
    Predictions made with a transcript expression dict must carry a
    ``transcript_expression`` column holding the supplied value.
    """
    predictor = TopiaryPredictor(mhc_model=mhc_model, only_novel_epitopes=False)
    expression_by_transcript = {
        transcript_id: DEFAULT_FPKM
        for variant in cancer_test_variants
        for transcript_id in variant.transcript_ids
    }
    df = predictor.predict_from_variants(
        variants=cancer_test_variants,
        transcript_expression_dict=expression_by_transcript,
    )

    assert (
        "transcript_expression" in df.columns
    ), "transcript_expression missing from %s" % (df.columns,)

    all_values_match = (df["transcript_expression"] == DEFAULT_FPKM).all()
    assert (
        all_values_match
    ), "Invalid FPKM values in DataFrame transcript_expression column"
33
+
34
+
35
def test_epitopes_to_dataframe_gene_expression():
    """
    Predictions made with a gene expression dict must carry a
    ``gene_expression`` column holding the supplied value.
    """
    predictor = TopiaryPredictor(mhc_model=mhc_model, only_novel_epitopes=False)
    expression_by_gene = {
        gene_id: DEFAULT_FPKM
        for variant in cancer_test_variants
        for gene_id in variant.gene_ids
    }
    df = predictor.predict_from_variants(
        variants=cancer_test_variants,
        gene_expression_dict=expression_by_gene,
    )

    assert "gene_expression" in df.columns, "gene_expression missing from %s" % (
        df.columns,
    )

    all_values_match = (df["gene_expression"] == DEFAULT_FPKM).all()
    assert (
        all_values_match
    ), "Invalid FPKM values in DataFrame gene_expression column"
@@ -0,0 +1,100 @@
1
+ from .data import (
2
+ cancer_test_variants,
3
+ cancer_test_variant_gene_ids,
4
+ cancer_test_variant_transcript_ids,
5
+ )
6
+ from topiary.filters import apply_effect_expression_filters
7
+
8
# Effects of the shared test variants, computed once for all tests below.
cancer_test_effects = cancer_test_variants.effects()

DEFAULT_FPKM = 1.0

# Every gene ID is given an expression of DEFAULT_FPKM.
gene_expression_dict = dict.fromkeys(cancer_test_variant_gene_ids, DEFAULT_FPKM)

# Every transcript ID is given an expression of DEFAULT_FPKM.
transcript_expression_dict = dict.fromkeys(
    cancer_test_variant_transcript_ids, DEFAULT_FPKM
)
21
+
22
+
23
def test_apply_effect_gene_expression_below_threshold():
    """Gene expression below the threshold removes every effect."""
    threshold = 2 * DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=gene_expression_dict,
        gene_expression_threshold=threshold,
        transcript_expression_dict=None,
        transcript_expression_threshold=None,
    )
    n_remaining = len(filtered)
    assert (
        n_remaining == 0
    ), "All variants should have been filtered out but got: %s" % (filtered,)
34
+
35
+
36
def test_apply_effect_gene_expression_above_threshold():
    """Gene expression above the threshold keeps every effect."""
    threshold = 0.5 * DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=gene_expression_dict,
        gene_expression_threshold=threshold,
        transcript_expression_dict=None,
        transcript_expression_threshold=None,
    )
    n_actual = len(filtered)
    n_expected = len(cancer_test_effects)
    assert n_actual == n_expected, "Expected %s effects but got %s" % (
        n_expected,
        n_actual,
    )
47
+
48
+
49
def test_apply_effect_gene_expression_equal_threshold():
    """Gene expression exactly at the threshold must NOT be filtered out."""
    threshold = DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=gene_expression_dict,
        gene_expression_threshold=threshold,
        transcript_expression_dict=None,
        transcript_expression_threshold=None,
    )
    n_actual = len(filtered)
    n_expected = len(cancer_test_effects)
    assert n_actual == n_expected, "Expected %s effects but got %s" % (
        n_expected,
        n_actual,
    )
61
+
62
+
63
def test_apply_effect_transcript_expression_below_threshold():
    """Transcript expression below the threshold removes every effect."""
    threshold = 2 * DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=None,
        gene_expression_threshold=None,
        transcript_expression_dict=transcript_expression_dict,
        transcript_expression_threshold=threshold,
    )
    n_remaining = len(filtered)
    assert (
        n_remaining == 0
    ), "All effects should have been filtered out but got: %s" % (filtered,)
74
+
75
+
76
def test_apply_effect_transcript_expression_above_threshold():
    """Transcript expression above the threshold keeps every effect."""
    threshold = 0.5 * DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=None,
        gene_expression_threshold=None,
        transcript_expression_dict=transcript_expression_dict,
        transcript_expression_threshold=threshold,
    )
    n_actual = len(filtered)
    n_expected = len(cancer_test_effects)
    assert n_actual == n_expected, "Expected %s effects but got %s" % (
        n_expected,
        n_actual,
    )
87
+
88
+
89
def test_apply_effect_transcript_expression_equal_threshold():
    """Transcript expression exactly at the threshold must NOT be filtered out."""
    threshold = DEFAULT_FPKM
    filtered = apply_effect_expression_filters(
        cancer_test_effects,
        gene_expression_dict=None,
        gene_expression_threshold=None,
        transcript_expression_dict=transcript_expression_dict,
        transcript_expression_threshold=threshold,
    )
    n_actual = len(filtered)
    n_expected = len(cancer_test_effects)
    assert n_actual == n_expected, "Expected %s effects but got %s" % (
        n_expected,
        n_actual,
    )
@@ -0,0 +1,35 @@
1
+ from topiary.cli.args import arg_parser, predict_epitopes_from_args
2
+
3
+ from .common import eq_
4
+ from .data import cancer_test_variants
5
+
6
+
7
def test_cancer_epitopes_from_args():
    """
    Run epitope prediction from command-line style arguments for the shared
    cancer variants and check the number of predictions returned.
    """
    epitope_lengths = [9, 10]
    alleles = ["HLA-A*02:01", "C0701"]

    args_list = [
        "--mhc-predictor",
        "netmhc",
        "--mhc-epitope-lengths",
        ",".join(map(str, epitope_lengths)),
        "--mhc-alleles",
        ",".join(alleles),
        "--genome",
        "GRCh38",
        "--only-novel-epitopes",
    ]
    # One "--variant CONTIG START REF ALT" group per test variant.
    for variant in cancer_test_variants:
        args_list.extend(
            [
                "--variant",
                str(variant.contig),
                str(variant.start),
                variant.ref,
                variant.alt,
            ]
        )

    parsed_args = arg_parser.parse_args(args_list)
    epitope_predictions = predict_epitopes_from_args(parsed_args)

    # Expected count: for each length k, k predictions per variant per allele
    # (presumably the k windows of length k covering a single substituted
    # residue — confirm).
    n_variants = len(cancer_test_variants)
    n_alleles = len(alleles)
    expected_number_of_epitopes = sum(
        epitope_length * n_variants * n_alleles for epitope_length in epitope_lengths
    )
    eq_(len(epitope_predictions), expected_number_of_epitopes)
@@ -0,0 +1,107 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ test_cufflinks : Test that we can correctly load Cufflinks tracking files which
15
+ contain the estimated expression levels of genes and isoforms (computed from
16
+ RNA-Seq reads).
17
+ """
18
+
19
+
20
+ from __future__ import print_function, division, absolute_import
21
+
22
+ from topiary.rna import load_cufflinks_dataframe
23
+
24
+ from .common import eq_
25
+ from .data import data_path
26
+
27
+
28
def test_load_cufflinks_genes():
    """Loading the gene tracking file with novel entries kept."""
    expected_gene_ids = {
        "ENSG00000240361",
        "ENSG00000268020",
        "ENSG00000186092",
        "ENSG00000269308",
        "CUFF.1",
        "CUFF.2",
        "CUFF.3",
        "CUFF.4",
        "CUFF.5",
    }
    tracking_file = data_path("genes.fpkm_tracking")
    genes_df = load_cufflinks_dataframe(
        tracking_file,
        drop_lowdata=True,
        drop_hidata=True,
        drop_failed=True,
        drop_novel=False,
    )
    eq_(set(genes_df.id), expected_gene_ids)
49
+
50
+
51
def test_load_cufflinks_genes_drop_novel():
    """With ``drop_novel=True`` only the ENSG gene IDs are expected."""
    expected_gene_ids = {
        "ENSG00000240361",
        "ENSG00000268020",
        "ENSG00000186092",
        "ENSG00000269308",
    }
    tracking_file = data_path("genes.fpkm_tracking")
    genes_df = load_cufflinks_dataframe(
        tracking_file,
        drop_lowdata=True,
        drop_hidata=True,
        drop_failed=True,
        drop_novel=True,
    )
    eq_(set(genes_df.id), expected_gene_ids)
67
+
68
+
69
def test_load_cufflinks_isoforms():
    """Loading the isoform tracking file with novel entries kept."""
    expected_transcript_ids = {
        "ENST00000492842",
        "ENST00000594647",
        "ENST00000335137",
        "ENST00000417324",
        "ENST00000461467",
        "ENST00000518655",
        "CUFF.7604.1",
    }
    tracking_file = data_path("isoforms.fpkm_tracking")
    transcripts_df = load_cufflinks_dataframe(
        tracking_file,
        drop_lowdata=True,
        drop_hidata=True,
        drop_failed=True,
        drop_novel=False,
    )
    eq_(set(transcripts_df.id), expected_transcript_ids)
88
+
89
+
90
def test_load_cufflinks_isoforms_drop_novel():
    """With ``drop_novel=True`` only the ENST transcript IDs are expected."""
    expected_transcript_ids = {
        "ENST00000492842",
        "ENST00000594647",
        "ENST00000335137",
        "ENST00000417324",
        "ENST00000461467",
        "ENST00000518655",
    }
    tracking_file = data_path("isoforms.fpkm_tracking")
    transcripts_df = load_cufflinks_dataframe(
        tracking_file,
        drop_lowdata=True,
        drop_hidata=True,
        drop_failed=True,
        drop_novel=True,
    )
    eq_(set(transcripts_df.id), expected_transcript_ids)
@@ -0,0 +1,20 @@
1
+ from topiary.rna import load_transcript_fpkm_dict_from_gtf
2
+
3
+ from .common import eq_
4
+ from .data import data_path
5
+
6
+
7
def test_load_stringtie_gtf_transcripts():
    """
    The StringTie GTF subset must load into a mapping whose keys and FPKM
    values match the expected transcripts exactly.
    """
    expected_fpkms_dict = {
        "ENSMUST00000192505": 0.125126,
        "ENSMUST00000191939": 0.680062,
        "ENSMUST00000182774": 0.054028,
    }
    gtf_path = data_path("B16-StringTie-chr1-subset.gtf")
    transcript_fpkms = load_transcript_fpkm_dict_from_gtf(gtf_path)

    # First the key sets, then each individual value.
    eq_(set(expected_fpkms_dict.keys()), set(transcript_fpkms.keys()))
    for transcript_id in expected_fpkms_dict:
        eq_(expected_fpkms_dict[transcript_id], transcript_fpkms[transcript_id])
@@ -0,0 +1,73 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ import pytest
14
+ from pyensembl import ensembl_grch37
15
+ from topiary import TopiaryPredictor
16
+ from varcode import Variant, VariantCollection
17
+
18
+ from .common import eq_
19
+
20
# Build the predictor once for the whole module. Any failure while importing
# or constructing it is recorded in HAS_NETMHC rather than propagated.
try:
    from mhctools import NetMHCpan

    # Six alleles, written in a mix of notations and letter cases.
    mhc_model = NetMHCpan(
        alleles=["A02:01", "a0204", "B*07:02", "HLA-B14:02", "HLA-C*07:02", "hla-c07:01"],
        default_peptide_lengths=[9],
    )
    HAS_NETMHC = True
except Exception:
    # Broad on purpose: the exact exception raised when the predictor is
    # unavailable is not visible from here.
    mhc_model = None
    HAS_NETMHC = False

# Skip every test in this module when the predictor could not be created.
pytestmark = pytest.mark.skipif(not HAS_NETMHC, reason="NetMHCpan not installed")

# TODO: find out about these variants,
# what do we expect from them?
variants = VariantCollection(
    [
        Variant(contig=10, start=100018900, ref="C", alt="T", ensembl=ensembl_grch37),
        Variant(contig=11, start=32861682, ref="G", alt="A", ensembl=ensembl_grch37),
    ]
)
42
+
43
+
44
def test_epitope_prediction_without_padding():
    """
    Predict novel epitopes with the default padding and count the
    predictions whose affinity is at most 500.
    """
    predictor = TopiaryPredictor(mhc_model=mhc_model, only_novel_epitopes=True)
    output_without_padding = predictor.predict_from_variants(variants=variants)

    # NOTE(review): the expected count of 5 is not derived from the number of
    # variants or alleles anywhere in this module — confirm where it comes
    # from.
    binder_mask = output_without_padding.affinity <= 500
    strong_binders = output_without_padding[binder_mask]
    eq_(len(strong_binders), 5)
51
+
52
+
53
def test_epitope_prediction_with_invalid_padding():
    """``padding_around_mutation=7`` with 9-mer predictions raises ValueError."""
    with pytest.raises(ValueError):
        # Construction stays inside the context manager: either the
        # constructor or the prediction call may be the one that raises.
        predictor = TopiaryPredictor(mhc_model=mhc_model, padding_around_mutation=7)
        predictor.predict_from_variants(variants=variants)
58
+
59
+
60
def test_epitope_prediction_with_invalid_zero_padding():
    """Expect ValueError for an invalid ``padding_around_mutation``."""
    # NOTE(review): the name says "zero padding" but the value used is 7,
    # which makes this a duplicate of
    # test_epitope_prediction_with_invalid_padding. Confirm whether 0 was
    # intended and whether 0 is actually rejected before changing it.
    with pytest.raises(ValueError):
        predictor = TopiaryPredictor(mhc_model=mhc_model, padding_around_mutation=7)
        predictor.predict_from_variants(variants=variants)
65
+
66
+
67
def test_epitope_prediction_with_valid_padding():
    """``padding_around_mutation=8`` is accepted for 9-mer predictions."""
    output_with_padding = TopiaryPredictor(
        mhc_model=mhc_model,
        padding_around_mutation=8,
        only_novel_epitopes=True,
    ).predict_from_variants(variants=variants)

    # 6 alleles * 2 mutations * 9 distinct windows = 108
    n_predictions = len(output_with_padding)
    eq_(n_predictions, 108)