snplib 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snplib/__init__.py +8 -8
- snplib/finalreport/__init__.py +7 -7
- snplib/finalreport/_finalreport.py +251 -251
- snplib/format/__init__.py +19 -19
- snplib/format/__settings.py +7 -7
- snplib/format/_plink.py +291 -305
- snplib/format/_snp.py +113 -113
- snplib/parentage/__init__.py +15 -15
- snplib/parentage/_discov.py +102 -102
- snplib/parentage/_isagmark.py +15 -15
- snplib/parentage/_verif.py +91 -91
- snplib/parentage/isag_disc.pl +0 -0
- snplib/parentage/isag_verif.pl +0 -0
- snplib/statistics/__init__.py +16 -16
- snplib/statistics/_callrate.py +60 -59
- snplib/statistics/_freq.py +67 -67
- snplib/statistics/_snphwe.py +132 -132
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/LICENSE +674 -674
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/METADATA +80 -97
- snplib-1.0.9.dist-info/RECORD +22 -0
- snplib/finalreport/tests/__init__.py +0 -7
- snplib/finalreport/tests/test_finalreport.py +0 -215
- snplib/format/tests/__init__.py +0 -7
- snplib/format/tests/test_plink_fam.py +0 -121
- snplib/format/tests/test_plink_lgen.py +0 -106
- snplib/format/tests/test_plink_map.py +0 -42
- snplib/format/tests/test_plink_ped.py +0 -136
- snplib/format/tests/test_snp.py +0 -128
- snplib/parentage/tests/__init__.py +0 -7
- snplib/parentage/tests/test_discov.py +0 -164
- snplib/parentage/tests/test_verif.py +0 -160
- snplib/statistics/tests/__init__.py +0 -7
- snplib/statistics/tests/test_callrate.py +0 -171
- snplib/statistics/tests/test_freq_allele.py +0 -87
- snplib/statistics/tests/test_freq_maf.py +0 -17
- snplib/statistics/tests/test_hwe_t.py +0 -41
- snplib/statistics/tests/test_snphwe.py +0 -41
- snplib-1.0.7.dist-info/RECORD +0 -37
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/WHEEL +0 -0
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/top_level.txt +0 -0
@@ -1,42 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_FILES
|
6
|
-
from .. import make_map
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data_map() -> pd.DataFrame:
|
14
|
-
return pd.read_csv(DIR_FILES / "fplink/map/file_bovinesnp50.csv")
|
15
|
-
|
16
|
-
|
17
|
-
class TestPlinkFormatMap(object):
|
18
|
-
|
19
|
-
def test_map_true(self, data_map) -> None:
|
20
|
-
|
21
|
-
res = make_map(data_map)
|
22
|
-
assert not res.empty
|
23
|
-
|
24
|
-
def test_map_raise(self, data_map) -> None:
|
25
|
-
with pytest.raises(
|
26
|
-
KeyError, match="Manifest has no data to build map format!"
|
27
|
-
):
|
28
|
-
make_map(data_map)
|
29
|
-
make_map(pd.DataFrame())
|
30
|
-
make_map(
|
31
|
-
pd.DataFrame(columns=['Chr', 'Name', 'MapInfo', 'morgans'])
|
32
|
-
)
|
33
|
-
|
34
|
-
with pytest.raises(
|
35
|
-
KeyError, match="Manifest has no data to build map format!"
|
36
|
-
):
|
37
|
-
make_map(pd.DataFrame())
|
38
|
-
|
39
|
-
def test_map_empty(self) -> None:
|
40
|
-
assert make_map(
|
41
|
-
pd.DataFrame(columns=['Chr', 'Name', 'MapInfo', 'morgans'])
|
42
|
-
).empty
|
@@ -1,136 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_FILES
|
6
|
-
from .. import make_ped
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data_ped(request) -> pd.DataFrame | None:
|
14
|
-
return pd.read_pickle(DIR_FILES / f"fplink/ped/{request.param}")
|
15
|
-
|
16
|
-
|
17
|
-
class TestPlinkFormatPed(object):
|
18
|
-
|
19
|
-
@pytest.mark.parametrize("data_ped", ["file.pl"], indirect=True)
|
20
|
-
def test_ped_true(self, data_ped: pd.DataFrame) -> None:
|
21
|
-
assert not make_ped(
|
22
|
-
data_ped,
|
23
|
-
"SAMPLE_ID",
|
24
|
-
"SNP",
|
25
|
-
fid_col="SAMPLE_ID"
|
26
|
-
).empty
|
27
|
-
|
28
|
-
assert not make_ped(
|
29
|
-
data_ped,
|
30
|
-
"SAMPLE_ID",
|
31
|
-
"SNP"
|
32
|
-
).empty
|
33
|
-
|
34
|
-
def test_ped_empty(self) -> None:
|
35
|
-
assert make_ped(
|
36
|
-
pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
|
37
|
-
"SAMPLE_ID",
|
38
|
-
"SNP"
|
39
|
-
).empty
|
40
|
-
|
41
|
-
assert make_ped(
|
42
|
-
pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
|
43
|
-
"SAMPLE_ID",
|
44
|
-
"SNP",
|
45
|
-
fid_col="SAMPLE_ID"
|
46
|
-
).empty
|
47
|
-
|
48
|
-
@pytest.mark.parametrize("data_ped", ["file.pl"], indirect=True)
|
49
|
-
def test_ped_raise_columns(self, data_ped: pd.DataFrame) -> None:
|
50
|
-
# SID_COL
|
51
|
-
with pytest.raises(
|
52
|
-
KeyError, match="Data has not in name columns!"
|
53
|
-
):
|
54
|
-
make_ped(
|
55
|
-
data=data_ped,
|
56
|
-
sid_col="SAMPLE_ID1",
|
57
|
-
fid_col="SAMPLE_ID",
|
58
|
-
snp_col="SNP"
|
59
|
-
)
|
60
|
-
|
61
|
-
# SNP_COL
|
62
|
-
with pytest.raises(
|
63
|
-
KeyError, match="Data has not in name columns!"
|
64
|
-
):
|
65
|
-
make_ped(
|
66
|
-
data_ped,
|
67
|
-
"SAMPLE_ID",
|
68
|
-
"SNP1",
|
69
|
-
fid_col="SAMPLE_ID"
|
70
|
-
)
|
71
|
-
|
72
|
-
# FID_COL
|
73
|
-
with pytest.raises(
|
74
|
-
KeyError, match="Data has not in name columns SAMPLE_ID1!"
|
75
|
-
):
|
76
|
-
make_ped(
|
77
|
-
data_ped,
|
78
|
-
"SAMPLE_ID",
|
79
|
-
"SNP",
|
80
|
-
fid_col="SAMPLE_ID1"
|
81
|
-
)
|
82
|
-
|
83
|
-
@pytest.mark.parametrize("data_ped", ["file2.pl"], indirect=True)
|
84
|
-
def test_ped_raises_underscope_sid(self, data_ped: pd.DataFrame) -> None:
|
85
|
-
|
86
|
-
# SID_COL
|
87
|
-
with pytest.raises(
|
88
|
-
Exception,
|
89
|
-
match="Replace in 'Sample ID' columns '_' on another a simbols"
|
90
|
-
):
|
91
|
-
res = make_ped(
|
92
|
-
data_ped,
|
93
|
-
"SAMPLE_ID",
|
94
|
-
"SNP"
|
95
|
-
)
|
96
|
-
|
97
|
-
@pytest.mark.parametrize("data_ped", ["file3.pl"], indirect=True)
|
98
|
-
def test_ped_raises_underscope_fid(self, data_ped: pd.DataFrame) -> None:
|
99
|
-
|
100
|
-
# FID_COL
|
101
|
-
with pytest.raises(
|
102
|
-
Exception,
|
103
|
-
match="Replace in 'Family ID' columns '_' on another a simbols"
|
104
|
-
):
|
105
|
-
res = make_ped(
|
106
|
-
data_ped,
|
107
|
-
"SAMPLE_ID",
|
108
|
-
"SNP",
|
109
|
-
fid_col="FAMILY_ID"
|
110
|
-
)
|
111
|
-
|
112
|
-
@pytest.mark.parametrize("data_ped", ["file4.pl"], indirect=True)
|
113
|
-
def test_ped_check_data(self, data_ped: pd.DataFrame) -> None:
|
114
|
-
res = make_ped(
|
115
|
-
data_ped,
|
116
|
-
"SAMPLE_ID",
|
117
|
-
"SNP",
|
118
|
-
fid_col="FAMILY_ID",
|
119
|
-
father_col="father",
|
120
|
-
mother_col="mother",
|
121
|
-
sex_col="sex"
|
122
|
-
)
|
123
|
-
|
124
|
-
res2 = make_ped(
|
125
|
-
data_ped,
|
126
|
-
"SAMPLE_ID",
|
127
|
-
"SNP",
|
128
|
-
fid_col="FAMILY_ID",
|
129
|
-
)
|
130
|
-
|
131
|
-
assert all(res.father.values == list('1234'))
|
132
|
-
assert all(res.mother.values == list('5678'))
|
133
|
-
assert all(res.sex.values == list('1210'))
|
134
|
-
assert all(res2.father.values == list('0000'))
|
135
|
-
assert all(res2.mother.values == list('0000'))
|
136
|
-
assert all(res2.sex.values == list('0000'))
|
snplib/format/tests/test_snp.py
DELETED
@@ -1,128 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_FILES
|
6
|
-
from .. import Snp
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data_fr(request) -> pd.DataFrame:
|
14
|
-
return pd.read_csv(DIR_FILES / f"fsnp/{request.param}", sep="\t")
|
15
|
-
|
16
|
-
|
17
|
-
@pytest.fixture
|
18
|
-
def obj_snp(request) -> Snp:
|
19
|
-
return Snp(fmt=request.param)
|
20
|
-
|
21
|
-
|
22
|
-
class TestSNP(object):
|
23
|
-
|
24
|
-
@pytest.mark.parametrize(
|
25
|
-
"obj_snp, data_fr", [("uga", 'file1.txt')], indirect=True
|
26
|
-
)
|
27
|
-
def test_snp_process_uga_true(
|
28
|
-
self, obj_snp: Snp, data_fr: pd.DataFrame
|
29
|
-
) -> None:
|
30
|
-
|
31
|
-
obj_snp.process(data_fr)
|
32
|
-
assert obj_snp.data is not None and not obj_snp.data.empty
|
33
|
-
assert obj_snp.data.SNP.isin([
|
34
|
-
'02011015010000500', '01110152120222512'
|
35
|
-
]).all()
|
36
|
-
|
37
|
-
@pytest.mark.parametrize("obj_snp", ["uga"], indirect=True)
|
38
|
-
def test_snp_process_uga_empty(self, obj_snp: Snp) -> None:
|
39
|
-
|
40
|
-
obj_snp.process(pd.DataFrame(columns=[
|
41
|
-
'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
|
42
|
-
'GC Score', 'GT Score'
|
43
|
-
]))
|
44
|
-
assert obj_snp.data is not None and obj_snp.data.empty
|
45
|
-
|
46
|
-
@pytest.mark.parametrize(
|
47
|
-
"obj_snp, data_fr",
|
48
|
-
[("uga", 'file1.txt'), (None, 'file1.txt')],
|
49
|
-
indirect=True
|
50
|
-
)
|
51
|
-
def test_snp_process_raises(
|
52
|
-
self, obj_snp: Snp, data_fr: pd.DataFrame
|
53
|
-
) -> None:
|
54
|
-
|
55
|
-
with pytest.raises(KeyError):
|
56
|
-
obj_snp.process(pd.DataFrame(columns=[
|
57
|
-
'SNP Name1', 'Sample ID1', 'Allele1 - AB', 'Allele2 - AB',
|
58
|
-
'GC Score', 'GT Score'
|
59
|
-
]))
|
60
|
-
|
61
|
-
assert obj_snp.data is None
|
62
|
-
|
63
|
-
@pytest.mark.parametrize(
|
64
|
-
"obj_snp, data_fr", [(None, 'file1.txt')], indirect=True
|
65
|
-
)
|
66
|
-
def test_snp_process_df(
|
67
|
-
self, obj_snp: Snp, data_fr: pd.DataFrame
|
68
|
-
) -> None:
|
69
|
-
|
70
|
-
obj_snp.process(data_fr)
|
71
|
-
assert obj_snp.data is not None and not obj_snp.data.empty
|
72
|
-
|
73
|
-
@pytest.mark.parametrize("obj_snp", [None], indirect=True)
|
74
|
-
def test_snp_process_df_empty(self, obj_snp: Snp) -> None:
|
75
|
-
|
76
|
-
obj_snp.process(pd.DataFrame(columns=[
|
77
|
-
'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
|
78
|
-
'GC Score', 'GT Score'
|
79
|
-
]))
|
80
|
-
assert obj_snp.data is not None and obj_snp.data.empty
|
81
|
-
|
82
|
-
@pytest.mark.parametrize(
|
83
|
-
"obj_snp, data_fr", [("uga", 'file1.txt')], indirect=True
|
84
|
-
)
|
85
|
-
def test_snp_to_file_uga1(
|
86
|
-
self, obj_snp: Snp, data_fr: pd.DataFrame, tmp_path
|
87
|
-
) -> None:
|
88
|
-
"""
|
89
|
-
The name sample_id is one length
|
90
|
-
"""
|
91
|
-
|
92
|
-
_dir_sub = tmp_path / "sub"
|
93
|
-
_dir_sub.mkdir()
|
94
|
-
_file_save = _dir_sub / "data_snp.csv"
|
95
|
-
|
96
|
-
obj_snp.process(data_fr)
|
97
|
-
assert obj_snp.data is not None and not obj_snp.data.empty
|
98
|
-
|
99
|
-
obj_snp.to_file(_file_save)
|
100
|
-
assert _file_save.is_file() and _file_save.exists()
|
101
|
-
assert (
|
102
|
-
_file_save.read_text() ==
|
103
|
-
"14814 02011015010000500\n14815 01110152120222512\n"
|
104
|
-
)
|
105
|
-
|
106
|
-
@pytest.mark.parametrize(
|
107
|
-
"obj_snp, data_fr", [("uga", 'file2.txt')], indirect=True
|
108
|
-
)
|
109
|
-
def test_snp_to_file_uga2(
|
110
|
-
self, obj_snp: Snp, data_fr: pd.DataFrame, tmp_path
|
111
|
-
) -> None:
|
112
|
-
"""
|
113
|
-
The name sample_id of different length
|
114
|
-
"""
|
115
|
-
|
116
|
-
_dir_sub = tmp_path / "sub"
|
117
|
-
_dir_sub.mkdir()
|
118
|
-
_file_save = _dir_sub / "data_snp.csv"
|
119
|
-
|
120
|
-
obj_snp.process(data_fr)
|
121
|
-
assert obj_snp.data is not None and not obj_snp.data.empty
|
122
|
-
|
123
|
-
obj_snp.to_file(_file_save)
|
124
|
-
assert _file_save.is_file() and _file_save.exists()
|
125
|
-
assert (
|
126
|
-
_file_save.read_text() ==
|
127
|
-
"14814qwert 02011015010000500\n14815 01110152120222512\n"
|
128
|
-
)
|
@@ -1,164 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_DATA
|
6
|
-
from .. import Discovery, isag_disc
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data() -> pd.DataFrame:
|
14
|
-
return pd.read_csv(DIR_DATA / "parentage_test_disc.csv", sep=" ")
|
15
|
-
|
16
|
-
|
17
|
-
@pytest.fixture
|
18
|
-
def obj_discovery() -> Discovery:
|
19
|
-
return Discovery(isag_markers=isag_disc().markers)
|
20
|
-
|
21
|
-
|
22
|
-
class TestDiscovery(object):
|
23
|
-
|
24
|
-
def test_search_parent_successfully(
|
25
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
26
|
-
) -> None:
|
27
|
-
|
28
|
-
assert obj_discovery.search_parent(
|
29
|
-
data=data,
|
30
|
-
descendant="BY000041988163",
|
31
|
-
parents="EE10512586",
|
32
|
-
snp_name_col="SNP_Name"
|
33
|
-
) is None
|
34
|
-
assert obj_discovery.num_conflicts == 77
|
35
|
-
assert obj_discovery.status == "Excluded"
|
36
|
-
assert obj_discovery.perc_conflicts == 14.86
|
37
|
-
|
38
|
-
def test_search_parent_1(self, data: pd.DataFrame) -> None:
|
39
|
-
"""
|
40
|
-
An exception is thrown for the absence of data with isag markers
|
41
|
-
"""
|
42
|
-
obj_discovery = Discovery()
|
43
|
-
|
44
|
-
with pytest.raises(
|
45
|
-
ValueError, match="Error. No array of snp names to verify"
|
46
|
-
):
|
47
|
-
obj_discovery.search_parent(
|
48
|
-
data=data,
|
49
|
-
descendant="BY000041988163",
|
50
|
-
parents="EE10512586",
|
51
|
-
snp_name_col="SNP_Name"
|
52
|
-
)
|
53
|
-
assert obj_discovery.status is None
|
54
|
-
assert obj_discovery.num_conflicts is None
|
55
|
-
assert obj_discovery.perc_conflicts is None
|
56
|
-
|
57
|
-
def test_search_parent_2(
|
58
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
59
|
-
) -> None:
|
60
|
-
"""
|
61
|
-
Exception when the number of markers required to confirm paternity is
|
62
|
-
less than the established value.
|
63
|
-
"""
|
64
|
-
|
65
|
-
with pytest.raises(
|
66
|
-
Exception, match="Calf call rate is low."
|
67
|
-
):
|
68
|
-
obj_discovery.search_parent(
|
69
|
-
data=data[:-100],
|
70
|
-
descendant="BY000041988163",
|
71
|
-
parents="EE10512586",
|
72
|
-
snp_name_col="SNP_Name"
|
73
|
-
)
|
74
|
-
assert obj_discovery.status is None
|
75
|
-
assert obj_discovery.num_conflicts is None
|
76
|
-
assert obj_discovery.perc_conflicts is None
|
77
|
-
|
78
|
-
def test_search_parent_3(
|
79
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
80
|
-
) -> None:
|
81
|
-
"""
|
82
|
-
Test if the transmitted animal names are not in the dataframe.
|
83
|
-
"""
|
84
|
-
|
85
|
-
# For descendant
|
86
|
-
with pytest.raises(KeyError):
|
87
|
-
obj_discovery.search_parent(
|
88
|
-
data=data,
|
89
|
-
descendant="BY00004198816",
|
90
|
-
parents="EE10512586",
|
91
|
-
snp_name_col="SNP_Name"
|
92
|
-
)
|
93
|
-
assert obj_discovery.status is None
|
94
|
-
assert obj_discovery.num_conflicts is None
|
95
|
-
assert obj_discovery.perc_conflicts is None
|
96
|
-
|
97
|
-
# For parents
|
98
|
-
with pytest.raises(KeyError):
|
99
|
-
obj_discovery.search_parent(
|
100
|
-
data=data,
|
101
|
-
descendant="BY000041988163",
|
102
|
-
parents="EE105125864",
|
103
|
-
snp_name_col="SNP_Name"
|
104
|
-
)
|
105
|
-
assert obj_discovery.status is None
|
106
|
-
assert obj_discovery.num_conflicts is None
|
107
|
-
assert obj_discovery.perc_conflicts is None
|
108
|
-
|
109
|
-
def test_search_parent_4(
|
110
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
111
|
-
) -> None:
|
112
|
-
"""
|
113
|
-
Test when all snp data is not read - equal to 5.
|
114
|
-
"""
|
115
|
-
data[["BY000041988163", "EE10512586"]] = 5
|
116
|
-
|
117
|
-
with pytest.raises(
|
118
|
-
Exception, match="Calf call rate is low."
|
119
|
-
):
|
120
|
-
obj_discovery.search_parent(
|
121
|
-
data=data,
|
122
|
-
descendant="BY000041988163",
|
123
|
-
parents="EE10512586",
|
124
|
-
snp_name_col="SNP_Name"
|
125
|
-
)
|
126
|
-
assert obj_discovery.status is None
|
127
|
-
assert obj_discovery.num_conflicts is None
|
128
|
-
assert obj_discovery.perc_conflicts is None
|
129
|
-
|
130
|
-
def test_search_parent_5(
|
131
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
132
|
-
) -> None:
|
133
|
-
"""
|
134
|
-
Test when there is a complete match.
|
135
|
-
"""
|
136
|
-
data[["BY000041988163", "EE10512586"]] = 2
|
137
|
-
|
138
|
-
obj_discovery.search_parent(
|
139
|
-
data=data,
|
140
|
-
descendant="BY000041988163",
|
141
|
-
parents="EE10512586",
|
142
|
-
snp_name_col="SNP_Name"
|
143
|
-
)
|
144
|
-
assert obj_discovery.status == "Discovered"
|
145
|
-
assert obj_discovery.num_conflicts == 0
|
146
|
-
assert obj_discovery.perc_conflicts == 0.0
|
147
|
-
|
148
|
-
def test_search_parent_6(
|
149
|
-
self, data: pd.DataFrame, obj_discovery: Discovery
|
150
|
-
) -> None:
|
151
|
-
"""
|
152
|
-
Partial match test.
|
153
|
-
"""
|
154
|
-
data.loc[202:, "EE10512586"] = 1
|
155
|
-
|
156
|
-
obj_discovery.search_parent(
|
157
|
-
data=data,
|
158
|
-
descendant="BY000041988163",
|
159
|
-
parents="EE10512586",
|
160
|
-
snp_name_col="SNP_Name"
|
161
|
-
)
|
162
|
-
assert obj_discovery.status == "Doubtful"
|
163
|
-
assert obj_discovery.num_conflicts == 14
|
164
|
-
assert obj_discovery.perc_conflicts == 2.70
|
@@ -1,160 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_DATA
|
6
|
-
from .. import Verification, isag_verif
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data() -> pd.DataFrame:
|
14
|
-
return pd.read_csv(DIR_DATA / "parentage_test_verf.csv", sep=" ")
|
15
|
-
|
16
|
-
|
17
|
-
@pytest.fixture
|
18
|
-
def obj_verification() -> Verification:
|
19
|
-
return Verification(isag_marks=isag_verif().markers)
|
20
|
-
|
21
|
-
|
22
|
-
class TestVerification(object):
|
23
|
-
|
24
|
-
def test_check_on_successfully(
|
25
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
26
|
-
) -> None:
|
27
|
-
|
28
|
-
assert obj_verification.check_on(
|
29
|
-
data=data,
|
30
|
-
descendant="BY000041988163",
|
31
|
-
parent="EE10512586",
|
32
|
-
snp_name_col="SNP_Name"
|
33
|
-
) is None
|
34
|
-
assert obj_verification.num_conflicts == 31
|
35
|
-
assert obj_verification.status == "Excluded"
|
36
|
-
|
37
|
-
def test_check_on_1(self, data: pd.DataFrame) -> None:
|
38
|
-
"""
|
39
|
-
The test checks the exception for missing token data for verification.
|
40
|
-
"""
|
41
|
-
obj_verification = Verification()
|
42
|
-
|
43
|
-
with pytest.raises(
|
44
|
-
ValueError, match="Error. No array of snp names to verify"
|
45
|
-
):
|
46
|
-
obj_verification.check_on(
|
47
|
-
data=data,
|
48
|
-
descendant="BY000041988163",
|
49
|
-
parent="EE10512586",
|
50
|
-
snp_name_col="SNP_Name"
|
51
|
-
)
|
52
|
-
assert obj_verification.status is None
|
53
|
-
assert obj_verification.num_conflicts is None
|
54
|
-
|
55
|
-
def test_check_on_2(
|
56
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
57
|
-
) -> None:
|
58
|
-
"""
|
59
|
-
Exception for low call rate in both animals.
|
60
|
-
"""
|
61
|
-
|
62
|
-
with pytest.raises(
|
63
|
-
Exception, match="Calf and parent have low call rate"
|
64
|
-
):
|
65
|
-
obj_verification.check_on(
|
66
|
-
data=data[:-100],
|
67
|
-
descendant="BY000041988163",
|
68
|
-
parent="EE10512586",
|
69
|
-
snp_name_col="SNP_Name"
|
70
|
-
)
|
71
|
-
assert obj_verification.status is None
|
72
|
-
assert obj_verification.num_conflicts is None
|
73
|
-
|
74
|
-
def test_check_on_3(
|
75
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
76
|
-
) -> None:
|
77
|
-
"""
|
78
|
-
Exception when paired call rate is below threshold.
|
79
|
-
"""
|
80
|
-
|
81
|
-
data.loc[228:, 'BY000041988163'] = 5
|
82
|
-
data.loc[239:, 'EE10512586'] = 5
|
83
|
-
|
84
|
-
with pytest.raises(
|
85
|
-
Exception, match="Pair call rate is low"
|
86
|
-
):
|
87
|
-
obj_verification.check_on(
|
88
|
-
data=data,
|
89
|
-
descendant="BY000041988163",
|
90
|
-
parent="EE10512586",
|
91
|
-
snp_name_col="SNP_Name"
|
92
|
-
)
|
93
|
-
assert obj_verification.status is None
|
94
|
-
assert obj_verification.num_conflicts is None
|
95
|
-
|
96
|
-
def test_search_parent_4(
|
97
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
98
|
-
) -> None:
|
99
|
-
"""
|
100
|
-
Test if the transmitted animal names are not in the dataframe.
|
101
|
-
"""
|
102
|
-
|
103
|
-
# For descendant
|
104
|
-
with pytest.raises(KeyError):
|
105
|
-
obj_verification.check_on(
|
106
|
-
data=data,
|
107
|
-
descendant="BY00004198816",
|
108
|
-
parent="EE10512586",
|
109
|
-
snp_name_col="SNP_Name"
|
110
|
-
)
|
111
|
-
assert obj_verification.status is None
|
112
|
-
assert obj_verification.num_conflicts is None
|
113
|
-
|
114
|
-
# For parents
|
115
|
-
with pytest.raises(KeyError):
|
116
|
-
obj_verification.check_on(
|
117
|
-
data=data,
|
118
|
-
descendant="BY000041988163",
|
119
|
-
parent="EE105125864",
|
120
|
-
snp_name_col="SNP_Name"
|
121
|
-
)
|
122
|
-
assert obj_verification.status is None
|
123
|
-
assert obj_verification.num_conflicts is None
|
124
|
-
|
125
|
-
def test_search_parent_5(
|
126
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
127
|
-
) -> None:
|
128
|
-
"""
|
129
|
-
Test when all snp data is not read - equal to 5
|
130
|
-
"""
|
131
|
-
data[["BY000041988163", "EE10512586"]] = 5
|
132
|
-
|
133
|
-
with pytest.raises(
|
134
|
-
Exception, match="Calf and parent have low call rate"
|
135
|
-
):
|
136
|
-
obj_verification.check_on(
|
137
|
-
data=data,
|
138
|
-
descendant="BY000041988163",
|
139
|
-
parent="EE10512586",
|
140
|
-
snp_name_col="SNP_Name"
|
141
|
-
)
|
142
|
-
assert obj_verification.status is None
|
143
|
-
assert obj_verification.num_conflicts is None
|
144
|
-
|
145
|
-
def test_search_parent_6(
|
146
|
-
self, data: pd.DataFrame, obj_verification: Verification
|
147
|
-
) -> None:
|
148
|
-
"""
|
149
|
-
Test when there is a complete match
|
150
|
-
"""
|
151
|
-
data[["BY000041988163", "EE10512586"]] = 2
|
152
|
-
|
153
|
-
obj_verification.check_on(
|
154
|
-
data=data,
|
155
|
-
descendant="BY000041988163",
|
156
|
-
parent="EE10512586",
|
157
|
-
snp_name_col="SNP_Name"
|
158
|
-
)
|
159
|
-
assert obj_verification.status == "Accept"
|
160
|
-
assert obj_verification.num_conflicts == 0
|