snplib 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. snplib/__init__.py +8 -8
  2. snplib/finalreport/__init__.py +7 -7
  3. snplib/finalreport/_finalreport.py +251 -251
  4. snplib/format/__init__.py +19 -19
  5. snplib/format/__settings.py +7 -7
  6. snplib/format/_plink.py +291 -305
  7. snplib/format/_snp.py +113 -113
  8. snplib/parentage/__init__.py +15 -15
  9. snplib/parentage/_discov.py +102 -102
  10. snplib/parentage/_isagmark.py +15 -15
  11. snplib/parentage/_verif.py +91 -91
  12. snplib/parentage/isag_disc.pl +0 -0
  13. snplib/parentage/isag_verif.pl +0 -0
  14. snplib/statistics/__init__.py +16 -16
  15. snplib/statistics/_callrate.py +60 -59
  16. snplib/statistics/_freq.py +67 -67
  17. snplib/statistics/_snphwe.py +132 -132
  18. {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/LICENSE +674 -674
  19. {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/METADATA +80 -97
  20. snplib-1.0.9.dist-info/RECORD +22 -0
  21. snplib/finalreport/tests/__init__.py +0 -7
  22. snplib/finalreport/tests/test_finalreport.py +0 -215
  23. snplib/format/tests/__init__.py +0 -7
  24. snplib/format/tests/test_plink_fam.py +0 -121
  25. snplib/format/tests/test_plink_lgen.py +0 -106
  26. snplib/format/tests/test_plink_map.py +0 -42
  27. snplib/format/tests/test_plink_ped.py +0 -136
  28. snplib/format/tests/test_snp.py +0 -128
  29. snplib/parentage/tests/__init__.py +0 -7
  30. snplib/parentage/tests/test_discov.py +0 -164
  31. snplib/parentage/tests/test_verif.py +0 -160
  32. snplib/statistics/tests/__init__.py +0 -7
  33. snplib/statistics/tests/test_callrate.py +0 -171
  34. snplib/statistics/tests/test_freq_allele.py +0 -87
  35. snplib/statistics/tests/test_freq_maf.py +0 -17
  36. snplib/statistics/tests/test_hwe_t.py +0 -41
  37. snplib/statistics/tests/test_snphwe.py +0 -41
  38. snplib-1.0.7.dist-info/RECORD +0 -37
  39. {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/WHEEL +0 -0
  40. {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/top_level.txt +0 -0
@@ -1,171 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from . import DIR_DATA
6
- from .. import call_rate
7
-
8
- import pytest
9
- import numpy as np
10
- import pandas as pd
11
-
12
-
13
- @pytest.fixture
14
- def data_df(request) -> pd.DataFrame:
15
- match request.param:
16
- case "cra":
17
- return pd.read_pickle(DIR_DATA / "cr/file_cra.pl")
18
-
19
- case "crm":
20
- return pd.read_pickle(DIR_DATA / "cr/file_crm.pl")
21
-
22
-
23
- @pytest.fixture
24
- def data_str() -> list[str]:
25
- return ['02011015010000500', '01110152120222512']
26
-
27
-
28
- class TestCallRateAnimal(object):
29
-
30
- @pytest.mark.parametrize("data_df", ["cra"], indirect=True)
31
- def test_cra_datafame_dtype_obj(self, data_df: pd.DataFrame) -> None:
32
- data_df.SNP = data_df.SNP.astype(str)
33
- result = call_rate(data=data_df, id_col="SAMPLE_ID", snp_col="SNP")
34
-
35
- assert isinstance(result, pd.DataFrame) and not result.empty
36
- assert result.SNP.round(6).isin([0.882353, 0.882353]).all()
37
-
38
- @pytest.mark.parametrize("data_df", ["cra"], indirect=True)
39
- def test_cra_datafame_dtype_int(self, data_df: pd.DataFrame) -> None:
40
- data_df.SNP = data_df.SNP.astype("int8")
41
- result = call_rate(data=data_df, id_col="SAMPLE_ID", snp_col="SNP")
42
-
43
- assert isinstance(result, pd.DataFrame) and not result.empty
44
- assert result.SNP.round(6).isin([0.882353, 0.882353]).all()
45
-
46
- @pytest.mark.parametrize("data_df", ["cra"], indirect=True)
47
- def test_cra_datafame_dtype_float(self, data_df: pd.DataFrame) -> None:
48
- data_df.SNP = data_df.SNP.astype("float32")
49
- result = call_rate(data=data_df, id_col="SAMPLE_ID", snp_col="SNP")
50
-
51
- assert isinstance(result, pd.DataFrame) and not result.empty
52
- assert result.SNP.round(6).isin([0.882353, 0.882353]).all()
53
-
54
- @pytest.mark.parametrize("data_df", ["cra"], indirect=True)
55
- def test_cra_datafame_dtype_random_simbols(
56
- self, data_df: pd.DataFrame
57
- ) -> None:
58
- data_df.SNP = [
59
- np.random.choice(["A", "C", "G", "T"])
60
- for _ in range(data_df.SNP.shape[0])
61
- ]
62
- result = call_rate(data=data_df, id_col="SAMPLE_ID", snp_col="SNP")
63
-
64
- assert result is None
65
-
66
- def test_cra_datafame_empty1(self) -> None:
67
- with pytest.raises(KeyError):
68
- call_rate(data=pd.DataFrame(), id_col="SAMPLE_ID", snp_col="SNP")
69
-
70
- def test_cra_datafame_empty2(self) -> None:
71
- result = call_rate(
72
- data=pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
73
- id_col="SAMPLE_ID",
74
- snp_col="SNP"
75
- )
76
-
77
- assert isinstance(result, pd.DataFrame) and result.empty
78
-
79
- @pytest.mark.parametrize("data_df", ["cra"], indirect=True)
80
- def test_cra_datafame_fail(self, data_df: pd.DataFrame) -> None:
81
- with pytest.raises(KeyError):
82
- call_rate(data=data_df, id_col="SAMPLE_ID")
83
- call_rate(data=data_df, snp_col="SNP")
84
- call_rate(data=data_df)
85
-
86
- def test_cra_str_int(self, data_str: list[str]) -> None:
87
- for sequence in data_str:
88
- assert call_rate(data=sequence) == 0.882353
89
-
90
- def test_cra_str_simbols(self) -> None:
91
- data_str = ['GCATGAGGTATACTCTA', 'CGCCATGCTGTATATCC']
92
-
93
- for sequence in data_str:
94
- assert call_rate(data=sequence) is None
95
-
96
- def test_cra_str_empty(self) -> None:
97
- assert call_rate(data="") is None
98
-
99
- def test_cra_str_mixid(self) -> None:
100
- assert call_rate(data="GCATGAG3G4T6A67TACTCTA") is None
101
-
102
-
103
- class TestCallRateMarker(object):
104
-
105
- @pytest.mark.parametrize("data_df", ["crm"], indirect=True)
106
- def test_crm_datafame_dtype_obj(self, data_df: pd.DataFrame) -> None:
107
- data_df.SNP = data_df.SNP.astype(str)
108
- result = call_rate(data=data_df, id_col="SNP_NAME", snp_col="SNP")
109
-
110
- assert isinstance(result, pd.DataFrame) and not result.empty
111
- assert result.SNP.round(6).isin([0.727273, 0.909091, 0.818182]).all()
112
-
113
- @pytest.mark.parametrize("data_df", ["crm"], indirect=True)
114
- def test_crm_datafame_dtype_int(self, data_df: pd.DataFrame) -> None:
115
- data_df.SNP = data_df.SNP.astype("int8")
116
- result = call_rate(data=data_df, id_col="SNP_NAME", snp_col="SNP")
117
-
118
- assert isinstance(result, pd.DataFrame) and not result.empty
119
- assert result.SNP.round(6).isin([0.727273, 0.909091, 0.818182]).all()
120
-
121
- @pytest.mark.parametrize("data_df", ["crm"], indirect=True)
122
- def test_crm_datafame_dtype_float(self, data_df: pd.DataFrame) -> None:
123
- data_df.SNP = data_df.SNP.astype("float32")
124
- result = call_rate(data=data_df, id_col="SNP_NAME", snp_col="SNP")
125
-
126
- assert isinstance(result, pd.DataFrame) and not result.empty
127
- assert result.SNP.round(6).isin([0.727273, 0.909091, 0.818182]).all()
128
-
129
- @pytest.mark.parametrize("data_df", ["crm"], indirect=True)
130
- def test_crm_datafame_dtype_random_simbols(
131
- self, data_df: pd.DataFrame
132
- ) -> None:
133
- data_df.SNP = [
134
- np.random.choice(["A", "C", "G", "T"])
135
- for _ in range(data_df.SNP.shape[0])
136
- ]
137
- result = call_rate(data=data_df, id_col="SNP_NAME", snp_col="SNP")
138
-
139
- assert result is None
140
-
141
- def test_crm_datafame_empty1(self) -> None:
142
- with pytest.raises(KeyError):
143
- call_rate(data=pd.DataFrame(), id_col="SNP_NAME", snp_col="SNP")
144
-
145
- def test_crm_datafame_empty2(self) -> None:
146
- result = call_rate(
147
- data=pd.DataFrame(columns=["SNP_NAME", "SNP"]),
148
- id_col="SNP_NAME",
149
- snp_col="SNP"
150
- )
151
-
152
- assert isinstance(result, pd.DataFrame) and result.empty
153
-
154
- @pytest.mark.parametrize("data_df", ["crm"], indirect=True)
155
- def test_crm_datafame_fail(self, data_df: pd.DataFrame) -> None:
156
- with pytest.raises(KeyError):
157
- call_rate(data=data_df, id_col="SNP_NAME")
158
- call_rate(data=data_df, snp_col="SNP")
159
- call_rate(data=data_df)
160
-
161
- def test_crm_str_simbols(self) -> None:
162
- data_str = ['GCATGAGGTATACTCTA', 'CGCCATGCTGTATATCC']
163
-
164
- for sequence in data_str:
165
- assert call_rate(data=sequence) is None
166
-
167
- def test_crm_str_empty(self) -> None:
168
- assert call_rate(data="") is None
169
-
170
- def test_crm_str_mixid(self) -> None:
171
- assert call_rate(data="GCATGAG3G4T6A67TACTCTA") is None
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from . import DIR_DATA
6
- from .. import allele_freq
7
-
8
- import pytest
9
- import numpy as np
10
- import pandas as pd
11
-
12
-
13
- @pytest.fixture
14
- def data_df() -> pd.DataFrame:
15
- return pd.read_pickle(DIR_DATA / "freq/file.pl")
16
- # [0. , 0.9 , 0.889]
17
-
18
-
19
- def data_str() -> list[tuple]:
20
- return [
21
- ('2212120', 0.714),
22
- ('02011015010000500', 0.2),
23
- ('01110152120222512', 0.6)
24
- ]
25
-
26
-
27
- class TestAlleleFreq(object):
28
-
29
- def test_allele_freq_df_dtype_obj(self, data_df: pd.DataFrame) -> None:
30
- data_df.SNP = data_df.SNP.astype(str)
31
- result = allele_freq(data=data_df, id_col="SNP_NAME", seq_col="SNP")
32
-
33
- assert isinstance(result, pd.DataFrame) and not result.empty
34
- assert result.SNP.round(6).isin([0.000, 0.900, 0.889]).all()
35
-
36
- def test_allele_freq_df_dtype_int(self, data_df: pd.DataFrame) -> None:
37
- data_df.SNP = data_df.SNP.astype("int8")
38
- result = allele_freq(data=data_df, id_col="SNP_NAME", seq_col="SNP")
39
-
40
- assert isinstance(result, pd.DataFrame) and not result.empty
41
- assert result.SNP.round(6).isin([0.000, 0.900, 0.889]).all()
42
-
43
- def test_allele_freq_df_dtype_float(self, data_df: pd.DataFrame) -> None:
44
- data_df.SNP = data_df.SNP.astype("float32")
45
- result = allele_freq(data=data_df, id_col="SNP_NAME", seq_col="SNP")
46
-
47
- assert isinstance(result, pd.DataFrame) and not result.empty
48
- assert result.SNP.round(6).isin([0.000, 0.900, 0.889]).all()
49
-
50
- def test_allele_freq_df_data_rand_simbols(
51
- self, data_df: pd.DataFrame
52
- ) -> None:
53
- data_df.SNP = [
54
- np.random.choice(["A", "C", "G", "T"])
55
- for _ in range(data_df.SNP.shape[0])
56
- ]
57
- assert allele_freq(
58
- data=data_df, id_col="SNP_NAME", seq_col="SNP"
59
- ) is None
60
-
61
- def test_allele_freq_df_empty(self) -> None:
62
- with pytest.raises(KeyError):
63
- allele_freq(
64
- data=pd.DataFrame(), id_col="SNP_NAME", seq_col="SNP"
65
- )
66
-
67
- def test_allele_freq_df_empty_only_columns(self) -> None:
68
- result = allele_freq(
69
- data=pd.DataFrame(columns=["SNP_NAME", "SNP"]),
70
- id_col="SNP_NAME",
71
- seq_col="SNP"
72
- )
73
-
74
- assert isinstance(result, pd.DataFrame) and result.empty
75
-
76
- def test_allele_freq_df_raises(self, data_df: pd.DataFrame) -> None:
77
- with pytest.raises(KeyError):
78
- allele_freq(data=data_df, id_col="SNP_NAME")
79
- allele_freq(data=data_df, seq_col="SNP")
80
- allele_freq(data=data_df)
81
-
82
- @pytest.mark.parametrize("data, obs_value", data_str())
83
- def test_allele_freq_str(self, data: str, obs_value: float) -> None:
84
- assert allele_freq(data=data) == obs_value
85
-
86
- def test_allele_freq_non_type(self) -> None:
87
- assert allele_freq(data=1423) is None
@@ -1,17 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from .. import minor_allele_freq as maf
6
-
7
- import pytest
8
-
9
-
10
- class TestMinorAlleleFreq(object):
11
-
12
- @pytest.mark.parametrize("value, res", [
13
- (0.0, 0.0), (0.9, 0.1), (0.889, 0.111),
14
- (0.714, 0.286), (0.22, 0.22), (0.45, 0.45), (0.6, 0.4)
15
- ])
16
- def test_minor_allele_freq(self, value: float, res: float) -> None:
17
- assert maf(value) == res
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from .. import hwe_test
6
-
7
- import pytest
8
- import pandas as pd
9
-
10
-
11
- class TestHWE(object):
12
-
13
- @pytest.mark.parametrize(
14
- "seq, freq",
15
- [
16
- ('2212120', 0.714),
17
- ('02011015010000500', 0.2),
18
- ('01110152120222512', 0.6),
19
- ('00005005005', 0.0),
20
- ('22521212222', 0.9),
21
- ('52221521222', 0.889)
22
- ]
23
- )
24
- def test_hweT_true(self, seq: str, freq: float) -> None:
25
- """
26
- check snphwe gives expected p-values
27
- """
28
-
29
- _seq_snp = pd.Series(list(map(int, seq)))
30
-
31
- assert hwe_test(_seq_snp, freq)
32
-
33
- @pytest.mark.parametrize("seq, freq", [('000000000102', 0.125)])
34
- def test_hweT_false(self, seq: str, freq: float) -> None:
35
- """
36
- check snphwe gives expected p-values
37
- """
38
-
39
- _seq_snp = pd.Series(list(map(int, seq)))
40
-
41
- assert not hwe_test(_seq_snp, freq)
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from .. import hwe
6
-
7
- import pytest
8
-
9
-
10
- class TestHWE(object):
11
-
12
- def test_snphwe(self) -> None:
13
- """
14
- check snphwe gives expected p-values
15
- """
16
- assert hwe(500, 10, 5000) == 0.6515718999145375
17
- assert hwe(1000, 20, 5000) == 1.2659849194317374e-05
18
-
19
- def test_snphwe_odd_inputs(self) -> None:
20
- """
21
- check snphwe with odd inputs
22
- """
23
- # should raise errors with odd inputs
24
-
25
- with pytest.raises(ValueError, match="snphwe: zero genotypes"):
26
- hwe(0, 0, 0)
27
-
28
- with pytest.raises(ValueError, match="snphwe: negative allele count"):
29
- hwe(-5, 10, 1000)
30
-
31
- def test_snphwe_large_input(self) -> None:
32
- """
33
- check snphwe doesn't give errors with large sample sizes
34
- """
35
- assert hwe(200000, 200000, 200000) == 0.0
36
-
37
- def test_snphwe_uncertain_genotypes(self) -> None:
38
- """
39
- check uncertain genotypes give correct p-values
40
- """
41
- assert hwe(4989.99999, 494999.999, 9.9999999) == 0.5702231983054381
@@ -1,37 +0,0 @@
1
- snplib/__init__.py,sha256=xw0VDA6vCABCOwfQHF2L2paFnc4Q_ss7OvFLJYjeU-U,204
2
- snplib/finalreport/__init__.py,sha256=yF48v321n31WqnzzX3F_vZ41t0NkeWruJVIOnM1yLj0,168
3
- snplib/finalreport/_finalreport.py,sha256=mhggK7IlcwfAywAFKTxqKnmErduyWzPbgA-uPGKkBYA,6127
4
- snplib/finalreport/tests/__init__.py,sha256=px8I01mwlI8cLhKIroJus5ga7PDXGI7T0QL7G3bJLSc,181
5
- snplib/finalreport/tests/test_finalreport.py,sha256=OASntKSCbLGfS9JX4z4Q4cnivKkVk6YQlsuDgLy5gZ0,6713
6
- snplib/format/__init__.py,sha256=nA7SIUSKZumP62yWvstew8ASLsC4Uz__Ywr8roGdc18,280
7
- snplib/format/__settings.py,sha256=UUK1kc3Fnzl9S3U7MQEbGrumyzogKNW0nllys41ymfk,296
8
- snplib/format/_plink.py,sha256=Haqd28yTdGMYwBaWaE9qf9TKMRtY12KvBygljZz6CWM,10834
9
- snplib/format/_snp.py,sha256=vKUhsNHjfRCXPxNV0wP1xcPM831SOGIu4RPmuLYh-g4,3285
10
- snplib/format/tests/__init__.py,sha256=px8I01mwlI8cLhKIroJus5ga7PDXGI7T0QL7G3bJLSc,181
11
- snplib/format/tests/test_plink_fam.py,sha256=TulkoH1OIATa2sJRKXx2DvOcF0Y7vESJCQ1LKC9aa5E,2878
12
- snplib/format/tests/test_plink_lgen.py,sha256=P1id3isYbiL7jqaC_-hKrw_-H_zbrX2_OopyDN54ueI,2311
13
- snplib/format/tests/test_plink_map.py,sha256=mNErvrP3gfE19-UunsO-miEjURyjj226L4UiZ3tqMnE,989
14
- snplib/format/tests/test_plink_ped.py,sha256=0QtejE_V70jaATTp5mDrDvEvqehV2pTMM4MRU9j3lAE,3074
15
- snplib/format/tests/test_snp.py,sha256=LL61fAy2U4pybv_eua85XfvAPGTW3lgi475aL1lyRd4,3391
16
- snplib/parentage/__init__.py,sha256=9wgWZxDJA0FfXs8MT-VVF3O7LfZhDh9y-jEV4_wE4-c,295
17
- snplib/parentage/_discov.py,sha256=YHM83V92uIc8wBtKySU3cschikCCg1vc4wY2fWkg5Xc,3282
18
- snplib/parentage/_isagmark.py,sha256=djEKEHn15vEzcovIh2t_r9PLJOztEM40enWZJf1Un44,371
19
- snplib/parentage/_verif.py,sha256=lcfFMZrNC6ymIMU2cel9Dudgqc3OZ6CIASjDUhnos7E,3110
20
- snplib/parentage/tests/__init__.py,sha256=-mLPBAje75GqCilX0dkqw9KQj_HPnxjm5R6_xHVe6k0,179
21
- snplib/parentage/tests/test_discov.py,sha256=u2vdAYSoiB6PRHlys07LWetQb-rEwT_8p_5Ajm5Jneg,4314
22
- snplib/parentage/tests/test_verif.py,sha256=FgZ_MRpGa2WbEC0S6sYiElkDzt3JZ4ENVza68eJFRtQ,4111
23
- snplib/statistics/__init__.py,sha256=2xN_dUTProTBUzPW3oKi_CZ0QlqjIUlxbtGfsBqLD2c,318
24
- snplib/statistics/_callrate.py,sha256=r0cn5s1RHSBVDG6GrcmN210zeMTpHCOiOjdgkETV7Yg,1924
25
- snplib/statistics/_freq.py,sha256=ZxuZLm8vFS60YKMcrTZDNuj1Y22FkqfaFp76R4hivLY,1690
26
- snplib/statistics/_snphwe.py,sha256=lvIuRYYs625tduNa_DU5OfqWUB7fHnXnh5VoSkNQe_Q,3485
27
- snplib/statistics/tests/__init__.py,sha256=-mLPBAje75GqCilX0dkqw9KQj_HPnxjm5R6_xHVe6k0,179
28
- snplib/statistics/tests/test_callrate.py,sha256=-PK_VFWx76MWFXMOPLwlzEb570fKXt1nK4gjYe4bNAQ,5824
29
- snplib/statistics/tests/test_freq_allele.py,sha256=5ik_2sdupTcf4cCnEflyH5aPJ0k8tk68mih2faeF-ZY,2673
30
- snplib/statistics/tests/test_freq_maf.py,sha256=DIqpl52fMKyHTL4YEP1ZZ9_QNV91T-QgjTrA4lQeGPw,447
31
- snplib/statistics/tests/test_hwe_t.py,sha256=34wr6EE1ZVqL2zXgSNgBwsGplt0YtbyVFJ7c0uiYBZw,888
32
- snplib/statistics/tests/test_snphwe.py,sha256=dB62NvmrSrZPXXgJu6ni1xSxJxRqeLBkVp9ZXpfAV8s,1036
33
- snplib-1.0.7.dist-info/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
34
- snplib-1.0.7.dist-info/METADATA,sha256=iR0uXLWtZ_T_ztUacJ4TeYhXI_mbFxvEDY61lDO7vpU,2521
35
- snplib-1.0.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
36
- snplib-1.0.7.dist-info/top_level.txt,sha256=CGCrLXuCSyXPCTwMFQjPxQR7b93FFFft56sAPPun_2g,7
37
- snplib-1.0.7.dist-info/RECORD,,
File without changes