snplib 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snplib/__init__.py +8 -8
- snplib/finalreport/__init__.py +7 -7
- snplib/finalreport/_finalreport.py +251 -251
- snplib/format/__init__.py +19 -19
- snplib/format/__settings.py +7 -7
- snplib/format/_plink.py +291 -305
- snplib/format/_snp.py +113 -113
- snplib/parentage/__init__.py +15 -15
- snplib/parentage/_discov.py +102 -102
- snplib/parentage/_isagmark.py +15 -15
- snplib/parentage/_verif.py +91 -91
- snplib/parentage/isag_disc.pl +0 -0
- snplib/parentage/isag_verif.pl +0 -0
- snplib/statistics/__init__.py +16 -16
- snplib/statistics/_callrate.py +60 -59
- snplib/statistics/_freq.py +67 -67
- snplib/statistics/_snphwe.py +132 -132
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/LICENSE +674 -674
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/METADATA +80 -97
- snplib-1.0.9.dist-info/RECORD +22 -0
- snplib/finalreport/tests/__init__.py +0 -7
- snplib/finalreport/tests/test_finalreport.py +0 -215
- snplib/format/tests/__init__.py +0 -7
- snplib/format/tests/test_plink_fam.py +0 -121
- snplib/format/tests/test_plink_lgen.py +0 -106
- snplib/format/tests/test_plink_map.py +0 -42
- snplib/format/tests/test_plink_ped.py +0 -136
- snplib/format/tests/test_snp.py +0 -128
- snplib/parentage/tests/__init__.py +0 -7
- snplib/parentage/tests/test_discov.py +0 -164
- snplib/parentage/tests/test_verif.py +0 -160
- snplib/statistics/tests/__init__.py +0 -7
- snplib/statistics/tests/test_callrate.py +0 -171
- snplib/statistics/tests/test_freq_allele.py +0 -87
- snplib/statistics/tests/test_freq_maf.py +0 -17
- snplib/statistics/tests/test_hwe_t.py +0 -41
- snplib/statistics/tests/test_snphwe.py +0 -41
- snplib-1.0.7.dist-info/RECORD +0 -37
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/WHEEL +0 -0
- {snplib-1.0.7.dist-info → snplib-1.0.9.dist-info}/top_level.txt +0 -0
@@ -1,97 +1,80 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: snplib
|
3
|
-
Version: 1.0.7
|
4
|
-
Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
|
5
|
-
Author-email: Igor <igor.loschinin@gmail.com>
|
6
|
-
License: GNU
|
7
|
-
Project-URL: Homepage, https://github.com/IgorekLoschinin/snptools
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
9
|
-
Classifier: Operating System :: OS Independent
|
10
|
-
Requires-Python: >=3.10
|
11
|
-
Description-Content-Type: text/markdown
|
12
|
-
License-File: LICENSE
|
13
|
-
Requires-Dist: numpy>=1.26.1
|
14
|
-
Requires-Dist: pandas>=2.1.1
|
15
|
-
Requires-Dist: six>=1.16.0
|
16
|
-
Requires-Dist: swifter>=1.4.0
|
17
|
-
Requires-Dist: xlrd>=2.0.1
|
18
|
-
Requires-Dist: XlsxWriter>=3.1.9
|
19
|
-
Requires-Dist: openpyxl>=3.1.2
|
20
|
-
Requires-Dist: pydantic>=2.4.2
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
-
|
37
|
-
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
```
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
####
|
69
|
-
```
|
70
|
-
from
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
Snp, make_fam, make_ped, make_lgen, make_map
|
82
|
-
)
|
83
|
-
```
|
84
|
-
|
85
|
-
#### Stat:
|
86
|
-
```
|
87
|
-
from snptools import (
|
88
|
-
hwe, hwe_test, call_rate, allele_freq, minor_allele_freq
|
89
|
-
)
|
90
|
-
```
|
91
|
-
|
92
|
-
## Documentation
|
93
|
-
Detailed documentation on how to use SNPTools is available; see the [docs](./docs/_build/index.html).
|
94
|
-
|
95
|
-
## License
|
96
|
-
This project is licensed under the GNU General Public License - see the
|
97
|
-
LICENSE file for details.
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: snplib
|
3
|
+
Version: 1.0.9
|
4
|
+
Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
|
5
|
+
Author-email: Igor <igor.loschinin@gmail.com>
|
6
|
+
License: GNU
|
7
|
+
Project-URL: Homepage, https://github.com/IgorekLoschinin/snptools
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Requires-Python: >=3.10
|
11
|
+
Description-Content-Type: text/markdown
|
12
|
+
License-File: LICENSE
|
13
|
+
Requires-Dist: numpy>=1.26.1
|
14
|
+
Requires-Dist: pandas>=2.1.1
|
15
|
+
Requires-Dist: six>=1.16.0
|
16
|
+
Requires-Dist: swifter>=1.4.0
|
17
|
+
Requires-Dist: xlrd>=2.0.1
|
18
|
+
Requires-Dist: XlsxWriter>=3.1.9
|
19
|
+
Requires-Dist: openpyxl>=3.1.2
|
20
|
+
Requires-Dist: pydantic>=2.4.2
|
21
|
+
|
22
|
+
# snptools
|
23
|
+
<p align="center">
|
24
|
+
<img width="150" height="150" src="./iconlib.png">
|
25
|
+
</p>
|
26
|
+
|
27
|
+
**Snptools** is a tool for SNP (Single Nucleotide Polymorphism) data processing,
|
28
|
+
parentage calculation and call rate estimation.
|
29
|
+
|
30
|
+
## Introduction
|
31
|
+
|
32
|
+
SNPs (Single Nucleotide Polymorphisms) represent genetic variations that can
|
33
|
+
be used to analyze genetic data. SNPTools provides a set of tools for working
|
34
|
+
with SNP data, including the following capabilities:
|
35
|
+
|
36
|
+
- SNP data processing - FinalReport.
|
37
|
+
- Parentage Verification and Parentage Discovery Based on SNP Genotypes (ICAR).
|
38
|
+
- Call rate estimation (percentage of missing data).
|
39
|
+
- Processing and preparation of data in plink formats.
|
40
|
+
|
41
|
+
## Installation
|
42
|
+
You can install snptools via pip from [PyPI](https://pypi.org/project/snplib/):
|
43
|
+
```
|
44
|
+
pip install snplib
|
45
|
+
```
|
46
|
+
|
47
|
+
## Usage
|
48
|
+
Snptools provides commands for a variety of operations. Here are examples of
|
49
|
+
usage:
|
50
|
+
|
51
|
+
#### SNP data processing:
|
52
|
+
```
|
53
|
+
from snplib import FinalReport
|
54
|
+
```
|
55
|
+
|
56
|
+
#### Computation of parentage:
|
57
|
+
```
|
58
|
+
from snplib import Discovery, Verification
|
59
|
+
```
|
60
|
+
|
61
|
+
#### Preparation format files:
|
62
|
+
```
|
63
|
+
from snplib import (
|
64
|
+
Snp, make_fam, make_ped, make_lgen, make_map
|
65
|
+
)
|
66
|
+
```
|
67
|
+
|
68
|
+
#### Stat:
|
69
|
+
```
|
70
|
+
from snplib import (
|
71
|
+
hwe, hwe_test, call_rate, allele_freq, minor_allele_freq
|
72
|
+
)
|
73
|
+
```
|
74
|
+
|
75
|
+
## Documentation
|
76
|
+
Detailed documentation on how to use SNPTools is available; see the [docs](docs/_build/index.html).
|
77
|
+
|
78
|
+
## License
|
79
|
+
This project is licensed under the GNU General Public License - see the
|
80
|
+
LICENSE file for details.
|
@@ -0,0 +1,22 @@
|
|
1
|
+
snplib/__init__.py,sha256=xhjj4ZywdwCq91GBh1zfBP_TwFW26-KpHcCUUVvMdgI,196
|
2
|
+
snplib/finalreport/__init__.py,sha256=Yk49x8t-STIfsdP6QLMtaGm1gTj_n-XS8kchPguvW1g,161
|
3
|
+
snplib/finalreport/_finalreport.py,sha256=el_d8MVmpic3wKCRJ-J52VZYSmMuNSf4p_tmPkgh0Z0,5876
|
4
|
+
snplib/format/__init__.py,sha256=3W_l_sP1u9HV3HWwnsJxPGw9anrVknstqLaJmWQaG0k,261
|
5
|
+
snplib/format/__settings.py,sha256=kyAVZ4tiU61sNr3jQhjXbLXRyBA3pjFfCw3fOfSkY14,289
|
6
|
+
snplib/format/_plink.py,sha256=cjT6PkvDJr8KwvQo76i7_Hm1Og4bASYCDN9G7CHsQ00,10372
|
7
|
+
snplib/format/_snp.py,sha256=oI-V4-_w28aX-VxoimywLDDnX6owhdjLbqt9a54_ouU,3172
|
8
|
+
snplib/parentage/__init__.py,sha256=bN3mWTxmaFQ1qzRtyMLaAoxfomz6jnoWa-kmnJ9q_fE,280
|
9
|
+
snplib/parentage/_discov.py,sha256=qGlNzpl4xKOWRr6-fi1osylzizgiPO8vCus8VE56nec,3180
|
10
|
+
snplib/parentage/_isagmark.py,sha256=0xi9YhuIpU7zf16HnWw1XIkcQLk4rTNeAeCE-5p9hQE,356
|
11
|
+
snplib/parentage/_verif.py,sha256=VbX46dC4tl4Qeuw65aRg1s6hSn0FI25Hy9-3U_jxmrg,3019
|
12
|
+
snplib/parentage/isag_disc.pl,sha256=XzjcsnO_kwPg4WaE2YMuZXBNHQ9ixi6pg5n2mfGOuJU,14219
|
13
|
+
snplib/parentage/isag_verif.pl,sha256=e_c4YGd5_JXGWqFQwmcxjp6hEkdcqpK_5y5MqJ8J9YY,8254
|
14
|
+
snplib/statistics/__init__.py,sha256=XJFU7mEwAJJ2M187jEkO8rFNYKoxF-g9KF_stS7eFFw,302
|
15
|
+
snplib/statistics/_callrate.py,sha256=yfHxnNVpcDfV3qxZVwrk2RWPgy9dTf7NHWczDUORwtY,1866
|
16
|
+
snplib/statistics/_freq.py,sha256=ZPZBZM3xq9EseOxuMzRVvzkjjFfaaA4ZvF7XI8ctON0,1623
|
17
|
+
snplib/statistics/_snphwe.py,sha256=KcoRGwovMCc53-GJ8VfYs_3ZEHObgt8B0EvrW5nFnmM,3353
|
18
|
+
snplib-1.0.9.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
19
|
+
snplib-1.0.9.dist-info/METADATA,sha256=tgDJRi81nc66_LSZtTTkvPENK6r9OZ6WKf_MGPk7hcg,2163
|
20
|
+
snplib-1.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
21
|
+
snplib-1.0.9.dist-info/top_level.txt,sha256=CGCrLXuCSyXPCTwMFQjPxQR7b93FFFft56sAPPun_2g,7
|
22
|
+
snplib-1.0.9.dist-info/RECORD,,
|
@@ -1,215 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
|
6
|
-
from . import DIR_FILES
|
7
|
-
from .._finalreport import FinalReport
|
8
|
-
|
9
|
-
import pytest
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def report(request) -> FinalReport:
|
14
|
-
return FinalReport(allele=request.param)
|
15
|
-
|
16
|
-
|
17
|
-
class TestFinalReport(object):
|
18
|
-
|
19
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
20
|
-
def test_handle_1(self, report: FinalReport) -> None:
|
21
|
-
""" If both files do not exist """
|
22
|
-
|
23
|
-
assert not report.handle(
|
24
|
-
DIR_FILES / "fr/f.txt", DIR_FILES / "fr/f.xlsx",
|
25
|
-
)
|
26
|
-
|
27
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
28
|
-
def test_handle_2(self, report: FinalReport) -> None:
|
29
|
-
""" If the file to convert does not exist """
|
30
|
-
|
31
|
-
assert not report.handle(
|
32
|
-
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/f.xlsx",
|
33
|
-
)
|
34
|
-
|
35
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
36
|
-
def test_handle_3(self, report: FinalReport) -> None:
|
37
|
-
""" If the data does not contain header data """
|
38
|
-
|
39
|
-
report.handle(
|
40
|
-
DIR_FILES / "fr/file2.txt", DIR_FILES / "fr/file2.xlsx",
|
41
|
-
)
|
42
|
-
|
43
|
-
assert len(report.header) == 0 and not report.snp_data.empty
|
44
|
-
|
45
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
46
|
-
def test_handle_4(self, report: FinalReport) -> None:
|
47
|
-
""" If the file contains only header and field names """
|
48
|
-
|
49
|
-
report.handle(
|
50
|
-
DIR_FILES / "fr/file3.txt", DIR_FILES / "fr/file3.xlsx",
|
51
|
-
)
|
52
|
-
|
53
|
-
assert report.snp_data is not None and report.snp_data.empty
|
54
|
-
|
55
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
56
|
-
def test_handle_5(self, report: FinalReport) -> None:
|
57
|
-
""" If the data file is empty """
|
58
|
-
|
59
|
-
with pytest.raises(
|
60
|
-
Exception, match="Not data in file FinalReport.txt"
|
61
|
-
):
|
62
|
-
report.handle(
|
63
|
-
DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
|
64
|
-
)
|
65
|
-
|
66
|
-
assert report.snp_data is None
|
67
|
-
|
68
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
69
|
-
def test_handle_6(self, report: FinalReport) -> None:
|
70
|
-
""" If the conversion file is empty """
|
71
|
-
|
72
|
-
assert report.handle(
|
73
|
-
DIR_FILES / "fr/file6.txt", DIR_FILES / "fr/file6.xlsx",
|
74
|
-
)
|
75
|
-
|
76
|
-
assert not report.snp_data.empty
|
77
|
-
assert len(report.header) != 0
|
78
|
-
|
79
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
80
|
-
def test_handle_7(self, report: FinalReport) -> None:
|
81
|
-
""" If the data file is not needed to convert ID name """
|
82
|
-
|
83
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
84
|
-
|
85
|
-
assert not report.snp_data.empty
|
86
|
-
assert len(report.header) != 0
|
87
|
-
|
88
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
89
|
-
def test_handle_8(self, report: FinalReport) -> None:
|
90
|
-
""" If files exist """
|
91
|
-
|
92
|
-
assert report.handle(
|
93
|
-
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
|
94
|
-
)
|
95
|
-
|
96
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
97
|
-
def test_allele_none(self, report: FinalReport) -> None:
|
98
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
99
|
-
|
100
|
-
_fields = [
|
101
|
-
'SNP Name', 'Sample ID', 'Allele1 - Forward', 'Allele2 - Forward',
|
102
|
-
'Allele1 - Top', 'Allele2 - Top', 'Allele1 - AB', 'Allele2 - AB',
|
103
|
-
'GC Score', 'X', 'Y'
|
104
|
-
]
|
105
|
-
|
106
|
-
assert report.snp_data.columns.difference(_fields).empty
|
107
|
-
|
108
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
109
|
-
def test_sample_allele_ab(self, report: FinalReport) -> None:
|
110
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
111
|
-
|
112
|
-
_fields = [
|
113
|
-
'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
|
114
|
-
'GC Score', 'X', 'Y'
|
115
|
-
]
|
116
|
-
|
117
|
-
assert report.snp_data.columns.difference(_fields).empty
|
118
|
-
|
119
|
-
@pytest.mark.parametrize("report", ["Forward"], indirect=True)
|
120
|
-
def test_sample_allele_forward(self, report: FinalReport) -> None:
|
121
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
122
|
-
|
123
|
-
_fields = [
|
124
|
-
'SNP Name', 'Sample ID', 'Allele1 - Forward', 'Allele2 - Forward',
|
125
|
-
'GC Score', 'X', 'Y'
|
126
|
-
]
|
127
|
-
|
128
|
-
assert report.snp_data.columns.difference(_fields).empty
|
129
|
-
|
130
|
-
@pytest.mark.parametrize("report", ["Top"], indirect=True)
|
131
|
-
def test_sample_allele_top(self, report: FinalReport) -> None:
|
132
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
133
|
-
|
134
|
-
_fields = [
|
135
|
-
'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
|
136
|
-
'GC Score', 'X', 'Y'
|
137
|
-
]
|
138
|
-
|
139
|
-
assert report.snp_data.columns.difference(_fields).empty
|
140
|
-
|
141
|
-
@pytest.mark.parametrize("report", [["AB", "Top"]], indirect=True)
|
142
|
-
def test_sample_allele_list1(self, report: FinalReport) -> None:
|
143
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
144
|
-
|
145
|
-
_fields = [
|
146
|
-
'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
|
147
|
-
'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
|
148
|
-
]
|
149
|
-
|
150
|
-
assert report.snp_data.columns.difference(_fields).empty
|
151
|
-
|
152
|
-
@pytest.mark.parametrize("report", [["AB"]], indirect=True)
|
153
|
-
def test_sample_allele_list2(self, report: FinalReport) -> None:
|
154
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
155
|
-
|
156
|
-
_fields = [
|
157
|
-
'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
|
158
|
-
'GC Score', 'X', 'Y'
|
159
|
-
]
|
160
|
-
|
161
|
-
assert report.snp_data.columns.difference(_fields).empty
|
162
|
-
|
163
|
-
@pytest.mark.parametrize("report", [("AB", "Top")], indirect=True)
|
164
|
-
def test_sample_allele_tuple(self, report: FinalReport) -> None:
|
165
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
166
|
-
|
167
|
-
_fields = [
|
168
|
-
'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
|
169
|
-
'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
|
170
|
-
]
|
171
|
-
|
172
|
-
assert report.snp_data.columns.difference(_fields).empty
|
173
|
-
|
174
|
-
@pytest.mark.parametrize("report", [{"AB", "Top"}], indirect=True)
|
175
|
-
def test_sample_allele_set(self, report: FinalReport) -> None:
|
176
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
177
|
-
|
178
|
-
_fields = [
|
179
|
-
'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
|
180
|
-
'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
|
181
|
-
]
|
182
|
-
|
183
|
-
assert report.snp_data.columns.difference(_fields).empty
|
184
|
-
|
185
|
-
@pytest.mark.parametrize("report", ["GG"], indirect=True)
|
186
|
-
def test_sample_allele_not_exist(self, report: FinalReport) -> None:
|
187
|
-
|
188
|
-
with pytest.raises(
|
189
|
-
Exception, match="Error. Allele GG not in data."
|
190
|
-
):
|
191
|
-
report.handle(DIR_FILES / "fr/file4.txt", None)
|
192
|
-
|
193
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
194
|
-
def test_7(self, report: FinalReport) -> None:
|
195
|
-
|
196
|
-
with pytest.raises(
|
197
|
-
Exception, match="Error. Unique keys contain Cyrillic alphabet."
|
198
|
-
):
|
199
|
-
report.handle(
|
200
|
-
DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
|
201
|
-
)
|
202
|
-
|
203
|
-
# assert not report.snp_data.empty
|
204
|
-
#
|
205
|
-
# @pytest.mark.parametrize("report", ["AB"], indirect=True)
|
206
|
-
# def test_8(self, report: FinalReport) -> None:
|
207
|
-
# ...
|
208
|
-
#
|
209
|
-
# @pytest.mark.parametrize("report", ["AB"], indirect=True)
|
210
|
-
# def test_9(self, report: FinalReport) -> None:
|
211
|
-
# ...
|
212
|
-
#
|
213
|
-
# @pytest.mark.parametrize("report", ["AB"], indirect=True)
|
214
|
-
# def test_10(self, report: FinalReport) -> None:
|
215
|
-
# ...
|
snplib/format/tests/__init__.py
DELETED
@@ -1,121 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_FILES
|
6
|
-
from .. import make_fam
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data_fam(request) -> pd.DataFrame | None:
|
14
|
-
return pd.read_pickle(DIR_FILES / f"fplink/fam/{request.param}")
|
15
|
-
|
16
|
-
|
17
|
-
class TestPlinkFormatPed(object):
|
18
|
-
|
19
|
-
@pytest.mark.parametrize("data_fam", ["file.pl"], indirect=True)
|
20
|
-
def test_fam_true(self, data_fam: pd.DataFrame) -> None:
|
21
|
-
assert not make_fam(
|
22
|
-
data_fam,
|
23
|
-
"SAMPLE_ID",
|
24
|
-
"SAMPLE_ID"
|
25
|
-
).empty
|
26
|
-
|
27
|
-
assert not make_fam(
|
28
|
-
data_fam,
|
29
|
-
"SAMPLE_ID",
|
30
|
-
"SAMPLE_ID"
|
31
|
-
).empty
|
32
|
-
|
33
|
-
def test_fam_empty(self) -> None:
|
34
|
-
assert make_fam(
|
35
|
-
pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
|
36
|
-
"SAMPLE_ID",
|
37
|
-
).empty
|
38
|
-
|
39
|
-
assert make_fam(
|
40
|
-
pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
|
41
|
-
"SAMPLE_ID",
|
42
|
-
"SAMPLE_ID",
|
43
|
-
).empty
|
44
|
-
|
45
|
-
@pytest.mark.parametrize("data_fam", ["file.pl"], indirect=True)
|
46
|
-
def test_fam_raise_columns(self, data_fam: pd.DataFrame) -> None:
|
47
|
-
# SID_COL
|
48
|
-
with pytest.raises(
|
49
|
-
KeyError, match="Data has not in name columns SAMPLE_ID1!"
|
50
|
-
):
|
51
|
-
make_fam(
|
52
|
-
data_fam,
|
53
|
-
"SAMPLE_ID1",
|
54
|
-
"SAMPLE_ID",
|
55
|
-
)
|
56
|
-
|
57
|
-
# FID_COL
|
58
|
-
with pytest.raises(
|
59
|
-
KeyError, match="Data has not in name columns SAMPLE_ID1!"
|
60
|
-
):
|
61
|
-
make_fam(
|
62
|
-
data_fam,
|
63
|
-
"SAMPLE_ID",
|
64
|
-
"SAMPLE_ID1"
|
65
|
-
)
|
66
|
-
|
67
|
-
@pytest.mark.parametrize("data_fam", ["file2.pl"], indirect=True)
|
68
|
-
def test_fam_raises_underscope_sid(self, data_fam: pd.DataFrame) -> None:
|
69
|
-
|
70
|
-
# SID_COL
|
71
|
-
with pytest.raises(
|
72
|
-
Exception,
|
73
|
-
match="Replace in 'Sample ID' columns '_' on another a simbols"
|
74
|
-
):
|
75
|
-
make_fam(
|
76
|
-
data_fam,
|
77
|
-
"SAMPLE_ID",
|
78
|
-
"SAMPLE_ID"
|
79
|
-
)
|
80
|
-
|
81
|
-
@pytest.mark.parametrize("data_fam", ["file3.pl"], indirect=True)
|
82
|
-
def test_fam_raises_underscope_fid(self, data_fam: pd.DataFrame) -> None:
|
83
|
-
|
84
|
-
# FID_COL
|
85
|
-
with pytest.raises(
|
86
|
-
Exception,
|
87
|
-
match="Replace in 'Family ID' columns '_' on another a simbols"
|
88
|
-
):
|
89
|
-
make_fam(
|
90
|
-
data_fam,
|
91
|
-
"SAMPLE_ID",
|
92
|
-
"FAMILY_ID"
|
93
|
-
)
|
94
|
-
|
95
|
-
@pytest.mark.parametrize("data_fam", ["file4.pl"], indirect=True)
|
96
|
-
def test_fam_check_data(self, data_fam: pd.DataFrame) -> None:
|
97
|
-
res = make_fam(
|
98
|
-
data_fam,
|
99
|
-
"SAMPLE_ID",
|
100
|
-
"FAMILY_ID",
|
101
|
-
father_col="father",
|
102
|
-
mother_col="mother",
|
103
|
-
sex_col="sex",
|
104
|
-
pheno_col="pheno"
|
105
|
-
)
|
106
|
-
|
107
|
-
res2 = make_fam(
|
108
|
-
data_fam,
|
109
|
-
"SAMPLE_ID",
|
110
|
-
"FAMILY_ID",
|
111
|
-
)
|
112
|
-
|
113
|
-
assert all(res.father.values == list('1234'))
|
114
|
-
assert all(res.mother.values == list('5678'))
|
115
|
-
assert all(res.sex.values == list('1210'))
|
116
|
-
assert all(res.pheno.values == ['12', '13', '14', '15'])
|
117
|
-
|
118
|
-
assert all(res2.father.values == list('0000'))
|
119
|
-
assert all(res2.mother.values == list('0000'))
|
120
|
-
assert all(res2.sex.values == list('0000'))
|
121
|
-
assert all(res2.pheno.values == ['-9', '-9', '-9', '-9'])
|
@@ -1,106 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from . import DIR_FILES
|
6
|
-
from .. import make_lgen
|
7
|
-
|
8
|
-
import pytest
|
9
|
-
import pandas as pd
|
10
|
-
|
11
|
-
|
12
|
-
@pytest.fixture
|
13
|
-
def data_lgen(request) -> pd.DataFrame:
|
14
|
-
return pd.read_pickle(DIR_FILES / f"fplink/lgen/{request.param}")
|
15
|
-
|
16
|
-
|
17
|
-
class TestPlinkFormatLgen(object):
|
18
|
-
|
19
|
-
@pytest.mark.parametrize("data_lgen", ["file.pl"], indirect=True)
|
20
|
-
def test_lgen_true(self, data_lgen: pd.DataFrame) -> None:
|
21
|
-
assert not make_lgen(
|
22
|
-
data_lgen,
|
23
|
-
"Sample ID",
|
24
|
-
"SNP Name",
|
25
|
-
["Allele1 - AB", "Allele2 - AB"]
|
26
|
-
).empty
|
27
|
-
|
28
|
-
def test_lgen_empty(self) -> None:
|
29
|
-
assert make_lgen(
|
30
|
-
pd.DataFrame(columns=[
|
31
|
-
"Sample ID", "SNP Name", "Allele1 - AB", "Allele2 - AB"
|
32
|
-
]),
|
33
|
-
"Sample ID",
|
34
|
-
"SNP Name",
|
35
|
-
["Allele1 - AB", "Allele2 - AB"]
|
36
|
-
).empty
|
37
|
-
|
38
|
-
@pytest.mark.parametrize("data_lgen", ["file.pl"], indirect=True)
|
39
|
-
def test_lgen_raise_columns(self, data_lgen: pd.DataFrame) -> None:
|
40
|
-
|
41
|
-
with pytest.raises(
|
42
|
-
Exception,
|
43
|
-
match="Replace in 'Sample ID' columns '_' on another a simbols"
|
44
|
-
):
|
45
|
-
res1 = data_lgen.copy(deep=True)
|
46
|
-
res1["Sample ID"] = res1["Sample ID"] + "_"
|
47
|
-
|
48
|
-
make_lgen(
|
49
|
-
res1,
|
50
|
-
"Sample ID",
|
51
|
-
"SNP Name",
|
52
|
-
["Allele1 - AB", "Allele2 - AB"]
|
53
|
-
)
|
54
|
-
|
55
|
-
with pytest.raises(
|
56
|
-
Exception,
|
57
|
-
match="Replace in 'Family ID' columns '_' on another a simbols"
|
58
|
-
):
|
59
|
-
res1 = data_lgen.copy(deep=True)
|
60
|
-
res1["Family ID"] = res1["Sample ID"] + "_"
|
61
|
-
|
62
|
-
make_lgen(
|
63
|
-
res1,
|
64
|
-
"Sample ID",
|
65
|
-
"SNP Name",
|
66
|
-
["Allele1 - AB", "Allele2 - AB"],
|
67
|
-
fid_col="Family ID"
|
68
|
-
)
|
69
|
-
|
70
|
-
# SID
|
71
|
-
with pytest.raises(KeyError):
|
72
|
-
make_lgen(
|
73
|
-
data_lgen,
|
74
|
-
"Sample ID1",
|
75
|
-
"SNP Name",
|
76
|
-
["Allele1 - AB", "Allele2 - AB"],
|
77
|
-
fid_col="Family ID"
|
78
|
-
)
|
79
|
-
|
80
|
-
# FID_COL
|
81
|
-
with pytest.raises(KeyError):
|
82
|
-
make_lgen(
|
83
|
-
data_lgen,
|
84
|
-
"Sample ID",
|
85
|
-
"SNP Name",
|
86
|
-
["Allele1 - AB", "Allele2 - AB"],
|
87
|
-
fid_col="Family ID"
|
88
|
-
)
|
89
|
-
|
90
|
-
# SNP name
|
91
|
-
with pytest.raises(KeyError):
|
92
|
-
make_lgen(
|
93
|
-
data_lgen,
|
94
|
-
"Sample ID",
|
95
|
-
"SNP Name1",
|
96
|
-
["Allele1 - AB", "Allele2 - AB"]
|
97
|
-
)
|
98
|
-
|
99
|
-
# Alleles
|
100
|
-
with pytest.raises(KeyError):
|
101
|
-
make_lgen(
|
102
|
-
data_lgen,
|
103
|
-
"Sample ID",
|
104
|
-
"SNP Name",
|
105
|
-
["Allele1 - AB1", "Allele2 - AB1"]
|
106
|
-
)
|