snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. snplib/__init__.py +8 -8
  2. snplib/finalreport/__init__.py +7 -7
  3. snplib/finalreport/_finalreport.py +251 -251
  4. snplib/format/__init__.py +19 -19
  5. snplib/format/__settings.py +7 -7
  6. snplib/format/_plink.py +305 -305
  7. snplib/format/_snp.py +113 -113
  8. snplib/parentage/__init__.py +15 -15
  9. snplib/parentage/_discov.py +102 -102
  10. snplib/parentage/_isagmark.py +15 -15
  11. snplib/parentage/_verif.py +91 -91
  12. snplib/parentage/isag_disc.pl +0 -0
  13. snplib/parentage/isag_verif.pl +0 -0
  14. snplib/statistics/__init__.py +16 -16
  15. snplib/statistics/_callrate.py +59 -59
  16. snplib/statistics/_freq.py +67 -67
  17. snplib/statistics/_snphwe.py +132 -132
  18. {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/LICENSE +674 -674
  19. {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/METADATA +80 -97
  20. snplib-1.0.8.dist-info/RECORD +22 -0
  21. snplib/finalreport/tests/__init__.py +0 -7
  22. snplib/finalreport/tests/test_finalreport.py +0 -215
  23. snplib/format/tests/__init__.py +0 -7
  24. snplib/format/tests/test_plink_fam.py +0 -121
  25. snplib/format/tests/test_plink_lgen.py +0 -106
  26. snplib/format/tests/test_plink_map.py +0 -42
  27. snplib/format/tests/test_plink_ped.py +0 -136
  28. snplib/format/tests/test_snp.py +0 -128
  29. snplib/parentage/tests/__init__.py +0 -7
  30. snplib/parentage/tests/test_discov.py +0 -164
  31. snplib/parentage/tests/test_verif.py +0 -160
  32. snplib/statistics/tests/__init__.py +0 -7
  33. snplib/statistics/tests/test_callrate.py +0 -171
  34. snplib/statistics/tests/test_freq_allele.py +0 -87
  35. snplib/statistics/tests/test_freq_maf.py +0 -17
  36. snplib/statistics/tests/test_hwe_t.py +0 -41
  37. snplib/statistics/tests/test_snphwe.py +0 -41
  38. snplib-1.0.7.dist-info/RECORD +0 -37
  39. {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/WHEEL +0 -0
  40. {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/top_level.txt +0 -0
@@ -1,97 +1,80 @@
1
- Metadata-Version: 2.2
2
- Name: snplib
3
- Version: 1.0.7
4
- Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
- Author-email: Igor <igor.loschinin@gmail.com>
6
- License: GNU
7
- Project-URL: Homepage, https://github.com/IgorekLoschinin/snptools
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.10
11
- Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
- Requires-Dist: numpy>=1.26.1
14
- Requires-Dist: pandas>=2.1.1
15
- Requires-Dist: six>=1.16.0
16
- Requires-Dist: swifter>=1.4.0
17
- Requires-Dist: xlrd>=2.0.1
18
- Requires-Dist: XlsxWriter>=3.1.9
19
- Requires-Dist: openpyxl>=3.1.2
20
- Requires-Dist: pydantic>=2.4.2
21
- Requires-Dist: pytest>=7.4.2
22
-
23
- # snptools
24
-
25
- **Snptools** is a tool for SNP (Single Nucleotide Polymorphism) data processing,
26
- parentage calculation and call rate estimation.
27
-
28
- ## Introduction
29
-
30
- SNP (Single Nucleotide Polymorphism) represent genetic variations, that can
31
- be used to analyze genetic data. SNPTools provides a set of tools for working
32
- with SNP data, including the following capabilities:
33
-
34
- - SNP data processing - FinalReport.
35
- - Parentage Verification and Parentage Discovery Based on SNP Genotypes (ICAR).
36
- - Call rate estimation (percentage of missing data).
37
- - Processing and preparation of data in plink formats.
38
-
39
- ## Installation
40
-
41
- To install SNPTools, follow the steps below:
42
-
43
- 1. Clone the repository into your project directory:
44
- ```
45
- git clone https://github.com/yourusername/snpTools.git
46
- ```
47
- 2. Set dependencies:
48
- ```
49
- pip install -r requirements.txt
50
- ```
51
- 3. Use SNPTools:
52
- ```
53
- import snptools
54
- ```
55
-
56
- or
57
-
58
- You can install snptools via pip from [PyPI](https://pypi.org/project/snplib/):
59
- ```
60
- pip install snplib
61
- ```
62
-
63
-
64
- ## Usage
65
- Snptools provides commands for a variety of operations. Here are examples of
66
- usage:
67
-
68
- #### SNP data processing:
69
- ```
70
- from snptools import FinalReport
71
- ```
72
-
73
- #### Computation of parentage:
74
- ```
75
- from snptools import Discovery, Verification
76
- ```
77
-
78
- #### Preparation format files:
79
- ```
80
- from snptools import (
81
- Snp, make_fam, make_ped, make_lgen, make_map
82
- )
83
- ```
84
-
85
- #### Stat:
86
- ```
87
- from snptools import (
88
- hwe, hwe_test, call_rate, allele_freq, minor_allele_freq
89
- )
90
- ```
91
-
92
- ## Documentation
93
- Detailed documentation on how to use SNPTools is available see the [docs](./docs/_build/index.html).
94
-
95
- ## License
96
- This project is licensed under the GNU General Public License - see the
97
- LICENSE file for details.
1
+ Metadata-Version: 2.2
2
+ Name: snplib
3
+ Version: 1.0.8
4
+ Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
+ Author-email: Igor <igor.loschinin@gmail.com>
6
+ License: GNU
7
+ Project-URL: Homepage, https://github.com/IgorekLoschinin/snptools
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: numpy>=1.26.1
14
+ Requires-Dist: pandas>=2.1.1
15
+ Requires-Dist: six>=1.16.0
16
+ Requires-Dist: swifter>=1.4.0
17
+ Requires-Dist: xlrd>=2.0.1
18
+ Requires-Dist: XlsxWriter>=3.1.9
19
+ Requires-Dist: openpyxl>=3.1.2
20
+ Requires-Dist: pydantic>=2.4.2
21
+
22
+ # snptools
23
+ <p align="center">
24
+ <img width="150" height="150" src="./iconlib.png">
25
+ </p>
26
+
27
+ **Snptools** is a tool for SNP (Single Nucleotide Polymorphism) data processing,
28
+ parentage calculation and call rate estimation.
29
+
30
+ ## Introduction
31
+
32
+ SNPs (Single Nucleotide Polymorphisms) represent genetic variations that can
33
+ be used to analyze genetic data. SNPTools provides a set of tools for working
34
+ with SNP data, including the following capabilities:
35
+
36
+ - SNP data processing - FinalReport.
37
+ - Parentage Verification and Parentage Discovery Based on SNP Genotypes (ICAR).
38
+ - Call rate estimation (percentage of missing data).
39
+ - Processing and preparation of data in plink formats.
40
+
41
+ ## Installation
42
+ You can install snptools via pip from [PyPI](https://pypi.org/project/snplib/):
43
+ ```
44
+ pip install snplib
45
+ ```
46
+
47
+ ## Usage
48
+ Snptools provides commands for a variety of operations. Here are examples of
49
+ usage:
50
+
51
+ #### SNP data processing:
52
+ ```
53
+ from snplib import FinalReport
54
+ ```
55
+
56
+ #### Computation of parentage:
57
+ ```
58
+ from snplib import Discovery, Verification
59
+ ```
60
+
61
+ #### Preparation format files:
62
+ ```
63
+ from snplib import (
64
+ Snp, make_fam, make_ped, make_lgen, make_map
65
+ )
66
+ ```
67
+
68
+ #### Stat:
69
+ ```
70
+ from snplib import (
71
+ hwe, hwe_test, call_rate, allele_freq, minor_allele_freq
72
+ )
73
+ ```
74
+
75
+ ## Documentation
76
+ Detailed documentation on how to use SNPTools is available in the [docs](./docs/_build/index.html).
77
+
78
+ ## License
79
+ This project is licensed under the GNU General Public License - see the
80
+ LICENSE file for details.
@@ -0,0 +1,22 @@
1
+ snplib/__init__.py,sha256=xhjj4ZywdwCq91GBh1zfBP_TwFW26-KpHcCUUVvMdgI,196
2
+ snplib/finalreport/__init__.py,sha256=Yk49x8t-STIfsdP6QLMtaGm1gTj_n-XS8kchPguvW1g,161
3
+ snplib/finalreport/_finalreport.py,sha256=el_d8MVmpic3wKCRJ-J52VZYSmMuNSf4p_tmPkgh0Z0,5876
4
+ snplib/format/__init__.py,sha256=3W_l_sP1u9HV3HWwnsJxPGw9anrVknstqLaJmWQaG0k,261
5
+ snplib/format/__settings.py,sha256=kyAVZ4tiU61sNr3jQhjXbLXRyBA3pjFfCw3fOfSkY14,289
6
+ snplib/format/_plink.py,sha256=Z09IOPACOt3n8CKEVRkE4tLT16I8e_6ZoMaWRxSImrA,10529
7
+ snplib/format/_snp.py,sha256=oI-V4-_w28aX-VxoimywLDDnX6owhdjLbqt9a54_ouU,3172
8
+ snplib/parentage/__init__.py,sha256=bN3mWTxmaFQ1qzRtyMLaAoxfomz6jnoWa-kmnJ9q_fE,280
9
+ snplib/parentage/_discov.py,sha256=qGlNzpl4xKOWRr6-fi1osylzizgiPO8vCus8VE56nec,3180
10
+ snplib/parentage/_isagmark.py,sha256=0xi9YhuIpU7zf16HnWw1XIkcQLk4rTNeAeCE-5p9hQE,356
11
+ snplib/parentage/_verif.py,sha256=VbX46dC4tl4Qeuw65aRg1s6hSn0FI25Hy9-3U_jxmrg,3019
12
+ snplib/parentage/isag_disc.pl,sha256=XzjcsnO_kwPg4WaE2YMuZXBNHQ9ixi6pg5n2mfGOuJU,14219
13
+ snplib/parentage/isag_verif.pl,sha256=e_c4YGd5_JXGWqFQwmcxjp6hEkdcqpK_5y5MqJ8J9YY,8254
14
+ snplib/statistics/__init__.py,sha256=XJFU7mEwAJJ2M187jEkO8rFNYKoxF-g9KF_stS7eFFw,302
15
+ snplib/statistics/_callrate.py,sha256=ghB1EXT5JLQeIEIzh8LjWpqAnhCtCOk6l5ecNMLtQa0,1865
16
+ snplib/statistics/_freq.py,sha256=ZPZBZM3xq9EseOxuMzRVvzkjjFfaaA4ZvF7XI8ctON0,1623
17
+ snplib/statistics/_snphwe.py,sha256=KcoRGwovMCc53-GJ8VfYs_3ZEHObgt8B0EvrW5nFnmM,3353
18
+ snplib-1.0.8.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
+ snplib-1.0.8.dist-info/METADATA,sha256=Imm2fupAtiH61erPCanyjqNv50ssw0nLTEDwHJ92KE0,2165
20
+ snplib-1.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
21
+ snplib-1.0.8.dist-info/top_level.txt,sha256=CGCrLXuCSyXPCTwMFQjPxQR7b93FFFft56sAPPun_2g,7
22
+ snplib-1.0.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from pathlib import Path
6
-
7
- DIR_FILES = Path(__file__).parent.joinpath("files")
@@ -1,215 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
-
6
- from . import DIR_FILES
7
- from .._finalreport import FinalReport
8
-
9
- import pytest
10
-
11
-
12
- @pytest.fixture
13
- def report(request) -> FinalReport:
14
- return FinalReport(allele=request.param)
15
-
16
-
17
- class TestFinalReport(object):
18
-
19
- @pytest.mark.parametrize("report", [None], indirect=True)
20
- def test_handle_1(self, report: FinalReport) -> None:
21
- """ If both files do not exist """
22
-
23
- assert not report.handle(
24
- DIR_FILES / "fr/f.txt", DIR_FILES / "fr/f.xlsx",
25
- )
26
-
27
- @pytest.mark.parametrize("report", [None], indirect=True)
28
- def test_handle_2(self, report: FinalReport) -> None:
29
- """ If the file to convert does not exist """
30
-
31
- assert not report.handle(
32
- DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/f.xlsx",
33
- )
34
-
35
- @pytest.mark.parametrize("report", [None], indirect=True)
36
- def test_handle_3(self, report: FinalReport) -> None:
37
- """ If the data does not contain header data """
38
-
39
- report.handle(
40
- DIR_FILES / "fr/file2.txt", DIR_FILES / "fr/file2.xlsx",
41
- )
42
-
43
- assert len(report.header) == 0 and not report.snp_data.empty
44
-
45
- @pytest.mark.parametrize("report", [None], indirect=True)
46
- def test_handle_4(self, report: FinalReport) -> None:
47
- """ If the file contains only header and field names """
48
-
49
- report.handle(
50
- DIR_FILES / "fr/file3.txt", DIR_FILES / "fr/file3.xlsx",
51
- )
52
-
53
- assert report.snp_data is not None and report.snp_data.empty
54
-
55
- @pytest.mark.parametrize("report", [None], indirect=True)
56
- def test_handle_5(self, report: FinalReport) -> None:
57
- """ If the data file is empty """
58
-
59
- with pytest.raises(
60
- Exception, match="Not data in file FinalReport.txt"
61
- ):
62
- report.handle(
63
- DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
64
- )
65
-
66
- assert report.snp_data is None
67
-
68
- @pytest.mark.parametrize("report", [None], indirect=True)
69
- def test_handle_6(self, report: FinalReport) -> None:
70
- """ If the conversion file is empty """
71
-
72
- assert report.handle(
73
- DIR_FILES / "fr/file6.txt", DIR_FILES / "fr/file6.xlsx",
74
- )
75
-
76
- assert not report.snp_data.empty
77
- assert len(report.header) != 0
78
-
79
- @pytest.mark.parametrize("report", [None], indirect=True)
80
- def test_handle_7(self, report: FinalReport) -> None:
81
- """ If the data file is not needed to convert ID name """
82
-
83
- report.handle(DIR_FILES / "fr/file4.txt", None)
84
-
85
- assert not report.snp_data.empty
86
- assert len(report.header) != 0
87
-
88
- @pytest.mark.parametrize("report", [None], indirect=True)
89
- def test_handle_8(self, report: FinalReport) -> None:
90
- """ If files exist """
91
-
92
- assert report.handle(
93
- DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
94
- )
95
-
96
- @pytest.mark.parametrize("report", [None], indirect=True)
97
- def test_allele_none(self, report: FinalReport) -> None:
98
- report.handle(DIR_FILES / "fr/file4.txt", None)
99
-
100
- _fields = [
101
- 'SNP Name', 'Sample ID', 'Allele1 - Forward', 'Allele2 - Forward',
102
- 'Allele1 - Top', 'Allele2 - Top', 'Allele1 - AB', 'Allele2 - AB',
103
- 'GC Score', 'X', 'Y'
104
- ]
105
-
106
- assert report.snp_data.columns.difference(_fields).empty
107
-
108
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
109
- def test_sample_allele_ab(self, report: FinalReport) -> None:
110
- report.handle(DIR_FILES / "fr/file4.txt", None)
111
-
112
- _fields = [
113
- 'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
114
- 'GC Score', 'X', 'Y'
115
- ]
116
-
117
- assert report.snp_data.columns.difference(_fields).empty
118
-
119
- @pytest.mark.parametrize("report", ["Forward"], indirect=True)
120
- def test_sample_allele_forward(self, report: FinalReport) -> None:
121
- report.handle(DIR_FILES / "fr/file4.txt", None)
122
-
123
- _fields = [
124
- 'SNP Name', 'Sample ID', 'Allele1 - Forward', 'Allele2 - Forward',
125
- 'GC Score', 'X', 'Y'
126
- ]
127
-
128
- assert report.snp_data.columns.difference(_fields).empty
129
-
130
- @pytest.mark.parametrize("report", ["Top"], indirect=True)
131
- def test_sample_allele_top(self, report: FinalReport) -> None:
132
- report.handle(DIR_FILES / "fr/file4.txt", None)
133
-
134
- _fields = [
135
- 'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
136
- 'GC Score', 'X', 'Y'
137
- ]
138
-
139
- assert report.snp_data.columns.difference(_fields).empty
140
-
141
- @pytest.mark.parametrize("report", [["AB", "Top"]], indirect=True)
142
- def test_sample_allele_list1(self, report: FinalReport) -> None:
143
- report.handle(DIR_FILES / "fr/file4.txt", None)
144
-
145
- _fields = [
146
- 'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
147
- 'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
148
- ]
149
-
150
- assert report.snp_data.columns.difference(_fields).empty
151
-
152
- @pytest.mark.parametrize("report", [["AB"]], indirect=True)
153
- def test_sample_allele_list2(self, report: FinalReport) -> None:
154
- report.handle(DIR_FILES / "fr/file4.txt", None)
155
-
156
- _fields = [
157
- 'SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB',
158
- 'GC Score', 'X', 'Y'
159
- ]
160
-
161
- assert report.snp_data.columns.difference(_fields).empty
162
-
163
- @pytest.mark.parametrize("report", [("AB", "Top")], indirect=True)
164
- def test_sample_allele_tuple(self, report: FinalReport) -> None:
165
- report.handle(DIR_FILES / "fr/file4.txt", None)
166
-
167
- _fields = [
168
- 'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
169
- 'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
170
- ]
171
-
172
- assert report.snp_data.columns.difference(_fields).empty
173
-
174
- @pytest.mark.parametrize("report", [{"AB", "Top"}], indirect=True)
175
- def test_sample_allele_set(self, report: FinalReport) -> None:
176
- report.handle(DIR_FILES / "fr/file4.txt", None)
177
-
178
- _fields = [
179
- 'SNP Name', 'Sample ID', 'Allele1 - Top', 'Allele2 - Top',
180
- 'Allele1 - AB', 'Allele2 - AB', 'GC Score', 'X', 'Y'
181
- ]
182
-
183
- assert report.snp_data.columns.difference(_fields).empty
184
-
185
- @pytest.mark.parametrize("report", ["GG"], indirect=True)
186
- def test_sample_allele_not_exist(self, report: FinalReport) -> None:
187
-
188
- with pytest.raises(
189
- Exception, match="Error. Allele GG not in data."
190
- ):
191
- report.handle(DIR_FILES / "fr/file4.txt", None)
192
-
193
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
194
- def test_7(self, report: FinalReport) -> None:
195
-
196
- with pytest.raises(
197
- Exception, match="Error. Unique keys contain Cyrillic alphabet."
198
- ):
199
- report.handle(
200
- DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
201
- )
202
-
203
- # assert not report.snp_data.empty
204
- #
205
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
206
- # def test_8(self, report: FinalReport) -> None:
207
- # ...
208
- #
209
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
210
- # def test_9(self, report: FinalReport) -> None:
211
- # ...
212
- #
213
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
214
- # def test_10(self, report: FinalReport) -> None:
215
- # ...
@@ -1,7 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from pathlib import Path
6
-
7
- DIR_FILES = Path(__file__).parent.joinpath("files")
@@ -1,121 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from . import DIR_FILES
6
- from .. import make_fam
7
-
8
- import pytest
9
- import pandas as pd
10
-
11
-
12
- @pytest.fixture
13
- def data_fam(request) -> pd.DataFrame | None:
14
- return pd.read_pickle(DIR_FILES / f"fplink/fam/{request.param}")
15
-
16
-
17
- class TestPlinkFormatPed(object):
18
-
19
- @pytest.mark.parametrize("data_fam", ["file.pl"], indirect=True)
20
- def test_fam_true(self, data_fam: pd.DataFrame) -> None:
21
- assert not make_fam(
22
- data_fam,
23
- "SAMPLE_ID",
24
- "SAMPLE_ID"
25
- ).empty
26
-
27
- assert not make_fam(
28
- data_fam,
29
- "SAMPLE_ID",
30
- "SAMPLE_ID"
31
- ).empty
32
-
33
- def test_fam_empty(self) -> None:
34
- assert make_fam(
35
- pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
36
- "SAMPLE_ID",
37
- ).empty
38
-
39
- assert make_fam(
40
- pd.DataFrame(columns=["SAMPLE_ID", "SNP"]),
41
- "SAMPLE_ID",
42
- "SAMPLE_ID",
43
- ).empty
44
-
45
- @pytest.mark.parametrize("data_fam", ["file.pl"], indirect=True)
46
- def test_fam_raise_columns(self, data_fam: pd.DataFrame) -> None:
47
- # SID_COL
48
- with pytest.raises(
49
- KeyError, match="Data has not in name columns SAMPLE_ID1!"
50
- ):
51
- make_fam(
52
- data_fam,
53
- "SAMPLE_ID1",
54
- "SAMPLE_ID",
55
- )
56
-
57
- # FID_COL
58
- with pytest.raises(
59
- KeyError, match="Data has not in name columns SAMPLE_ID1!"
60
- ):
61
- make_fam(
62
- data_fam,
63
- "SAMPLE_ID",
64
- "SAMPLE_ID1"
65
- )
66
-
67
- @pytest.mark.parametrize("data_fam", ["file2.pl"], indirect=True)
68
- def test_fam_raises_underscope_sid(self, data_fam: pd.DataFrame) -> None:
69
-
70
- # SID_COL
71
- with pytest.raises(
72
- Exception,
73
- match="Replace in 'Sample ID' columns '_' on another a simbols"
74
- ):
75
- make_fam(
76
- data_fam,
77
- "SAMPLE_ID",
78
- "SAMPLE_ID"
79
- )
80
-
81
- @pytest.mark.parametrize("data_fam", ["file3.pl"], indirect=True)
82
- def test_fam_raises_underscope_fid(self, data_fam: pd.DataFrame) -> None:
83
-
84
- # FID_COL
85
- with pytest.raises(
86
- Exception,
87
- match="Replace in 'Family ID' columns '_' on another a simbols"
88
- ):
89
- make_fam(
90
- data_fam,
91
- "SAMPLE_ID",
92
- "FAMILY_ID"
93
- )
94
-
95
- @pytest.mark.parametrize("data_fam", ["file4.pl"], indirect=True)
96
- def test_fam_check_data(self, data_fam: pd.DataFrame) -> None:
97
- res = make_fam(
98
- data_fam,
99
- "SAMPLE_ID",
100
- "FAMILY_ID",
101
- father_col="father",
102
- mother_col="mother",
103
- sex_col="sex",
104
- pheno_col="pheno"
105
- )
106
-
107
- res2 = make_fam(
108
- data_fam,
109
- "SAMPLE_ID",
110
- "FAMILY_ID",
111
- )
112
-
113
- assert all(res.father.values == list('1234'))
114
- assert all(res.mother.values == list('5678'))
115
- assert all(res.sex.values == list('1210'))
116
- assert all(res.pheno.values == ['12', '13', '14', '15'])
117
-
118
- assert all(res2.father.values == list('0000'))
119
- assert all(res2.mother.values == list('0000'))
120
- assert all(res2.sex.values == list('0000'))
121
- assert all(res2.pheno.values == ['-9', '-9', '-9', '-9'])
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
- __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
-
5
- from . import DIR_FILES
6
- from .. import make_lgen
7
-
8
- import pytest
9
- import pandas as pd
10
-
11
-
12
- @pytest.fixture
13
- def data_lgen(request) -> pd.DataFrame:
14
- return pd.read_pickle(DIR_FILES / f"fplink/lgen/{request.param}")
15
-
16
-
17
- class TestPlinkFormatLgen(object):
18
-
19
- @pytest.mark.parametrize("data_lgen", ["file.pl"], indirect=True)
20
- def test_lgen_true(self, data_lgen: pd.DataFrame) -> None:
21
- assert not make_lgen(
22
- data_lgen,
23
- "Sample ID",
24
- "SNP Name",
25
- ["Allele1 - AB", "Allele2 - AB"]
26
- ).empty
27
-
28
- def test_lgen_empty(self) -> None:
29
- assert make_lgen(
30
- pd.DataFrame(columns=[
31
- "Sample ID", "SNP Name", "Allele1 - AB", "Allele2 - AB"
32
- ]),
33
- "Sample ID",
34
- "SNP Name",
35
- ["Allele1 - AB", "Allele2 - AB"]
36
- ).empty
37
-
38
- @pytest.mark.parametrize("data_lgen", ["file.pl"], indirect=True)
39
- def test_lgen_raise_columns(self, data_lgen: pd.DataFrame) -> None:
40
-
41
- with pytest.raises(
42
- Exception,
43
- match="Replace in 'Sample ID' columns '_' on another a simbols"
44
- ):
45
- res1 = data_lgen.copy(deep=True)
46
- res1["Sample ID"] = res1["Sample ID"] + "_"
47
-
48
- make_lgen(
49
- res1,
50
- "Sample ID",
51
- "SNP Name",
52
- ["Allele1 - AB", "Allele2 - AB"]
53
- )
54
-
55
- with pytest.raises(
56
- Exception,
57
- match="Replace in 'Family ID' columns '_' on another a simbols"
58
- ):
59
- res1 = data_lgen.copy(deep=True)
60
- res1["Family ID"] = res1["Sample ID"] + "_"
61
-
62
- make_lgen(
63
- res1,
64
- "Sample ID",
65
- "SNP Name",
66
- ["Allele1 - AB", "Allele2 - AB"],
67
- fid_col="Family ID"
68
- )
69
-
70
- # SID
71
- with pytest.raises(KeyError):
72
- make_lgen(
73
- data_lgen,
74
- "Sample ID1",
75
- "SNP Name",
76
- ["Allele1 - AB", "Allele2 - AB"],
77
- fid_col="Family ID"
78
- )
79
-
80
- # FID_COL
81
- with pytest.raises(KeyError):
82
- make_lgen(
83
- data_lgen,
84
- "Sample ID",
85
- "SNP Name",
86
- ["Allele1 - AB", "Allele2 - AB"],
87
- fid_col="Family ID"
88
- )
89
-
90
- # SNP name
91
- with pytest.raises(KeyError):
92
- make_lgen(
93
- data_lgen,
94
- "Sample ID",
95
- "SNP Name1",
96
- ["Allele1 - AB", "Allele2 - AB"]
97
- )
98
-
99
- # Alleles
100
- with pytest.raises(KeyError):
101
- make_lgen(
102
- data_lgen,
103
- "Sample ID",
104
- "SNP Name",
105
- ["Allele1 - AB1", "Allele2 - AB1"]
106
- )