snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snplib/__init__.py +8 -8
- snplib/finalreport/__init__.py +7 -7
- snplib/finalreport/_finalreport.py +251 -251
- snplib/format/__init__.py +19 -19
- snplib/format/__settings.py +7 -7
- snplib/format/_plink.py +305 -305
- snplib/format/_snp.py +113 -113
- snplib/parentage/__init__.py +15 -15
- snplib/parentage/_discov.py +102 -102
- snplib/parentage/_isagmark.py +15 -15
- snplib/parentage/_verif.py +91 -91
- snplib/parentage/isag_disc.pl +0 -0
- snplib/parentage/isag_verif.pl +0 -0
- snplib/statistics/__init__.py +16 -16
- snplib/statistics/_callrate.py +59 -59
- snplib/statistics/_freq.py +67 -67
- snplib/statistics/_snphwe.py +132 -132
- {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/LICENSE +674 -674
- {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/METADATA +80 -97
- snplib-1.0.8.dist-info/RECORD +22 -0
- snplib/finalreport/tests/__init__.py +0 -7
- snplib/finalreport/tests/test_finalreport.py +0 -215
- snplib/format/tests/__init__.py +0 -7
- snplib/format/tests/test_plink_fam.py +0 -121
- snplib/format/tests/test_plink_lgen.py +0 -106
- snplib/format/tests/test_plink_map.py +0 -42
- snplib/format/tests/test_plink_ped.py +0 -136
- snplib/format/tests/test_snp.py +0 -128
- snplib/parentage/tests/__init__.py +0 -7
- snplib/parentage/tests/test_discov.py +0 -164
- snplib/parentage/tests/test_verif.py +0 -160
- snplib/statistics/tests/__init__.py +0 -7
- snplib/statistics/tests/test_callrate.py +0 -171
- snplib/statistics/tests/test_freq_allele.py +0 -87
- snplib/statistics/tests/test_freq_maf.py +0 -17
- snplib/statistics/tests/test_hwe_t.py +0 -41
- snplib/statistics/tests/test_snphwe.py +0 -41
- snplib-1.0.7.dist-info/RECORD +0 -37
- {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/WHEEL +0 -0
- {snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/top_level.txt +0 -0
snplib/__init__.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from .finalreport import *
|
6
|
-
from .format import *
|
7
|
-
from .parentage import *
|
8
|
-
from .statistics import *
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding: utf-8
|
3
|
+
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
+
|
5
|
+
from .finalreport import *
|
6
|
+
from .format import *
|
7
|
+
from .parentage import *
|
8
|
+
from .statistics import *
|
snplib/finalreport/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from ._finalreport import FinalReport
|
6
|
-
|
7
|
-
__all__ = ["FinalReport"]
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding: utf-8
|
3
|
+
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
+
|
5
|
+
from ._finalreport import FinalReport
|
6
|
+
|
7
|
+
__all__ = ["FinalReport"]
|
snplib/finalreport/_finalreport.py
CHANGED
@@ -1,251 +1,251 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
__all__ = ("FinalReport",)
|
5
|
-
|
6
|
-
from pathlib import Path
|
7
|
-
from functools import reduce
|
8
|
-
|
9
|
-
import re
|
10
|
-
import pandas as pd
|
11
|
-
|
12
|
-
|
13
|
-
class FinalReport(object):
|
14
|
-
""" File that contains SNP information.
|
15
|
-
|
16
|
-
:argument allele: A variant form of a single nucleotide polymorphism
|
17
|
-
(SNP), a specific polymorphic site or a whole gene detectable at
|
18
|
-
a locus. Type: 'AB', 'Forward', 'Top', 'Plus', 'Design'
|
19
|
-
:argument sep: Delimiter to use. Default value: "\\t"
|
20
|
-
|
21
|
-
Example:
|
22
|
-
[Header]
|
23
|
-
GSGT Version 2.0.4
|
24
|
-
Processing Date 10/14/2021 4:02 PM
|
25
|
-
Content BovineSNP50_v3_A1.bpm
|
26
|
-
Num SNPs 53218
|
27
|
-
Total SNPs 53218
|
28
|
-
Num Samples 3
|
29
|
-
Total Samples 3
|
30
|
-
[Data]
|
31
|
-
SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
|
32
|
-
ABCA12 1 A A 0.4048 0.8164
|
33
|
-
APAF1 1 B B 0.9067 0.9155
|
34
|
-
...
|
35
|
-
"""
|
36
|
-
|
37
|
-
__PATTERN_HEADER = re.compile(r'(^\[Header\])')
|
38
|
-
__PATTERN_DATA = re.compile(r'(^\[Data\])')
|
39
|
-
|
40
|
-
def __init__(
|
41
|
-
self,
|
42
|
-
allele: str | list | None = None,
|
43
|
-
sep: str = "\t"
|
44
|
-
) -> None:
|
45
|
-
self._delimiter = sep
|
46
|
-
self._full_data = None
|
47
|
-
|
48
|
-
self.__header = {}
|
49
|
-
self.__snp_data = None
|
50
|
-
self.__allele = allele
|
51
|
-
self._map_rn = None
|
52
|
-
|
53
|
-
@property
|
54
|
-
def header(self) -> dict:
|
55
|
-
return self.__header
|
56
|
-
|
57
|
-
@property
|
58
|
-
def snp_data(self) -> pd.DataFrame | None:
|
59
|
-
return self.__snp_data
|
60
|
-
|
61
|
-
def handle(
|
62
|
-
self, file_rep: Path | str, conv_file: Path | str = None
|
63
|
-
) -> bool:
|
64
|
-
""" Processes the FinalReport.txt file. Highlights meta information
|
65
|
-
and data.
|
66
|
-
|
67
|
-
:param file_rep: The file FinalReport.txt or another name.
|
68
|
-
:param conv_file: The file that contains IDs of registration numbers
|
69
|
-
of animals.
|
70
|
-
:return: Returns true if file processing was successful, false if
|
71
|
-
there were errors.
|
72
|
-
"""
|
73
|
-
|
74
|
-
try:
|
75
|
-
|
76
|
-
if isinstance(file_rep, str):
|
77
|
-
file_rep = Path(file_rep)
|
78
|
-
|
79
|
-
if not file_rep.is_file() and not file_rep.exists():
|
80
|
-
return False
|
81
|
-
|
82
|
-
# Processing conversion file
|
83
|
-
if conv_file is not None:
|
84
|
-
if isinstance(conv_file, str):
|
85
|
-
conv_file = Path(conv_file)
|
86
|
-
|
87
|
-
if not conv_file.is_file() and not conv_file.exists():
|
88
|
-
return False
|
89
|
-
|
90
|
-
self.__convert_s_id(conv_file)
|
91
|
-
|
92
|
-
# Processing report file
|
93
|
-
if not self.read(file_rep):
|
94
|
-
return False
|
95
|
-
|
96
|
-
if self._full_data is None:
|
97
|
-
raise Exception("Not data in file FinalReport.txt")
|
98
|
-
|
99
|
-
self.__handler_header()
|
100
|
-
self.__handler_data()
|
101
|
-
|
102
|
-
if self._map_rn is not None:
|
103
|
-
self.__snp_data['Sample ID'] = \
|
104
|
-
self.__snp_data['Sample ID'].map(
|
105
|
-
dict(zip(self._map_rn.SID, self._map_rn.UNIQ_KEY))
|
106
|
-
)
|
107
|
-
|
108
|
-
except Exception as e:
|
109
|
-
raise e
|
110
|
-
|
111
|
-
return True
|
112
|
-
|
113
|
-
def read(self, file_rep: Path) -> bool:
|
114
|
-
""" Reading data from the final_report file
|
115
|
-
|
116
|
-
:param file_rep: path, pointer to the file to be read.
|
117
|
-
:return: Returns true if the read was successful, false if it failed.
|
118
|
-
"""
|
119
|
-
try:
|
120
|
-
if len(data := file_rep.read_text()) != 0:
|
121
|
-
self._full_data = data.strip().split("\n")
|
122
|
-
return True
|
123
|
-
|
124
|
-
self._full_data = None
|
125
|
-
|
126
|
-
except Exception as e:
|
127
|
-
return False
|
128
|
-
|
129
|
-
return True
|
130
|
-
|
131
|
-
def __handler_header(self) -> None:
|
132
|
-
""" Processes data from a file, selects meta-information. """
|
133
|
-
|
134
|
-
for line in self._full_data:
|
135
|
-
if self.__class__.__PATTERN_DATA.findall(line):
|
136
|
-
return
|
137
|
-
|
138
|
-
if self.__class__.__PATTERN_HEADER.findall(line):
|
139
|
-
continue
|
140
|
-
|
141
|
-
key = line.strip().split("\t")[0]
|
142
|
-
value = line.strip().split("\t")[1]
|
143
|
-
|
144
|
-
self.__header[key] = value
|
145
|
-
|
146
|
-
def __handler_data(self) -> None:
|
147
|
-
""" Processes data and forms an array for further processing. """
|
148
|
-
|
149
|
-
temp = 1
|
150
|
-
for line in self._full_data:
|
151
|
-
if self.__class__.__PATTERN_DATA.findall(line):
|
152
|
-
break
|
153
|
-
temp += 1
|
154
|
-
|
155
|
-
names_col = self.__sample_by_allele(
|
156
|
-
self._full_data[temp].split(f"{self._delimiter}")
|
157
|
-
)
|
158
|
-
|
159
|
-
if names_col is None:
|
160
|
-
raise Exception(f"Error. Allele {self.__allele} not in data.")
|
161
|
-
|
162
|
-
self.__snp_data = pd.DataFrame(
|
163
|
-
[
|
164
|
-
item_data.split(f"{self._delimiter}")
|
165
|
-
for item_data in self._full_data[temp + 1:]
|
166
|
-
],
|
167
|
-
columns=self._full_data[temp].split(f"{self._delimiter}")
|
168
|
-
)[names_col]
|
169
|
-
|
170
|
-
def __sample_by_allele(self, names: list[str]) -> list[str] | None:
|
171
|
-
""" Method that generates a list of field names choosing which alleles
|
172
|
-
to keep
|
173
|
-
|
174
|
-
:param names: List of field names in the report file.
|
175
|
-
:return: Returns a filtered list of fields by alleles.
|
176
|
-
"""
|
177
|
-
|
178
|
-
allele_templ = r'(^Allele\d\s[:-]\s{}\b)'
|
179
|
-
|
180
|
-
match self.__allele:
|
181
|
-
case None:
|
182
|
-
return names
|
183
|
-
|
184
|
-
case str():
|
185
|
-
allele_pattern = re.compile(
|
186
|
-
allele_templ.format(self.__allele)
|
187
|
-
)
|
188
|
-
|
189
|
-
case list() | tuple() | set():
|
190
|
-
allele_pattern = re.compile(
|
191
|
-
allele_templ.format("|".join(self.__allele))
|
192
|
-
)
|
193
|
-
case _:
|
194
|
-
return None
|
195
|
-
|
196
|
-
lst_allele = reduce(
|
197
|
-
lambda i, j: i + j,
|
198
|
-
[allele_pattern.findall(item) for item in names]
|
199
|
-
)
|
200
|
-
|
201
|
-
if len(lst_allele) == 0:
|
202
|
-
return None
|
203
|
-
|
204
|
-
exclude_alleles = [
|
205
|
-
item for item in names
|
206
|
-
if item.startswith("Allele") and item not in lst_allele
|
207
|
-
]
|
208
|
-
|
209
|
-
return list(filter(
|
210
|
-
lambda x: True if x not in exclude_alleles else False, names
|
211
|
-
))
|
212
|
-
|
213
|
-
def __convert_s_id(self, path_file: Path) -> None:
|
214
|
-
"""Converts sample id which is in FinalReport to animal registration
|
215
|
-
number.
|
216
|
-
|
217
|
-
:param path_file: xlsx file with animal numbers label
|
218
|
-
"""
|
219
|
-
|
220
|
-
self._map_rn = pd.read_excel(
|
221
|
-
path_file,
|
222
|
-
header=None,
|
223
|
-
names=['SID', 'UNIQ_KEY', 'SEX'],
|
224
|
-
dtype={'SID': str},
|
225
|
-
index_col=False
|
226
|
-
)
|
227
|
-
|
228
|
-
if self._map_rn.empty:
|
229
|
-
self._map_rn = None
|
230
|
-
return
|
231
|
-
|
232
|
-
self._map_rn.SID = self._map_rn.SID.str.strip()
|
233
|
-
self._map_rn.UNIQ_KEY = self._map_rn.UNIQ_KEY.str.strip()
|
234
|
-
|
235
|
-
if self._check_on_ru_symbols(self._map_rn.UNIQ_KEY):
|
236
|
-
raise Exception("Error. Unique keys contain Cyrillic alphabet.")
|
237
|
-
|
238
|
-
if self._map_rn.UNIQ_KEY.isna().any():
|
239
|
-
self._map_rn.fillna('unknown', inplace=True)
|
240
|
-
|
241
|
-
@staticmethod
|
242
|
-
def _check_on_ru_symbols(seq: pd.Series) -> bool | None:
|
243
|
-
"""
|
244
|
-
|
245
|
-
:param seq:
|
246
|
-
:return:
|
247
|
-
"""
|
248
|
-
|
249
|
-
return any(seq.apply(lambda x: bool(re.search('[а-яА-Я]', x))))
|
250
|
-
|
251
|
-
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding: utf-8
|
3
|
+
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
+
__all__ = ("FinalReport",)
|
5
|
+
|
6
|
+
from pathlib import Path
|
7
|
+
from functools import reduce
|
8
|
+
|
9
|
+
import re
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
|
13
|
+
class FinalReport(object):
|
14
|
+
""" File that contains SNP information.
|
15
|
+
|
16
|
+
:argument allele: A variant form of a single nucleotide polymorphism
|
17
|
+
(SNP), a specific polymorphic site or a whole gene detectable at
|
18
|
+
a locus. Type: 'AB', 'Forward', 'Top', 'Plus', 'Design'
|
19
|
+
:argument sep: Delimiter to use. Default value: "\\t"
|
20
|
+
|
21
|
+
Example:
|
22
|
+
[Header]
|
23
|
+
GSGT Version 2.0.4
|
24
|
+
Processing Date 10/14/2021 4:02 PM
|
25
|
+
Content BovineSNP50_v3_A1.bpm
|
26
|
+
Num SNPs 53218
|
27
|
+
Total SNPs 53218
|
28
|
+
Num Samples 3
|
29
|
+
Total Samples 3
|
30
|
+
[Data]
|
31
|
+
SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
|
32
|
+
ABCA12 1 A A 0.4048 0.8164
|
33
|
+
APAF1 1 B B 0.9067 0.9155
|
34
|
+
...
|
35
|
+
"""
|
36
|
+
|
37
|
+
__PATTERN_HEADER = re.compile(r'(^\[Header\])')
|
38
|
+
__PATTERN_DATA = re.compile(r'(^\[Data\])')
|
39
|
+
|
40
|
+
def __init__(
|
41
|
+
self,
|
42
|
+
allele: str | list | None = None,
|
43
|
+
sep: str = "\t"
|
44
|
+
) -> None:
|
45
|
+
self._delimiter = sep
|
46
|
+
self._full_data = None
|
47
|
+
|
48
|
+
self.__header = {}
|
49
|
+
self.__snp_data = None
|
50
|
+
self.__allele = allele
|
51
|
+
self._map_rn = None
|
52
|
+
|
53
|
+
@property
|
54
|
+
def header(self) -> dict:
|
55
|
+
return self.__header
|
56
|
+
|
57
|
+
@property
|
58
|
+
def snp_data(self) -> pd.DataFrame | None:
|
59
|
+
return self.__snp_data
|
60
|
+
|
61
|
+
def handle(
|
62
|
+
self, file_rep: Path | str, conv_file: Path | str = None
|
63
|
+
) -> bool:
|
64
|
+
""" Processes the FinalReport.txt file. Highlights meta information
|
65
|
+
and data.
|
66
|
+
|
67
|
+
:param file_rep: The file FinalReport.txt or another name.
|
68
|
+
:param conv_file: The file that contains IDs of registration numbers
|
69
|
+
of animals.
|
70
|
+
:return: Returns true if file processing was successful, false if
|
71
|
+
there were errors.
|
72
|
+
"""
|
73
|
+
|
74
|
+
try:
|
75
|
+
|
76
|
+
if isinstance(file_rep, str):
|
77
|
+
file_rep = Path(file_rep)
|
78
|
+
|
79
|
+
if not file_rep.is_file() and not file_rep.exists():
|
80
|
+
return False
|
81
|
+
|
82
|
+
# Processing conversion file
|
83
|
+
if conv_file is not None:
|
84
|
+
if isinstance(conv_file, str):
|
85
|
+
conv_file = Path(conv_file)
|
86
|
+
|
87
|
+
if not conv_file.is_file() and not conv_file.exists():
|
88
|
+
return False
|
89
|
+
|
90
|
+
self.__convert_s_id(conv_file)
|
91
|
+
|
92
|
+
# Processing report file
|
93
|
+
if not self.read(file_rep):
|
94
|
+
return False
|
95
|
+
|
96
|
+
if self._full_data is None:
|
97
|
+
raise Exception("Not data in file FinalReport.txt")
|
98
|
+
|
99
|
+
self.__handler_header()
|
100
|
+
self.__handler_data()
|
101
|
+
|
102
|
+
if self._map_rn is not None:
|
103
|
+
self.__snp_data['Sample ID'] = \
|
104
|
+
self.__snp_data['Sample ID'].map(
|
105
|
+
dict(zip(self._map_rn.SID, self._map_rn.UNIQ_KEY))
|
106
|
+
)
|
107
|
+
|
108
|
+
except Exception as e:
|
109
|
+
raise e
|
110
|
+
|
111
|
+
return True
|
112
|
+
|
113
|
+
def read(self, file_rep: Path) -> bool:
|
114
|
+
""" Reading data from the final_report file
|
115
|
+
|
116
|
+
:param file_rep: path, pointer to the file to be read.
|
117
|
+
:return: Returns true if the read was successful, false if it failed.
|
118
|
+
"""
|
119
|
+
try:
|
120
|
+
if len(data := file_rep.read_text()) != 0:
|
121
|
+
self._full_data = data.strip().split("\n")
|
122
|
+
return True
|
123
|
+
|
124
|
+
self._full_data = None
|
125
|
+
|
126
|
+
except Exception as e:
|
127
|
+
return False
|
128
|
+
|
129
|
+
return True
|
130
|
+
|
131
|
+
def __handler_header(self) -> None:
|
132
|
+
""" Processes data from a file, selects meta-information. """
|
133
|
+
|
134
|
+
for line in self._full_data:
|
135
|
+
if self.__class__.__PATTERN_DATA.findall(line):
|
136
|
+
return
|
137
|
+
|
138
|
+
if self.__class__.__PATTERN_HEADER.findall(line):
|
139
|
+
continue
|
140
|
+
|
141
|
+
key = line.strip().split("\t")[0]
|
142
|
+
value = line.strip().split("\t")[1]
|
143
|
+
|
144
|
+
self.__header[key] = value
|
145
|
+
|
146
|
+
def __handler_data(self) -> None:
|
147
|
+
""" Processes data and forms an array for further processing. """
|
148
|
+
|
149
|
+
temp = 1
|
150
|
+
for line in self._full_data:
|
151
|
+
if self.__class__.__PATTERN_DATA.findall(line):
|
152
|
+
break
|
153
|
+
temp += 1
|
154
|
+
|
155
|
+
names_col = self.__sample_by_allele(
|
156
|
+
self._full_data[temp].split(f"{self._delimiter}")
|
157
|
+
)
|
158
|
+
|
159
|
+
if names_col is None:
|
160
|
+
raise Exception(f"Error. Allele {self.__allele} not in data.")
|
161
|
+
|
162
|
+
self.__snp_data = pd.DataFrame(
|
163
|
+
[
|
164
|
+
item_data.split(f"{self._delimiter}")
|
165
|
+
for item_data in self._full_data[temp + 1:]
|
166
|
+
],
|
167
|
+
columns=self._full_data[temp].split(f"{self._delimiter}")
|
168
|
+
)[names_col]
|
169
|
+
|
170
|
+
def __sample_by_allele(self, names: list[str]) -> list[str] | None:
|
171
|
+
""" Method that generates a list of field names choosing which alleles
|
172
|
+
to keep
|
173
|
+
|
174
|
+
:param names: List of field names in the report file.
|
175
|
+
:return: Returns a filtered list of fields by alleles.
|
176
|
+
"""
|
177
|
+
|
178
|
+
allele_templ = r'(^Allele\d\s[:-]\s{}\b)'
|
179
|
+
|
180
|
+
match self.__allele:
|
181
|
+
case None:
|
182
|
+
return names
|
183
|
+
|
184
|
+
case str():
|
185
|
+
allele_pattern = re.compile(
|
186
|
+
allele_templ.format(self.__allele)
|
187
|
+
)
|
188
|
+
|
189
|
+
case list() | tuple() | set():
|
190
|
+
allele_pattern = re.compile(
|
191
|
+
allele_templ.format("|".join(self.__allele))
|
192
|
+
)
|
193
|
+
case _:
|
194
|
+
return None
|
195
|
+
|
196
|
+
lst_allele = reduce(
|
197
|
+
lambda i, j: i + j,
|
198
|
+
[allele_pattern.findall(item) for item in names]
|
199
|
+
)
|
200
|
+
|
201
|
+
if len(lst_allele) == 0:
|
202
|
+
return None
|
203
|
+
|
204
|
+
exclude_alleles = [
|
205
|
+
item for item in names
|
206
|
+
if item.startswith("Allele") and item not in lst_allele
|
207
|
+
]
|
208
|
+
|
209
|
+
return list(filter(
|
210
|
+
lambda x: True if x not in exclude_alleles else False, names
|
211
|
+
))
|
212
|
+
|
213
|
+
def __convert_s_id(self, path_file: Path) -> None:
|
214
|
+
"""Converts sample id which is in FinalReport to animal registration
|
215
|
+
number.
|
216
|
+
|
217
|
+
:param path_file: xlsx file with animal numbers label
|
218
|
+
"""
|
219
|
+
|
220
|
+
self._map_rn = pd.read_excel(
|
221
|
+
path_file,
|
222
|
+
header=None,
|
223
|
+
names=['SID', 'UNIQ_KEY', 'SEX'],
|
224
|
+
dtype={'SID': str},
|
225
|
+
index_col=False
|
226
|
+
)
|
227
|
+
|
228
|
+
if self._map_rn.empty:
|
229
|
+
self._map_rn = None
|
230
|
+
return
|
231
|
+
|
232
|
+
self._map_rn.SID = self._map_rn.SID.str.strip()
|
233
|
+
self._map_rn.UNIQ_KEY = self._map_rn.UNIQ_KEY.str.strip()
|
234
|
+
|
235
|
+
if self._check_on_ru_symbols(self._map_rn.UNIQ_KEY):
|
236
|
+
raise Exception("Error. Unique keys contain Cyrillic alphabet.")
|
237
|
+
|
238
|
+
if self._map_rn.UNIQ_KEY.isna().any():
|
239
|
+
self._map_rn.fillna('unknown', inplace=True)
|
240
|
+
|
241
|
+
@staticmethod
|
242
|
+
def _check_on_ru_symbols(seq: pd.Series) -> bool | None:
|
243
|
+
"""
|
244
|
+
|
245
|
+
:param seq:
|
246
|
+
:return:
|
247
|
+
"""
|
248
|
+
|
249
|
+
return any(seq.apply(lambda x: bool(re.search('[а-яА-Я]', x))))
|
250
|
+
|
251
|
+
|
snplib/format/__init__.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
from ._snp import Snp
|
6
|
-
from ._plink import (
|
7
|
-
make_map,
|
8
|
-
make_ped,
|
9
|
-
make_lgen,
|
10
|
-
make_fam
|
11
|
-
)
|
12
|
-
|
13
|
-
__all__ = [
|
14
|
-
"Snp",
|
15
|
-
"make_map",
|
16
|
-
"make_ped",
|
17
|
-
"make_fam",
|
18
|
-
"make_lgen"
|
19
|
-
]
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding: utf-8
|
3
|
+
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
+
|
5
|
+
from ._snp import Snp
|
6
|
+
from ._plink import (
|
7
|
+
make_map,
|
8
|
+
make_ped,
|
9
|
+
make_lgen,
|
10
|
+
make_fam
|
11
|
+
)
|
12
|
+
|
13
|
+
__all__ = [
|
14
|
+
"Snp",
|
15
|
+
"make_map",
|
16
|
+
"make_ped",
|
17
|
+
"make_fam",
|
18
|
+
"make_lgen"
|
19
|
+
]
|
snplib/format/__settings.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding: utf-8
|
3
|
-
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
-
|
5
|
-
FIELDS_ILLUMIN = ['SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB']
|
6
|
-
RENAME_FIELDS = ['SNP_NAME', 'SAMPLE_ID', 'ALLELE1', 'ALLELE2']
|
7
|
-
MAP_FIELDS = dict(zip(FIELDS_ILLUMIN, RENAME_FIELDS))
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding: utf-8
|
3
|
+
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
|
+
|
5
|
+
FIELDS_ILLUMIN = ['SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB']
|
6
|
+
RENAME_FIELDS = ['SNP_NAME', 'SAMPLE_ID', 'ALLELE1', 'ALLELE2']
|
7
|
+
MAP_FIELDS = dict(zip(FIELDS_ILLUMIN, RENAME_FIELDS))
|