snplib 1.1.10__tar.gz → 1.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snplib-1.1.10/src/snplib.egg-info → snplib-1.2.10}/PKG-INFO +1 -1
- {snplib-1.1.10 → snplib-1.2.10}/docs/examples.rst +25 -0
- {snplib-1.1.10 → snplib-1.2.10}/pyproject.toml +1 -1
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/finalreport/_finalreport.py +113 -62
- {snplib-1.1.10 → snplib-1.2.10/src/snplib.egg-info}/PKG-INFO +1 -1
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/test_finalreport.py +120 -43
- {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/linux.yml +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/macos.yml +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/windows.yml +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/.gitignore +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/.readthedocs.yaml +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/LICENSE +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/README.md +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/Makefile +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/conf.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/index.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/install.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/intro.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/logo.png +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/make.bat +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/modules.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/requirements.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.finalreport.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.format.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.parentage.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.statistics.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/docs/usage.rst +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/iconlib.png +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/requirements.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/setup.cfg +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/finalreport/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/__settings.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/_plink.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/_snp.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_discov.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_isagmark.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_verif.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/isag_disc.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/isag_verif.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_callrate.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_freq.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_snphwe.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/SOURCES.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/dependency_links.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/requires.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/top_level.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file4.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file8.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file8.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file9.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file9.xlsx +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file2.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file3.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file4.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/lgen/file.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/map/file_bovinesnp50.csv +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file2.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file3.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file4.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fsnp/file1.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fsnp/file2.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_fam.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_lgen.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_map.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_ped.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_snp.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_disc.csv +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_verf.csv +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/test_discov.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/test_verif.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/__init__.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/cr/file_cra.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/cr/file_crm.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/freq/etalon.txt +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/freq/file.pl +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_callrate.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_freq_allele.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_freq_maf.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_hwe_t.py +0 -0
- {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_snphwe.py +0 -0
@@ -73,6 +73,31 @@ Output::
|
|
73
73
|
|
74
74
|
...
|
75
75
|
|
76
|
+
To handle large files, use `usecols` and `dtype`. This reduces memory
|
77
|
+
consumption and speeds up processing.
|
78
|
+
|
79
|
+
.. note::
|
80
|
+
`usecols` can only be used when `allele` is **None**; passing both raises an exception.
|
81
|
+
|
82
|
+
.. code-block:: python
|
83
|
+
|
84
|
+
alleles_ab = FinalReport(
|
85
|
+
usecols=['SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB'],
|
86
|
+
dtype={'SNP Name': 'category'}
|
87
|
+
)
|
88
|
+
alleles_ab.handle("path/to/finalreport.txt")
|
89
|
+
data_ab = alleles_ab.snp_data
|
90
|
+
|
91
|
+
Output::
|
92
|
+
|
93
|
+
SNP Name Sample ID Allele1 - AB Allele2 - AB
|
94
|
+
ARS-BFGL-BAC-10172 HO840M003135245650 B B
|
95
|
+
ARS-BFGL-BAC-1020 HO840M003135245650 B B
|
96
|
+
ARS-BFGL-BAC-10245 HO840M003135245650 B B
|
97
|
+
ARS-BFGL-BAC-10345 HO840M003135245650 A B
|
98
|
+
ARS-BFGL-BAC-10375 HO840M003135245650 A B
|
99
|
+
...
|
100
|
+
|
76
101
|
Preparation SNP files
|
77
102
|
---------------------
|
78
103
|
|
@@ -17,7 +17,7 @@ snplib = ["*.pl"]
|
|
17
17
|
|
18
18
|
[project]
|
19
19
|
name = "snplib"
|
20
|
-
version = "1.1.10"
|
20
|
+
version = "1.2.10"
|
21
21
|
description = "Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing"
|
22
22
|
authors = [
|
23
23
|
{name = "Igor", email = "igor.loschinin@gmail.com"}
|
@@ -3,13 +3,12 @@
|
|
3
3
|
__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
|
4
4
|
__all__ = ("FinalReport",)
|
5
5
|
|
6
|
-
from pathlib import Path
|
7
|
-
from functools import reduce
|
8
|
-
|
9
6
|
import re
|
7
|
+
from functools import reduce
|
8
|
+
from pathlib import Path
|
10
9
|
|
11
|
-
from numpy import nan
|
12
10
|
import pandas as pd
|
11
|
+
from numpy import nan
|
13
12
|
|
14
13
|
|
15
14
|
class FinalReport(object):
|
@@ -17,10 +16,14 @@ class FinalReport(object):
|
|
17
16
|
handle method. If values in 'SID' or 'UNIQ_KEY' were missing in the xlsx
|
18
17
|
conversion file, the processed data will contain NAN values.
|
19
18
|
|
20
|
-
:
|
21
|
-
|
22
|
-
|
23
|
-
:
|
19
|
+
:param allele: A variant form of a single nucleotide polymorphism (SNP), a
|
20
|
+
specific polymorphic site or a whole gene detectable at a locus. Type:
|
21
|
+
'AB', 'Forward', 'Top', 'Plus', 'Design'.
|
22
|
+
:param sep: Delimiter to use. Default value: "\\t".
|
23
|
+
:param usecols: Selection of fields for reading. Accelerates processing
|
24
|
+
and reduces memory.
|
25
|
+
:param dtype: Data type(s) to apply to either the whole dataset or
|
26
|
+
individual columns. E.g., {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
|
24
27
|
|
25
28
|
Example:
|
26
29
|
[Header]
|
@@ -38,20 +41,34 @@ class FinalReport(object):
|
|
38
41
|
...
|
39
42
|
"""
|
40
43
|
|
41
|
-
__PATTERN_HEADER = re.compile(r'(^\[Header
|
42
|
-
__PATTERN_DATA = re.compile(r'(^\[Data
|
44
|
+
__PATTERN_HEADER = re.compile(r'(^\[Header])')
|
45
|
+
__PATTERN_DATA = re.compile(r'(^\[Data])')
|
46
|
+
|
47
|
+
__slots__ = (
|
48
|
+
"_delimiter",
|
49
|
+
"__allele",
|
50
|
+
"__usecols",
|
51
|
+
"__dtype",
|
52
|
+
"__snp_data",
|
53
|
+
"__header",
|
54
|
+
"_map_rn",
|
55
|
+
)
|
43
56
|
|
44
57
|
def __init__(
|
45
58
|
self,
|
46
59
|
allele: str | list | None = None,
|
60
|
+
usecols: list[str] | None = None,
|
61
|
+
dtype: dict | None = None,
|
47
62
|
sep: str = "\t"
|
48
63
|
) -> None:
|
49
64
|
self._delimiter = sep
|
50
|
-
self.
|
65
|
+
self.__allele = allele
|
66
|
+
self.__usecols = usecols
|
67
|
+
self.__dtype = dtype
|
51
68
|
|
69
|
+
# self._full_data = None
|
70
|
+
self.__snp_data: pd.DataFrame | None = None
|
52
71
|
self.__header = {}
|
53
|
-
self.__snp_data = None
|
54
|
-
self.__allele = allele
|
55
72
|
self._map_rn = None
|
56
73
|
|
57
74
|
@property
|
@@ -77,6 +94,9 @@ class FinalReport(object):
|
|
77
94
|
|
78
95
|
try:
|
79
96
|
|
97
|
+
if self.__allele is not None and self.__usecols is not None:
|
98
|
+
raise Exception("Error. Usecols is used for allele is none.")
|
99
|
+
|
80
100
|
if isinstance(file_rep, str):
|
81
101
|
file_rep = Path(file_rep)
|
82
102
|
|
@@ -93,17 +113,11 @@ class FinalReport(object):
|
|
93
113
|
|
94
114
|
self.__convert_s_id(conv_file)
|
95
115
|
|
96
|
-
# Processing report file
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
if self._full_data is None:
|
101
|
-
raise Exception("Not data in file FinalReport.txt")
|
102
|
-
|
103
|
-
self.__handler_header()
|
104
|
-
self.__handler_data()
|
116
|
+
# # Processing report file
|
117
|
+
self.__handler_header(file_rep)
|
118
|
+
self.__handler_data(file_rep)
|
105
119
|
|
106
|
-
if self._map_rn is not None:
|
120
|
+
if not self.__snp_data.empty and self._map_rn is not None:
|
107
121
|
self.__snp_data['Sample ID'] = \
|
108
122
|
self.__snp_data['Sample ID'].map(
|
109
123
|
dict(zip(self._map_rn.SID, self._map_rn.UNIQ_KEY))
|
@@ -114,62 +128,99 @@ class FinalReport(object):
|
|
114
128
|
|
115
129
|
return True
|
116
130
|
|
117
|
-
def
|
118
|
-
"""
|
131
|
+
def __handler_header(self, file_rep: Path) -> None:
|
132
|
+
""" Processes data from a file, selects meta-information.
|
119
133
|
|
120
134
|
:param file_rep: path, pointer to the file to be read.
|
121
|
-
:return: Returns true if the read was successful, false if it failed.
|
122
135
|
"""
|
123
|
-
try:
|
124
|
-
if len(data := file_rep.read_text()) != 0:
|
125
|
-
self._full_data = data.strip().split("\n")
|
126
|
-
return True
|
127
136
|
|
128
|
-
|
137
|
+
with open(file_rep, 'r') as file:
|
129
138
|
|
130
|
-
|
131
|
-
|
139
|
+
for line in file:
|
140
|
+
if self.__class__.__PATTERN_DATA.findall(line.strip()):
|
141
|
+
return
|
132
142
|
|
133
|
-
|
143
|
+
if self.__class__.__PATTERN_HEADER.findall(line.strip()) or\
|
144
|
+
len(line.strip()) == 0:
|
145
|
+
continue
|
146
|
+
|
147
|
+
key = line.strip().split("\t")[0]
|
148
|
+
value = line.strip().split("\t")[1]
|
149
|
+
|
150
|
+
self.__header[key] = value
|
151
|
+
|
152
|
+
def __handler_data(self, file_rep: Path) -> None:
|
153
|
+
""" Processes data and forms an array for further processing.
|
154
|
+
|
155
|
+
:param file_rep: path, pointer to the file to be read.
|
156
|
+
"""
|
134
157
|
|
135
|
-
|
136
|
-
|
158
|
+
with open(file_rep, 'r') as file:
|
159
|
+
|
160
|
+
# Search for the data start index and skip
|
161
|
+
for line in file:
|
162
|
+
if self.__class__.__PATTERN_DATA.findall(line.strip()):
|
163
|
+
break
|
164
|
+
|
165
|
+
# line column
|
166
|
+
orig_name_col = file.readline().strip().split(self._delimiter)
|
167
|
+
|
168
|
+
if self.__allele is None and self.__usecols is None:
|
169
|
+
self.__snp_data = pd.read_csv(
|
170
|
+
file,
|
171
|
+
sep=self._delimiter,
|
172
|
+
header=None,
|
173
|
+
names=orig_name_col,
|
174
|
+
dtype=self.__dtype,
|
175
|
+
low_memory=True,
|
176
|
+
na_filter=True
|
177
|
+
)
|
137
178
|
|
138
|
-
for line in self._full_data:
|
139
|
-
if self.__class__.__PATTERN_DATA.findall(line):
|
140
179
|
return
|
141
180
|
|
142
|
-
|
143
|
-
|
181
|
+
sub_n_col = self.__processing_columns(orig_name_col)
|
182
|
+
self.__snp_data = pd.read_csv(
|
183
|
+
file,
|
184
|
+
sep=self._delimiter,
|
185
|
+
header=None,
|
186
|
+
names=orig_name_col,
|
187
|
+
usecols=sub_n_col,
|
188
|
+
dtype=self.__dtype,
|
189
|
+
low_memory=True,
|
190
|
+
na_filter=True
|
191
|
+
)
|
144
192
|
|
145
|
-
|
146
|
-
value = line.strip().split("\t")[1]
|
193
|
+
return
|
147
194
|
|
148
|
-
|
195
|
+
def __processing_columns(self, lst_col: list[str]) -> list[str] | None:
|
196
|
+
""" Processing the line with all the names of the fields and the
|
197
|
+
sample of them.
|
149
198
|
|
150
|
-
|
151
|
-
|
199
|
+
:param lst_col: List of all fields.
|
200
|
+
:return: Returns a tuple with a list of names of selected fields.
|
201
|
+
"""
|
152
202
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
temp += 1
|
203
|
+
if self.__usecols is not None:
|
204
|
+
check_n_col = [
|
205
|
+
item for item in self.__usecols if item in lst_col
|
206
|
+
]
|
158
207
|
|
159
|
-
|
160
|
-
|
161
|
-
|
208
|
+
# Check on empty list
|
209
|
+
if check_n_col:
|
210
|
+
return self.__usecols
|
211
|
+
|
212
|
+
raise Exception(
|
213
|
+
f"Error. The USECOLS list contains not true fields."
|
214
|
+
)
|
162
215
|
|
163
|
-
|
164
|
-
|
216
|
+
# processing alleles
|
217
|
+
sample_n_col = self.__sample_by_allele(lst_col)
|
218
|
+
if sample_n_col is None:
|
219
|
+
raise Exception(
|
220
|
+
f"Error. Allele {self.__allele} not in data."
|
221
|
+
)
|
165
222
|
|
166
|
-
|
167
|
-
[
|
168
|
-
item_data.split(f"{self._delimiter}")
|
169
|
-
for item_data in self._full_data[temp + 1:]
|
170
|
-
],
|
171
|
-
columns=self._full_data[temp].split(f"{self._delimiter}")
|
172
|
-
)[names_col]
|
223
|
+
return sample_n_col
|
173
224
|
|
174
225
|
def __sample_by_allele(self, names: list[str]) -> list[str] | None:
|
175
226
|
""" Method that generates a list of field names choosing which alleles
|
@@ -11,12 +11,12 @@ import pytest
|
|
11
11
|
|
12
12
|
@pytest.fixture
|
13
13
|
def report(request) -> FinalReport:
|
14
|
-
return FinalReport(
|
14
|
+
return FinalReport(**request.param)
|
15
15
|
|
16
16
|
|
17
17
|
class TestFinalReport(object):
|
18
18
|
|
19
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
19
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
20
20
|
def test_handle_1(self, report: FinalReport) -> None:
|
21
21
|
""" If both files do not exist """
|
22
22
|
|
@@ -24,7 +24,17 @@ class TestFinalReport(object):
|
|
24
24
|
DIR_FILES / "fr/f.txt", DIR_FILES / "fr/f.xlsx",
|
25
25
|
)
|
26
26
|
|
27
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
27
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
28
|
+
def test_handle_8(self, report: FinalReport) -> None:
|
29
|
+
""" If files exist """
|
30
|
+
|
31
|
+
assert report.handle(
|
32
|
+
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
|
33
|
+
)
|
34
|
+
|
35
|
+
assert len(report.header) != 0 and not report.snp_data.empty
|
36
|
+
|
37
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
28
38
|
def test_handle_2(self, report: FinalReport) -> None:
|
29
39
|
""" If the file to convert does not exist """
|
30
40
|
|
@@ -32,7 +42,7 @@ class TestFinalReport(object):
|
|
32
42
|
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/f.xlsx",
|
33
43
|
)
|
34
44
|
|
35
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
45
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
36
46
|
def test_handle_3(self, report: FinalReport) -> None:
|
37
47
|
""" If the data does not contain header data """
|
38
48
|
|
@@ -42,7 +52,7 @@ class TestFinalReport(object):
|
|
42
52
|
|
43
53
|
assert len(report.header) == 0 and not report.snp_data.empty
|
44
54
|
|
45
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
55
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
46
56
|
def test_handle_4(self, report: FinalReport) -> None:
|
47
57
|
""" If the file contains only header and field names """
|
48
58
|
|
@@ -52,20 +62,18 @@ class TestFinalReport(object):
|
|
52
62
|
|
53
63
|
assert report.snp_data is not None and report.snp_data.empty
|
54
64
|
|
55
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
65
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
56
66
|
def test_handle_5(self, report: FinalReport) -> None:
|
57
67
|
""" If the data file is empty """
|
58
68
|
|
59
|
-
|
60
|
-
|
61
|
-
)
|
62
|
-
report.handle(
|
63
|
-
DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
|
64
|
-
)
|
69
|
+
report.handle(
|
70
|
+
DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
|
71
|
+
)
|
65
72
|
|
66
|
-
assert report.
|
73
|
+
assert len(report.header) == 0
|
74
|
+
assert report.snp_data is not None and report.snp_data.empty
|
67
75
|
|
68
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
76
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
69
77
|
def test_handle_6(self, report: FinalReport) -> None:
|
70
78
|
""" If the conversion file is empty """
|
71
79
|
|
@@ -76,7 +84,7 @@ class TestFinalReport(object):
|
|
76
84
|
assert not report.snp_data.empty
|
77
85
|
assert len(report.header) != 0
|
78
86
|
|
79
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
87
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
80
88
|
def test_handle_7(self, report: FinalReport) -> None:
|
81
89
|
""" If the data file is not needed to convert ID name """
|
82
90
|
|
@@ -85,15 +93,7 @@ class TestFinalReport(object):
|
|
85
93
|
assert not report.snp_data.empty
|
86
94
|
assert len(report.header) != 0
|
87
95
|
|
88
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
89
|
-
def test_handle_8(self, report: FinalReport) -> None:
|
90
|
-
""" If files exist """
|
91
|
-
|
92
|
-
assert report.handle(
|
93
|
-
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
|
94
|
-
)
|
95
|
-
|
96
|
-
@pytest.mark.parametrize("report", [None], indirect=True)
|
96
|
+
@pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
|
97
97
|
def test_allele_none(self, report: FinalReport) -> None:
|
98
98
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
99
99
|
|
@@ -105,7 +105,7 @@ class TestFinalReport(object):
|
|
105
105
|
|
106
106
|
assert report.snp_data.columns.difference(_fields).empty
|
107
107
|
|
108
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
108
|
+
@pytest.mark.parametrize("report", [{'allele': "AB"}], indirect=True)
|
109
109
|
def test_sample_allele_ab(self, report: FinalReport) -> None:
|
110
110
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
111
111
|
|
@@ -116,7 +116,7 @@ class TestFinalReport(object):
|
|
116
116
|
|
117
117
|
assert report.snp_data.columns.difference(_fields).empty
|
118
118
|
|
119
|
-
@pytest.mark.parametrize("report", ["Forward"], indirect=True)
|
119
|
+
@pytest.mark.parametrize("report", [{'allele': "Forward"}], indirect=True)
|
120
120
|
def test_sample_allele_forward(self, report: FinalReport) -> None:
|
121
121
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
122
122
|
|
@@ -127,7 +127,7 @@ class TestFinalReport(object):
|
|
127
127
|
|
128
128
|
assert report.snp_data.columns.difference(_fields).empty
|
129
129
|
|
130
|
-
@pytest.mark.parametrize("report", ["Top"], indirect=True)
|
130
|
+
@pytest.mark.parametrize("report", [{'allele': "Top"}], indirect=True)
|
131
131
|
def test_sample_allele_top(self, report: FinalReport) -> None:
|
132
132
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
133
133
|
|
@@ -138,7 +138,9 @@ class TestFinalReport(object):
|
|
138
138
|
|
139
139
|
assert report.snp_data.columns.difference(_fields).empty
|
140
140
|
|
141
|
-
@pytest.mark.parametrize(
|
141
|
+
@pytest.mark.parametrize(
|
142
|
+
"report", [{'allele': ["AB", "Top"]}], indirect=True
|
143
|
+
)
|
142
144
|
def test_sample_allele_list1(self, report: FinalReport) -> None:
|
143
145
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
144
146
|
|
@@ -149,7 +151,7 @@ class TestFinalReport(object):
|
|
149
151
|
|
150
152
|
assert report.snp_data.columns.difference(_fields).empty
|
151
153
|
|
152
|
-
@pytest.mark.parametrize("report", [["AB"]], indirect=True)
|
154
|
+
@pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
|
153
155
|
def test_sample_allele_list2(self, report: FinalReport) -> None:
|
154
156
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
155
157
|
|
@@ -160,7 +162,9 @@ class TestFinalReport(object):
|
|
160
162
|
|
161
163
|
assert report.snp_data.columns.difference(_fields).empty
|
162
164
|
|
163
|
-
@pytest.mark.parametrize(
|
165
|
+
@pytest.mark.parametrize(
|
166
|
+
"report", [{'allele': ("AB", "Top")}], indirect=True
|
167
|
+
)
|
164
168
|
def test_sample_allele_tuple(self, report: FinalReport) -> None:
|
165
169
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
166
170
|
|
@@ -171,7 +175,9 @@ class TestFinalReport(object):
|
|
171
175
|
|
172
176
|
assert report.snp_data.columns.difference(_fields).empty
|
173
177
|
|
174
|
-
@pytest.mark.parametrize(
|
178
|
+
@pytest.mark.parametrize(
|
179
|
+
"report", [{'allele': {"AB", "Top"}}], indirect=True
|
180
|
+
)
|
175
181
|
def test_sample_allele_set(self, report: FinalReport) -> None:
|
176
182
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
177
183
|
|
@@ -182,7 +188,7 @@ class TestFinalReport(object):
|
|
182
188
|
|
183
189
|
assert report.snp_data.columns.difference(_fields).empty
|
184
190
|
|
185
|
-
@pytest.mark.parametrize("report", ["GG"], indirect=True)
|
191
|
+
@pytest.mark.parametrize("report", [{'allele': "GG"}], indirect=True)
|
186
192
|
def test_sample_allele_not_exist(self, report: FinalReport) -> None:
|
187
193
|
|
188
194
|
with pytest.raises(
|
@@ -190,8 +196,8 @@ class TestFinalReport(object):
|
|
190
196
|
):
|
191
197
|
report.handle(DIR_FILES / "fr/file4.txt", None)
|
192
198
|
|
193
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
194
|
-
def
|
199
|
+
@pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
|
200
|
+
def test_on_ru_symbol(self, report: FinalReport) -> None:
|
195
201
|
""" An error is checked if the name of the number is Kirilitsa """
|
196
202
|
|
197
203
|
with pytest.raises(
|
@@ -201,10 +207,10 @@ class TestFinalReport(object):
|
|
201
207
|
DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
|
202
208
|
)
|
203
209
|
|
204
|
-
assert report.snp_data is None
|
210
|
+
assert report.snp_data is None and len(report.header) == 0
|
205
211
|
|
206
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
207
|
-
def
|
212
|
+
@pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
|
213
|
+
def test_on_nan_in_sid_file_xlsx(self, report: FinalReport) -> None:
|
208
214
|
""" Checking for processing empty values in SID """
|
209
215
|
|
210
216
|
report.handle(
|
@@ -212,18 +218,89 @@ class TestFinalReport(object):
|
|
212
218
|
DIR_FILES / "fr/file8.xlsx"
|
213
219
|
)
|
214
220
|
|
215
|
-
assert report.snp_data is not None
|
216
|
-
assert not report.snp_data.empty
|
221
|
+
assert report.snp_data is not None and not report.snp_data.empty
|
217
222
|
assert report.snp_data['Sample ID'].isna().any()
|
218
223
|
|
219
|
-
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
220
|
-
def
|
224
|
+
@pytest.mark.parametrize("report", [{'allele': "AB"}], indirect=True)
|
225
|
+
def test_on_nan_in_fr(self, report: FinalReport) -> None:
|
221
226
|
""" Checking for missing values in SID """
|
222
227
|
report.handle(
|
223
228
|
DIR_FILES / "fr/file9.txt",
|
224
229
|
DIR_FILES / "fr/file9.xlsx"
|
225
230
|
)
|
226
231
|
|
227
|
-
assert report.snp_data is not None
|
228
|
-
assert not report.snp_data.empty
|
232
|
+
assert report.snp_data is not None and not report.snp_data.empty
|
229
233
|
assert report.snp_data['Sample ID'].isna().any()
|
234
|
+
|
235
|
+
@pytest.mark.parametrize(
|
236
|
+
"report",
|
237
|
+
[{'allele': "AB", 'usecols': ['SNP Name', 'Sample ID']}],
|
238
|
+
indirect=True
|
239
|
+
)
|
240
|
+
def test_raise_use_allele_and_usecols(self, report: FinalReport) -> None:
|
241
|
+
""" Checking for allele and usecols sharing error """
|
242
|
+
|
243
|
+
with pytest.raises(
|
244
|
+
Exception, match="Error. Usecols is used for allele is none."
|
245
|
+
):
|
246
|
+
report.handle(
|
247
|
+
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
|
248
|
+
)
|
249
|
+
|
250
|
+
assert report.snp_data is None and len(report.header) == 0
|
251
|
+
|
252
|
+
@pytest.mark.parametrize(
|
253
|
+
"report",
|
254
|
+
[{'usecols': ['SNP Name', 'Sample ID']}],
|
255
|
+
indirect=True
|
256
|
+
)
|
257
|
+
def test_using_usecols(self, report: FinalReport) -> None:
|
258
|
+
""" Checking for use setting usecols """
|
259
|
+
|
260
|
+
_fields = ['SNP Name', 'Sample ID']
|
261
|
+
|
262
|
+
report.handle(
|
263
|
+
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
|
264
|
+
)
|
265
|
+
|
266
|
+
assert not report.snp_data.empty and len(report.header) != 0
|
267
|
+
assert report.snp_data.columns.difference(_fields).empty
|
268
|
+
|
269
|
+
@pytest.mark.parametrize(
|
270
|
+
"report",
|
271
|
+
[{
|
272
|
+
'usecols': ['SNP Name', 'Sample ID'],
|
273
|
+
'dtype': {'SNP Name': 'category'}
|
274
|
+
}],
|
275
|
+
indirect=True
|
276
|
+
)
|
277
|
+
def test_using_dtype_and_combo_usecols(self, report: FinalReport) -> None:
|
278
|
+
""" Checking the use of dtype """
|
279
|
+
|
280
|
+
_fields = ['SNP Name', 'Sample ID']
|
281
|
+
|
282
|
+
report.handle(
|
283
|
+
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
|
284
|
+
)
|
285
|
+
|
286
|
+
assert not report.snp_data.empty and len(report.header) != 0
|
287
|
+
assert report.snp_data.columns.difference(_fields).empty
|
288
|
+
assert report.snp_data['SNP Name'].dtypes == 'category'
|
289
|
+
|
290
|
+
@pytest.mark.parametrize(
|
291
|
+
"report",
|
292
|
+
[{
|
293
|
+
'allele': "AB",
|
294
|
+
'dtype': {'SNP Name': 'category'}
|
295
|
+
}],
|
296
|
+
indirect=True
|
297
|
+
)
|
298
|
+
def test_using_dtype_and_combo_usecols(self, report: FinalReport) -> None:
|
299
|
+
""" Checking the use combo dtype and allele """
|
300
|
+
|
301
|
+
report.handle(
|
302
|
+
DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
|
303
|
+
)
|
304
|
+
|
305
|
+
assert not report.snp_data.empty and len(report.header) != 0
|
306
|
+
assert report.snp_data['SNP Name'].dtypes == 'category'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|