snplib 1.1.10__tar.gz → 1.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. {snplib-1.1.10/src/snplib.egg-info → snplib-1.2.10}/PKG-INFO +1 -1
  2. {snplib-1.1.10 → snplib-1.2.10}/docs/examples.rst +25 -0
  3. {snplib-1.1.10 → snplib-1.2.10}/pyproject.toml +1 -1
  4. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/finalreport/_finalreport.py +113 -62
  5. {snplib-1.1.10 → snplib-1.2.10/src/snplib.egg-info}/PKG-INFO +1 -1
  6. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/test_finalreport.py +120 -43
  7. {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/linux.yml +0 -0
  8. {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/macos.yml +0 -0
  9. {snplib-1.1.10 → snplib-1.2.10}/.github/workflows/windows.yml +0 -0
  10. {snplib-1.1.10 → snplib-1.2.10}/.gitignore +0 -0
  11. {snplib-1.1.10 → snplib-1.2.10}/.readthedocs.yaml +0 -0
  12. {snplib-1.1.10 → snplib-1.2.10}/LICENSE +0 -0
  13. {snplib-1.1.10 → snplib-1.2.10}/README.md +0 -0
  14. {snplib-1.1.10 → snplib-1.2.10}/__init__.py +0 -0
  15. {snplib-1.1.10 → snplib-1.2.10}/docs/Makefile +0 -0
  16. {snplib-1.1.10 → snplib-1.2.10}/docs/conf.py +0 -0
  17. {snplib-1.1.10 → snplib-1.2.10}/docs/index.rst +0 -0
  18. {snplib-1.1.10 → snplib-1.2.10}/docs/install.rst +0 -0
  19. {snplib-1.1.10 → snplib-1.2.10}/docs/intro.rst +0 -0
  20. {snplib-1.1.10 → snplib-1.2.10}/docs/logo.png +0 -0
  21. {snplib-1.1.10 → snplib-1.2.10}/docs/make.bat +0 -0
  22. {snplib-1.1.10 → snplib-1.2.10}/docs/modules.rst +0 -0
  23. {snplib-1.1.10 → snplib-1.2.10}/docs/requirements.txt +0 -0
  24. {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.finalreport.rst +0 -0
  25. {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.format.rst +0 -0
  26. {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.parentage.rst +0 -0
  27. {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.rst +0 -0
  28. {snplib-1.1.10 → snplib-1.2.10}/docs/snplib.statistics.rst +0 -0
  29. {snplib-1.1.10 → snplib-1.2.10}/docs/usage.rst +0 -0
  30. {snplib-1.1.10 → snplib-1.2.10}/iconlib.png +0 -0
  31. {snplib-1.1.10 → snplib-1.2.10}/requirements.txt +0 -0
  32. {snplib-1.1.10 → snplib-1.2.10}/setup.cfg +0 -0
  33. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/__init__.py +0 -0
  34. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/finalreport/__init__.py +0 -0
  35. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/__init__.py +0 -0
  36. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/__settings.py +0 -0
  37. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/_plink.py +0 -0
  38. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/format/_snp.py +0 -0
  39. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/__init__.py +0 -0
  40. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_discov.py +0 -0
  41. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_isagmark.py +0 -0
  42. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/_verif.py +0 -0
  43. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/isag_disc.pl +0 -0
  44. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/parentage/isag_verif.pl +0 -0
  45. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/__init__.py +0 -0
  46. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_callrate.py +0 -0
  47. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_freq.py +0 -0
  48. {snplib-1.1.10 → snplib-1.2.10}/src/snplib/statistics/_snphwe.py +0 -0
  49. {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/SOURCES.txt +0 -0
  50. {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/dependency_links.txt +0 -0
  51. {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/requires.txt +0 -0
  52. {snplib-1.1.10 → snplib-1.2.10}/src/snplib.egg-info/top_level.txt +0 -0
  53. {snplib-1.1.10 → snplib-1.2.10}/tests/__init__.py +0 -0
  54. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/__init__.py +0 -0
  55. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.txt +0 -0
  56. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.xlsx +0 -0
  57. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.txt +0 -0
  58. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.xlsx +0 -0
  59. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.txt +0 -0
  60. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.xlsx +0 -0
  61. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file4.txt +0 -0
  62. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.txt +0 -0
  63. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.xlsx +0 -0
  64. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.txt +0 -0
  65. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.xlsx +0 -0
  66. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.txt +0 -0
  67. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.xlsx +0 -0
  68. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file8.txt +0 -0
  69. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file8.xlsx +0 -0
  70. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file9.txt +0 -0
  71. {snplib-1.1.10 → snplib-1.2.10}/tests/finalreport/files/fr/file9.xlsx +0 -0
  72. {snplib-1.1.10 → snplib-1.2.10}/tests/format/__init__.py +0 -0
  73. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file.pl +0 -0
  74. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file2.pl +0 -0
  75. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file3.pl +0 -0
  76. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file4.pl +0 -0
  77. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/lgen/file.pl +0 -0
  78. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/map/file_bovinesnp50.csv +0 -0
  79. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file.pl +0 -0
  80. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file2.pl +0 -0
  81. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file3.pl +0 -0
  82. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file4.pl +0 -0
  83. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fsnp/file1.txt +0 -0
  84. {snplib-1.1.10 → snplib-1.2.10}/tests/format/files/fsnp/file2.txt +0 -0
  85. {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_fam.py +0 -0
  86. {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_lgen.py +0 -0
  87. {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_map.py +0 -0
  88. {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_plink_ped.py +0 -0
  89. {snplib-1.1.10 → snplib-1.2.10}/tests/format/test_snp.py +0 -0
  90. {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/__init__.py +0 -0
  91. {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_disc.csv +0 -0
  92. {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_verf.csv +0 -0
  93. {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/test_discov.py +0 -0
  94. {snplib-1.1.10 → snplib-1.2.10}/tests/parentage/test_verif.py +0 -0
  95. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/__init__.py +0 -0
  96. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/cr/file_cra.pl +0 -0
  97. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/cr/file_crm.pl +0 -0
  98. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/freq/etalon.txt +0 -0
  99. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/data/freq/file.pl +0 -0
  100. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_callrate.py +0 -0
  101. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_freq_allele.py +0 -0
  102. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_freq_maf.py +0 -0
  103. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_hwe_t.py +0 -0
  104. {snplib-1.1.10 → snplib-1.2.10}/tests/statistics/test_snphwe.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.1.10
3
+ Version: 1.2.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -73,6 +73,31 @@ Output::
73
73
 
74
74
  ...
75
75
 
76
+ To handle large files, use `usecols` and `dtype`. This reduces memory
77
+ consumption and speeds up processing.
78
+
79
+ .. note::
80
+ `usecols` can only be used when `allele` is **None**; passing both raises an exception.
81
+
82
+ .. code-block:: python
83
+
84
+ alleles_ab = FinalReport(
85
+ usecols=['SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB'],
86
+ dtype={'SNP Name': 'category'}
87
+ )
88
+ alleles_ab.handle("path/to/finalreport.txt")
89
+ data_ab = alleles_ab.snp_data
90
+
91
+ Output::
92
+
93
+ SNP Name Sample ID Allele1 - AB Allele2 - AB
94
+ ARS-BFGL-BAC-10172 HO840M003135245650 B B
95
+ ARS-BFGL-BAC-1020 HO840M003135245650 B B
96
+ ARS-BFGL-BAC-10245 HO840M003135245650 B B
97
+ ARS-BFGL-BAC-10345 HO840M003135245650 A B
98
+ ARS-BFGL-BAC-10375 HO840M003135245650 A B
99
+ ...
100
+
76
101
  Preparation SNP files
77
102
  ---------------------
78
103
 
@@ -17,7 +17,7 @@ snplib = ["*.pl"]
17
17
 
18
18
  [project]
19
19
  name = "snplib"
20
- version = "1.1.10"
20
+ version = "1.2.10"
21
21
  description = "Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing"
22
22
  authors = [
23
23
  {name = "Igor", email = "igor.loschinin@gmail.com"}
@@ -3,13 +3,12 @@
3
3
  __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
4
  __all__ = ("FinalReport",)
5
5
 
6
- from pathlib import Path
7
- from functools import reduce
8
-
9
6
  import re
7
+ from functools import reduce
8
+ from pathlib import Path
10
9
 
11
- from numpy import nan
12
10
  import pandas as pd
11
+ from numpy import nan
13
12
 
14
13
 
15
14
  class FinalReport(object):
@@ -17,10 +16,14 @@ class FinalReport(object):
17
16
  handle method. If values in 'SID' or 'UNIQ_KEY' were missing in the xlsx
18
17
  conversion file, the processed data will contain NAN values.
19
18
 
20
- :argument allele: A variant form of a single nucleotide polymorphism
21
- (SNP), a specific polymorphic site or a whole gene detectable at
22
- a locus. Type: 'AB', 'Forward', 'Top', 'Plus', 'Design'
23
- :argument sep: Delimiter to use. Default value: "\\t"
19
+ :param allele: A variant form of a single nucleotide polymorphism (SNP), a
20
+ specific polymorphic site or a whole gene detectable at a locus. Type:
21
+ 'AB', 'Forward', 'Top', 'Plus', 'Design'.
22
+ :param sep: Delimiter to use. Default value: "\\t".
23
+ :param usecols: Selection of fields for reading. Accelerates processing
24
+ and reduces memory consumption.
25
+ :param dtype: Data type(s) to apply to either the whole dataset or
26
+ individual columns. E.g., {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
24
27
 
25
28
  Example:
26
29
  [Header]
@@ -38,20 +41,34 @@ class FinalReport(object):
38
41
  ...
39
42
  """
40
43
 
41
- __PATTERN_HEADER = re.compile(r'(^\[Header\])')
42
- __PATTERN_DATA = re.compile(r'(^\[Data\])')
44
+ __PATTERN_HEADER = re.compile(r'(^\[Header])')
45
+ __PATTERN_DATA = re.compile(r'(^\[Data])')
46
+
47
+ __slots__ = (
48
+ "_delimiter",
49
+ "__allele",
50
+ "__usecols",
51
+ "__dtype",
52
+ "__snp_data",
53
+ "__header",
54
+ "_map_rn",
55
+ )
43
56
 
44
57
  def __init__(
45
58
  self,
46
59
  allele: str | list | None = None,
60
+ usecols: list[str] | None = None,
61
+ dtype: dict | None = None,
47
62
  sep: str = "\t"
48
63
  ) -> None:
49
64
  self._delimiter = sep
50
- self._full_data = None
65
+ self.__allele = allele
66
+ self.__usecols = usecols
67
+ self.__dtype = dtype
51
68
 
69
+ # self._full_data = None
70
+ self.__snp_data: pd.DataFrame | None = None
52
71
  self.__header = {}
53
- self.__snp_data = None
54
- self.__allele = allele
55
72
  self._map_rn = None
56
73
 
57
74
  @property
@@ -77,6 +94,9 @@ class FinalReport(object):
77
94
 
78
95
  try:
79
96
 
97
+ if self.__allele is not None and self.__usecols is not None:
98
+ raise Exception("Error. Usecols is used for allele is none.")
99
+
80
100
  if isinstance(file_rep, str):
81
101
  file_rep = Path(file_rep)
82
102
 
@@ -93,17 +113,11 @@ class FinalReport(object):
93
113
 
94
114
  self.__convert_s_id(conv_file)
95
115
 
96
- # Processing report file
97
- if not self.read(file_rep):
98
- return False
99
-
100
- if self._full_data is None:
101
- raise Exception("Not data in file FinalReport.txt")
102
-
103
- self.__handler_header()
104
- self.__handler_data()
116
+ # # Processing report file
117
+ self.__handler_header(file_rep)
118
+ self.__handler_data(file_rep)
105
119
 
106
- if self._map_rn is not None:
120
+ if not self.__snp_data.empty and self._map_rn is not None:
107
121
  self.__snp_data['Sample ID'] = \
108
122
  self.__snp_data['Sample ID'].map(
109
123
  dict(zip(self._map_rn.SID, self._map_rn.UNIQ_KEY))
@@ -114,62 +128,99 @@ class FinalReport(object):
114
128
 
115
129
  return True
116
130
 
117
- def read(self, file_rep: Path) -> bool:
118
- """ Reading data from the final_report file
131
+ def __handler_header(self, file_rep: Path) -> None:
132
+ """ Processes data from a file, selects meta-information.
119
133
 
120
134
  :param file_rep: path, pointer to the file to be read.
121
- :return: Returns true if the read was successful, false if it failed.
122
135
  """
123
- try:
124
- if len(data := file_rep.read_text()) != 0:
125
- self._full_data = data.strip().split("\n")
126
- return True
127
136
 
128
- self._full_data = None
137
+ with open(file_rep, 'r') as file:
129
138
 
130
- except Exception as e:
131
- return False
139
+ for line in file:
140
+ if self.__class__.__PATTERN_DATA.findall(line.strip()):
141
+ return
132
142
 
133
- return True
143
+ if self.__class__.__PATTERN_HEADER.findall(line.strip()) or\
144
+ len(line.strip()) == 0:
145
+ continue
146
+
147
+ key = line.strip().split("\t")[0]
148
+ value = line.strip().split("\t")[1]
149
+
150
+ self.__header[key] = value
151
+
152
+ def __handler_data(self, file_rep: Path) -> None:
153
+ """ Processes data and forms an array for further processing.
154
+
155
+ :param file_rep: path, pointer to the file to be read.
156
+ """
134
157
 
135
- def __handler_header(self) -> None:
136
- """ Processes data from a file, selects meta-information. """
158
+ with open(file_rep, 'r') as file:
159
+
160
+ # Search for the data start index and skip
161
+ for line in file:
162
+ if self.__class__.__PATTERN_DATA.findall(line.strip()):
163
+ break
164
+
165
+ # The line right after [Data] holds the column names
166
+ orig_name_col = file.readline().strip().split(self._delimiter)
167
+
168
+ if self.__allele is None and self.__usecols is None:
169
+ self.__snp_data = pd.read_csv(
170
+ file,
171
+ sep=self._delimiter,
172
+ header=None,
173
+ names=orig_name_col,
174
+ dtype=self.__dtype,
175
+ low_memory=True,
176
+ na_filter=True
177
+ )
137
178
 
138
- for line in self._full_data:
139
- if self.__class__.__PATTERN_DATA.findall(line):
140
179
  return
141
180
 
142
- if self.__class__.__PATTERN_HEADER.findall(line):
143
- continue
181
+ sub_n_col = self.__processing_columns(orig_name_col)
182
+ self.__snp_data = pd.read_csv(
183
+ file,
184
+ sep=self._delimiter,
185
+ header=None,
186
+ names=orig_name_col,
187
+ usecols=sub_n_col,
188
+ dtype=self.__dtype,
189
+ low_memory=True,
190
+ na_filter=True
191
+ )
144
192
 
145
- key = line.strip().split("\t")[0]
146
- value = line.strip().split("\t")[1]
193
+ return
147
194
 
148
- self.__header[key] = value
195
+ def __processing_columns(self, lst_col: list[str]) -> list[str] | None:
196
+ """ Processes the list of all field names and selects the subset of
197
+ fields to read.
149
198
 
150
- def __handler_data(self) -> None:
151
- """ Processes data and forms an array for further processing. """
199
+ :param lst_col: List of all fields.
200
+ :return: Returns a tuple with a list of names of selected fields.
201
+ """
152
202
 
153
- temp = 1
154
- for line in self._full_data:
155
- if self.__class__.__PATTERN_DATA.findall(line):
156
- break
157
- temp += 1
203
+ if self.__usecols is not None:
204
+ check_n_col = [
205
+ item for item in self.__usecols if item in lst_col
206
+ ]
158
207
 
159
- names_col = self.__sample_by_allele(
160
- self._full_data[temp].split(f"{self._delimiter}")
161
- )
208
+ # Check on empty list
209
+ if check_n_col:
210
+ return self.__usecols
211
+
212
+ raise Exception(
213
+ f"Error. The USECOLS list contains not true fields."
214
+ )
162
215
 
163
- if names_col is None:
164
- raise Exception(f"Error. Allele {self.__allele} not in data.")
216
+ # processing alleles
217
+ sample_n_col = self.__sample_by_allele(lst_col)
218
+ if sample_n_col is None:
219
+ raise Exception(
220
+ f"Error. Allele {self.__allele} not in data."
221
+ )
165
222
 
166
- self.__snp_data = pd.DataFrame(
167
- [
168
- item_data.split(f"{self._delimiter}")
169
- for item_data in self._full_data[temp + 1:]
170
- ],
171
- columns=self._full_data[temp].split(f"{self._delimiter}")
172
- )[names_col]
223
+ return sample_n_col
173
224
 
174
225
  def __sample_by_allele(self, names: list[str]) -> list[str] | None:
175
226
  """ Method that generates a list of field names choosing which alleles
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.1.10
3
+ Version: 1.2.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -11,12 +11,12 @@ import pytest
11
11
 
12
12
  @pytest.fixture
13
13
  def report(request) -> FinalReport:
14
- return FinalReport(allele=request.param)
14
+ return FinalReport(**request.param)
15
15
 
16
16
 
17
17
  class TestFinalReport(object):
18
18
 
19
- @pytest.mark.parametrize("report", [None], indirect=True)
19
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
20
20
  def test_handle_1(self, report: FinalReport) -> None:
21
21
  """ If both files do not exist """
22
22
 
@@ -24,7 +24,17 @@ class TestFinalReport(object):
24
24
  DIR_FILES / "fr/f.txt", DIR_FILES / "fr/f.xlsx",
25
25
  )
26
26
 
27
- @pytest.mark.parametrize("report", [None], indirect=True)
27
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
28
+ def test_handle_8(self, report: FinalReport) -> None:
29
+ """ If files exist """
30
+
31
+ assert report.handle(
32
+ DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
33
+ )
34
+
35
+ assert len(report.header) != 0 and not report.snp_data.empty
36
+
37
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
28
38
  def test_handle_2(self, report: FinalReport) -> None:
29
39
  """ If the file to convert does not exist """
30
40
 
@@ -32,7 +42,7 @@ class TestFinalReport(object):
32
42
  DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/f.xlsx",
33
43
  )
34
44
 
35
- @pytest.mark.parametrize("report", [None], indirect=True)
45
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
36
46
  def test_handle_3(self, report: FinalReport) -> None:
37
47
  """ If the data does not contain header data """
38
48
 
@@ -42,7 +52,7 @@ class TestFinalReport(object):
42
52
 
43
53
  assert len(report.header) == 0 and not report.snp_data.empty
44
54
 
45
- @pytest.mark.parametrize("report", [None], indirect=True)
55
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
46
56
  def test_handle_4(self, report: FinalReport) -> None:
47
57
  """ If the file contains only header and field names """
48
58
 
@@ -52,20 +62,18 @@ class TestFinalReport(object):
52
62
 
53
63
  assert report.snp_data is not None and report.snp_data.empty
54
64
 
55
- @pytest.mark.parametrize("report", [None], indirect=True)
65
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
56
66
  def test_handle_5(self, report: FinalReport) -> None:
57
67
  """ If the data file is empty """
58
68
 
59
- with pytest.raises(
60
- Exception, match="Not data in file FinalReport.txt"
61
- ):
62
- report.handle(
63
- DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
64
- )
69
+ report.handle(
70
+ DIR_FILES / "fr/file5.txt", DIR_FILES / "fr/file5.xlsx",
71
+ )
65
72
 
66
- assert report.snp_data is None
73
+ assert len(report.header) == 0
74
+ assert report.snp_data is not None and report.snp_data.empty
67
75
 
68
- @pytest.mark.parametrize("report", [None], indirect=True)
76
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
69
77
  def test_handle_6(self, report: FinalReport) -> None:
70
78
  """ If the conversion file is empty """
71
79
 
@@ -76,7 +84,7 @@ class TestFinalReport(object):
76
84
  assert not report.snp_data.empty
77
85
  assert len(report.header) != 0
78
86
 
79
- @pytest.mark.parametrize("report", [None], indirect=True)
87
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
80
88
  def test_handle_7(self, report: FinalReport) -> None:
81
89
  """ If the data file is not needed to convert ID name """
82
90
 
@@ -85,15 +93,7 @@ class TestFinalReport(object):
85
93
  assert not report.snp_data.empty
86
94
  assert len(report.header) != 0
87
95
 
88
- @pytest.mark.parametrize("report", [None], indirect=True)
89
- def test_handle_8(self, report: FinalReport) -> None:
90
- """ If files exist """
91
-
92
- assert report.handle(
93
- DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx",
94
- )
95
-
96
- @pytest.mark.parametrize("report", [None], indirect=True)
96
+ @pytest.mark.parametrize("report", [{'allele': None}], indirect=True)
97
97
  def test_allele_none(self, report: FinalReport) -> None:
98
98
  report.handle(DIR_FILES / "fr/file4.txt", None)
99
99
 
@@ -105,7 +105,7 @@ class TestFinalReport(object):
105
105
 
106
106
  assert report.snp_data.columns.difference(_fields).empty
107
107
 
108
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
108
+ @pytest.mark.parametrize("report", [{'allele': "AB"}], indirect=True)
109
109
  def test_sample_allele_ab(self, report: FinalReport) -> None:
110
110
  report.handle(DIR_FILES / "fr/file4.txt", None)
111
111
 
@@ -116,7 +116,7 @@ class TestFinalReport(object):
116
116
 
117
117
  assert report.snp_data.columns.difference(_fields).empty
118
118
 
119
- @pytest.mark.parametrize("report", ["Forward"], indirect=True)
119
+ @pytest.mark.parametrize("report", [{'allele': "Forward"}], indirect=True)
120
120
  def test_sample_allele_forward(self, report: FinalReport) -> None:
121
121
  report.handle(DIR_FILES / "fr/file4.txt", None)
122
122
 
@@ -127,7 +127,7 @@ class TestFinalReport(object):
127
127
 
128
128
  assert report.snp_data.columns.difference(_fields).empty
129
129
 
130
- @pytest.mark.parametrize("report", ["Top"], indirect=True)
130
+ @pytest.mark.parametrize("report", [{'allele': "Top"}], indirect=True)
131
131
  def test_sample_allele_top(self, report: FinalReport) -> None:
132
132
  report.handle(DIR_FILES / "fr/file4.txt", None)
133
133
 
@@ -138,7 +138,9 @@ class TestFinalReport(object):
138
138
 
139
139
  assert report.snp_data.columns.difference(_fields).empty
140
140
 
141
- @pytest.mark.parametrize("report", [["AB", "Top"]], indirect=True)
141
+ @pytest.mark.parametrize(
142
+ "report", [{'allele': ["AB", "Top"]}], indirect=True
143
+ )
142
144
  def test_sample_allele_list1(self, report: FinalReport) -> None:
143
145
  report.handle(DIR_FILES / "fr/file4.txt", None)
144
146
 
@@ -149,7 +151,7 @@ class TestFinalReport(object):
149
151
 
150
152
  assert report.snp_data.columns.difference(_fields).empty
151
153
 
152
- @pytest.mark.parametrize("report", [["AB"]], indirect=True)
154
+ @pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
153
155
  def test_sample_allele_list2(self, report: FinalReport) -> None:
154
156
  report.handle(DIR_FILES / "fr/file4.txt", None)
155
157
 
@@ -160,7 +162,9 @@ class TestFinalReport(object):
160
162
 
161
163
  assert report.snp_data.columns.difference(_fields).empty
162
164
 
163
- @pytest.mark.parametrize("report", [("AB", "Top")], indirect=True)
165
+ @pytest.mark.parametrize(
166
+ "report", [{'allele': ("AB", "Top")}], indirect=True
167
+ )
164
168
  def test_sample_allele_tuple(self, report: FinalReport) -> None:
165
169
  report.handle(DIR_FILES / "fr/file4.txt", None)
166
170
 
@@ -171,7 +175,9 @@ class TestFinalReport(object):
171
175
 
172
176
  assert report.snp_data.columns.difference(_fields).empty
173
177
 
174
- @pytest.mark.parametrize("report", [{"AB", "Top"}], indirect=True)
178
+ @pytest.mark.parametrize(
179
+ "report", [{'allele': {"AB", "Top"}}], indirect=True
180
+ )
175
181
  def test_sample_allele_set(self, report: FinalReport) -> None:
176
182
  report.handle(DIR_FILES / "fr/file4.txt", None)
177
183
 
@@ -182,7 +188,7 @@ class TestFinalReport(object):
182
188
 
183
189
  assert report.snp_data.columns.difference(_fields).empty
184
190
 
185
- @pytest.mark.parametrize("report", ["GG"], indirect=True)
191
+ @pytest.mark.parametrize("report", [{'allele': "GG"}], indirect=True)
186
192
  def test_sample_allele_not_exist(self, report: FinalReport) -> None:
187
193
 
188
194
  with pytest.raises(
@@ -190,8 +196,8 @@ class TestFinalReport(object):
190
196
  ):
191
197
  report.handle(DIR_FILES / "fr/file4.txt", None)
192
198
 
193
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
194
- def test_7(self, report: FinalReport) -> None:
199
+ @pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
200
+ def test_on_ru_symbol(self, report: FinalReport) -> None:
195
201
  """ An error is checked if the name of the number is Kirilitsa """
196
202
 
197
203
  with pytest.raises(
@@ -201,10 +207,10 @@ class TestFinalReport(object):
201
207
  DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
202
208
  )
203
209
 
204
- assert report.snp_data is None
210
+ assert report.snp_data is None and len(report.header) == 0
205
211
 
206
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
207
- def test_8(self, report: FinalReport) -> None:
212
+ @pytest.mark.parametrize("report", [{'allele': ["AB"]}], indirect=True)
213
+ def test_on_nan_in_sid_file_xlsx(self, report: FinalReport) -> None:
208
214
  """ Checking for processing empty values in SID """
209
215
 
210
216
  report.handle(
@@ -212,18 +218,89 @@ class TestFinalReport(object):
212
218
  DIR_FILES / "fr/file8.xlsx"
213
219
  )
214
220
 
215
- assert report.snp_data is not None
216
- assert not report.snp_data.empty
221
+ assert report.snp_data is not None and not report.snp_data.empty
217
222
  assert report.snp_data['Sample ID'].isna().any()
218
223
 
219
- @pytest.mark.parametrize("report", ["AB"], indirect=True)
220
- def test_9(self, report: FinalReport) -> None:
224
+ @pytest.mark.parametrize("report", [{'allele': "AB"}], indirect=True)
225
+ def test_on_nan_in_fr(self, report: FinalReport) -> None:
221
226
  """ Checking for missing values in SID """
222
227
  report.handle(
223
228
  DIR_FILES / "fr/file9.txt",
224
229
  DIR_FILES / "fr/file9.xlsx"
225
230
  )
226
231
 
227
- assert report.snp_data is not None
228
- assert not report.snp_data.empty
232
+ assert report.snp_data is not None and not report.snp_data.empty
229
233
  assert report.snp_data['Sample ID'].isna().any()
234
+
235
+ @pytest.mark.parametrize(
236
+ "report",
237
+ [{'allele': "AB", 'usecols': ['SNP Name', 'Sample ID']}],
238
+ indirect=True
239
+ )
240
+ def test_raise_use_allele_and_usecols(self, report: FinalReport) -> None:
241
+ """ Checking that using allele and usecols together raises an error """
242
+
243
+ with pytest.raises(
244
+ Exception, match="Error. Usecols is used for allele is none."
245
+ ):
246
+ report.handle(
247
+ DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
248
+ )
249
+
250
+ assert report.snp_data is None and len(report.header) == 0
251
+
252
+ @pytest.mark.parametrize(
253
+ "report",
254
+ [{'usecols': ['SNP Name', 'Sample ID']}],
255
+ indirect=True
256
+ )
257
+ def test_using_usecols(self, report: FinalReport) -> None:
258
+ """ Checking for use setting usecols """
259
+
260
+ _fields = ['SNP Name', 'Sample ID']
261
+
262
+ report.handle(
263
+ DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
264
+ )
265
+
266
+ assert not report.snp_data.empty and len(report.header) != 0
267
+ assert report.snp_data.columns.difference(_fields).empty
268
+
269
+ @pytest.mark.parametrize(
270
+ "report",
271
+ [{
272
+ 'usecols': ['SNP Name', 'Sample ID'],
273
+ 'dtype': {'SNP Name': 'category'}
274
+ }],
275
+ indirect=True
276
+ )
277
+ def test_using_dtype_and_combo_usecols(self, report: FinalReport) -> None:
278
+ """ Checking the use of dtype """
279
+
280
+ _fields = ['SNP Name', 'Sample ID']
281
+
282
+ report.handle(
283
+ DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
284
+ )
285
+
286
+ assert not report.snp_data.empty and len(report.header) != 0
287
+ assert report.snp_data.columns.difference(_fields).empty
288
+ assert report.snp_data['SNP Name'].dtypes == 'category'
289
+
290
+ @pytest.mark.parametrize(
291
+ "report",
292
+ [{
293
+ 'allele': "AB",
294
+ 'dtype': {'SNP Name': 'category'}
295
+ }],
296
+ indirect=True
297
+ )
298
+ def test_using_dtype_and_combo_usecols(self, report: FinalReport) -> None:
299
+ """ Checking the combined use of dtype and allele """
300
+
301
+ report.handle(
302
+ DIR_FILES / "fr/file1.txt", DIR_FILES / "fr/file1.xlsx"
303
+ )
304
+
305
+ assert not report.snp_data.empty and len(report.header) != 0
306
+ assert report.snp_data['SNP Name'].dtypes == 'category'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes