snplib 1.0.10__tar.gz → 1.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snplib-1.0.10 → snplib-1.1.10}/.gitignore +2 -0
- {snplib-1.0.10/src/snplib.egg-info → snplib-1.1.10}/PKG-INFO +7 -7
- {snplib-1.0.10 → snplib-1.1.10}/docs/conf.py +1 -1
- {snplib-1.0.10 → snplib-1.1.10}/docs/examples.rst +105 -11
- snplib-1.1.10/docs/requirements.txt +2 -0
- {snplib-1.0.10 → snplib-1.1.10}/pyproject.toml +7 -7
- snplib-1.1.10/requirements.txt +10 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/finalreport/_finalreport.py +12 -10
- {snplib-1.0.10 → snplib-1.1.10/src/snplib.egg-info}/PKG-INFO +7 -7
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/SOURCES.txt +4 -0
- snplib-1.1.10/src/snplib.egg-info/requires.txt +8 -0
- snplib-1.1.10/tests/finalreport/files/fr/file8.txt +28 -0
- snplib-1.1.10/tests/finalreport/files/fr/file8.xlsx +0 -0
- snplib-1.1.10/tests/finalreport/files/fr/file9.txt +28 -0
- snplib-1.1.10/tests/finalreport/files/fr/file9.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/test_finalreport.py +27 -13
- snplib-1.0.10/docs/requirements.txt +0 -2
- snplib-1.0.10/requirements.txt +0 -12
- snplib-1.0.10/src/snplib.egg-info/requires.txt +0 -8
- {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/linux.yml +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/macos.yml +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/windows.yml +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/.readthedocs.yaml +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/LICENSE +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/README.md +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/Makefile +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/index.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/install.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/intro.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/logo.png +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/make.bat +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/modules.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.finalreport.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.format.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.parentage.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.statistics.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/docs/usage.rst +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/iconlib.png +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/setup.cfg +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/finalreport/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/__settings.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/_plink.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/_snp.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_discov.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_isagmark.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_verif.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/isag_disc.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/isag_verif.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_callrate.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_freq.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_snphwe.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/dependency_links.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/top_level.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file1.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file1.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file2.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file2.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file3.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file3.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file4.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file5.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file5.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file6.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file6.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file7.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file7.xlsx +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file2.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file3.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file4.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/lgen/file.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/map/file_bovinesnp50.csv +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file2.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file3.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file4.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fsnp/file1.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fsnp/file2.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_fam.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_lgen.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_map.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_ped.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_snp.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/data/parentage_test_disc.csv +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/data/parentage_test_verf.csv +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/test_discov.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/test_verif.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/__init__.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/cr/file_cra.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/cr/file_crm.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/freq/etalon.txt +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/freq/file.pl +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_callrate.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_freq_allele.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_freq_maf.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_hwe_t.py +0 -0
- {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_snphwe.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: snplib
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.1.10
|
4
4
|
Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
|
5
5
|
Author-email: Igor <igor.loschinin@gmail.com>
|
6
6
|
License: GNU
|
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.10
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
-
Requires-Dist: numpy>=
|
14
|
-
Requires-Dist: pandas>=2.
|
15
|
-
Requires-Dist: six>=1.
|
13
|
+
Requires-Dist: numpy>=2.2.3
|
14
|
+
Requires-Dist: pandas>=2.2.3
|
15
|
+
Requires-Dist: six>=1.17.0
|
16
16
|
Requires-Dist: swifter>=1.4.0
|
17
17
|
Requires-Dist: xlrd>=2.0.1
|
18
|
-
Requires-Dist: XlsxWriter>=3.
|
19
|
-
Requires-Dist: openpyxl>=3.1.
|
20
|
-
Requires-Dist: pydantic>=2.
|
18
|
+
Requires-Dist: XlsxWriter>=3.2.2
|
19
|
+
Requires-Dist: openpyxl>=3.1.5
|
20
|
+
Requires-Dist: pydantic>=2.10.6
|
21
21
|
|
22
22
|
# snptools
|
23
23
|
<p align="center">
|
@@ -90,7 +90,19 @@ plink - GBLUP, ssGBLUP, GWAS.
|
|
90
90
|
blupf90 format
|
91
91
|
______________
|
92
92
|
The input data for obtaining the ``snp.txt`` file used for the genomic
|
93
|
-
blupf90 evaluation is the data file - processed file ``finalreport.txt
|
93
|
+
blupf90 evaluation is the data file - processed file ``finalreport.txt``.
|
94
|
+
The processed file can be seen in the item above - Finalreport.txt processing:
|
95
|
+
|
96
|
+
Content input *file.txt*::
|
97
|
+
|
98
|
+
SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score X Y
|
99
|
+
ARS-BFGL-BAC-10172 HO840M003135245650 B B 0.9420 0.069 0.801
|
100
|
+
ARS-BFGL-BAC-1020 HO840M003135245650 B B 0.9489 0.033 0.700
|
101
|
+
ARS-BFGL-BAC-10245 HO840M003135245650 B B 0.7277 0.152 1.504
|
102
|
+
ARS-BFGL-BAC-10345 HO840M003135245650 A B 0.9411 0.598 0.572
|
103
|
+
ARS-BFGL-BAC-10375 HO840M003135245650 A B 0.9348 0.430 0.494
|
104
|
+
|
105
|
+
...
|
94
106
|
|
95
107
|
**uga**
|
96
108
|
|
@@ -99,7 +111,7 @@ blupf90 evaluation is the data file - processed file ``finalreport.txt``
|
|
99
111
|
import pandas as pd
|
100
112
|
from snplib.format import Snp
|
101
113
|
|
102
|
-
data_finalreport = pd.read_csv("file.txt", sep="\t")
|
114
|
+
data_finalreport = pd.read_csv("path_to_file/file.txt", sep="\t")
|
103
115
|
|
104
116
|
obj_snp = Snp(fmt="uga")
|
105
117
|
obj_snp.process(data_finalreport)
|
@@ -111,7 +123,7 @@ Data after snp processing in ``uga`` (blupf90) format - obj_snp.data::
|
|
111
123
|
0 14814 02011015010000500
|
112
124
|
1 14815 01110152120222512
|
113
125
|
|
114
|
-
Default result
|
126
|
+
Default result - this is what the data looks like if ``fmt=None``::
|
115
127
|
|
116
128
|
SNP_NAME SAMPLE_ID SNP
|
117
129
|
0 ABCA12 14814 0
|
@@ -130,16 +142,104 @@ ____________
|
|
130
142
|
|
131
143
|
This page describes specialized PLINK input and output file formats which are
|
132
144
|
identifiable by file extension. https://www.cog-genomics.org/plink/1.9/formats
|
133
|
-
|
145
|
+
Common formats for performing GWAS analysis - ``ped``, ``map``, ``fam``, ``lgen``, etc.
|
134
146
|
|
135
147
|
**map** - https://www.cog-genomics.org/plink/1.9/formats#map
|
136
148
|
|
149
|
+
To get the ``.map`` file, first you need to download the *manifest file* for the chip
|
150
|
+
you are using.
|
151
|
+
|
152
|
+
.. note::
|
153
|
+
*file_bovinesnp50.csv* - The file that is taken from the Illumina website with full
|
154
|
+
information about the chip https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
|
155
|
+
|
156
|
+
Since the make_map function accepts **pd.DataFrame**, the *manifest file* processing is performed
|
157
|
+
independently.
|
158
|
+
|
159
|
+
Input data for make_map::
|
160
|
+
|
161
|
+
IlmnID ... BeadSetID
|
162
|
+
0 BovineHD0100037694-128_T_F_2278925834 ... 1241
|
163
|
+
1 BovineHD0100037699_dup-128_T_F_2327674593 ... 1241
|
164
|
+
2 BovineHD0100037703_dup-128_B_R_2327674602 ... 1241
|
165
|
+
3 BovineHD0100037704_dup-128_T_F_2327674603 ... 1241
|
166
|
+
4 BovineHD0100037710_dup-128_T_F_2327674613 ... 1241
|
167
|
+
5 BovineHD0100037712_dup-128_B_R_2327674618 ... 1241
|
168
|
+
6 BovineHD0100037716-128_T_F_2255347065 ... 1241
|
169
|
+
7 BovineHD0100037719-128_T_F_2278926219 ... 1241
|
170
|
+
8 BovineHD0100037720-128_B_R_2255342455 ... 1241
|
171
|
+
9 BovineHD0100037722_dup-128_B_R_2327674634 ... 1241
|
172
|
+
|
173
|
+
|
174
|
+
.. note::
|
175
|
+
The original file, for example, **BovineSNP50_v3_A1.csv** looks like this::
|
176
|
+
|
177
|
+
Illumina, Inc.,,,,,,,,,,,,,,,,,
|
178
|
+
[Heading],,,,,,,,,,,,,,,,,,
|
179
|
+
Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
|
180
|
+
Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
|
181
|
+
Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
|
182
|
+
Loci Count ,53218,,,,,,,,,,,,,,,,,
|
183
|
+
[Assay],,,,,,,,,,,,,,,,,,
|
184
|
+
IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
|
185
|
+
ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
|
186
|
+
APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
|
187
|
+
ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
|
188
|
+
.
|
189
|
+
.
|
190
|
+
.
|
191
|
+
UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
|
192
|
+
UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
|
193
|
+
UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
|
194
|
+
[Controls],,,,,,,,,,,,,,,,,,
|
195
|
+
0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
|
196
|
+
0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
|
197
|
+
0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
|
198
|
+
0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
|
199
|
+
0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
|
200
|
+
0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
|
201
|
+
|
202
|
+
Therefore, for direct reading via **pd.read_csv()** it is necessary to
|
203
|
+
preprocess the file - delete extra lines::
|
204
|
+
|
205
|
+
Illumina, Inc.,,,,,,,,,,,,,,,,,
|
206
|
+
[Heading],,,,,,,,,,,,,,,,,,
|
207
|
+
Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
|
208
|
+
Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
|
209
|
+
Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
|
210
|
+
Loci Count ,53218,,,,,,,,,,,,,,,,,
|
211
|
+
[Assay],,,,,,,,,,,,,,,,,,
|
212
|
+
|
213
|
+
and
|
214
|
+
|
215
|
+
[Controls],,,,,,,,,,,,,,,,,,
|
216
|
+
0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
|
217
|
+
0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
|
218
|
+
0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
|
219
|
+
0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
|
220
|
+
0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
|
221
|
+
0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
|
222
|
+
|
223
|
+
The file should end up looking like this::
|
224
|
+
|
225
|
+
IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
|
226
|
+
ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
|
227
|
+
APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
|
228
|
+
ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
|
229
|
+
.
|
230
|
+
.
|
231
|
+
.
|
232
|
+
UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
|
233
|
+
UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
|
234
|
+
UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
|
235
|
+
|
236
|
+
|
137
237
|
.. code-block:: python
|
138
238
|
|
139
239
|
import pandas as pd
|
140
240
|
from snplib.format import make_map
|
141
241
|
|
142
|
-
input_data = pd.read_csv(
|
242
|
+
input_data = pd.read_csv("./file_bovinesnp50.csv")
|
143
243
|
data_map = make_map(input_data)
|
144
244
|
|
145
245
|
Output data view::
|
@@ -150,12 +250,6 @@ Output data view::
|
|
150
250
|
0 BovineHD0100037703 0 0
|
151
251
|
0 BovineHD0100037704 0 0
|
152
252
|
|
153
|
-
.. note::
|
154
|
-
file_bovinesnp50.csv - The file that is taken on the Illumina website with full
|
155
|
-
information about the chip
|
156
|
-
https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
|
157
|
-
|
158
|
-
|
159
253
|
**ped** - https://www.cog-genomics.org/plink/1.9/formats#ped
|
160
254
|
|
161
255
|
.. code-block:: python
|
@@ -17,7 +17,7 @@ snplib = ["*.pl"]
|
|
17
17
|
|
18
18
|
[project]
|
19
19
|
name = "snplib"
|
20
|
-
version = "1.
|
20
|
+
version = "1.1.10"
|
21
21
|
description = "Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing"
|
22
22
|
authors = [
|
23
23
|
{name = "Igor", email = "igor.loschinin@gmail.com"}
|
@@ -30,14 +30,14 @@ classifiers = [
|
|
30
30
|
"Operating System :: OS Independent",
|
31
31
|
]
|
32
32
|
dependencies = [
|
33
|
-
"numpy>=
|
34
|
-
"pandas>=2.
|
35
|
-
"six>=1.
|
33
|
+
"numpy>=2.2.3",
|
34
|
+
"pandas>=2.2.3",
|
35
|
+
"six>=1.17.0",
|
36
36
|
"swifter>=1.4.0",
|
37
37
|
"xlrd>=2.0.1",
|
38
|
-
"XlsxWriter>=3.
|
39
|
-
"openpyxl>=3.1.
|
40
|
-
"pydantic>=2.
|
38
|
+
"XlsxWriter>=3.2.2",
|
39
|
+
"openpyxl>=3.1.5",
|
40
|
+
"pydantic>=2.10.6",
|
41
41
|
]
|
42
42
|
|
43
43
|
[project.urls]
|
@@ -7,11 +7,15 @@ from pathlib import Path
|
|
7
7
|
from functools import reduce
|
8
8
|
|
9
9
|
import re
|
10
|
+
|
11
|
+
from numpy import nan
|
10
12
|
import pandas as pd
|
11
13
|
|
12
14
|
|
13
15
|
class FinalReport(object):
|
14
|
-
""" File that contains SNP information.
|
16
|
+
""" File that contains SNP information. File processing is triggered by the
|
17
|
+
handle method. If values in 'SID' or 'UNIQ_KEY' were missing in the xlsx
|
18
|
+
conversion file, the processed data will contain NAN values.
|
15
19
|
|
16
20
|
:argument allele: A variant form of a single nucleotide polymorphism
|
17
21
|
(SNP), a specific polymorphic site or a whole gene detectable at
|
@@ -235,17 +239,15 @@ class FinalReport(object):
|
|
235
239
|
if self._check_on_ru_symbols(self._map_rn.UNIQ_KEY):
|
236
240
|
raise Exception("Error. Unique keys contain Cyrillic alphabet.")
|
237
241
|
|
238
|
-
if self._map_rn.UNIQ_KEY.isna().any():
|
239
|
-
self._map_rn.fillna('unknown', inplace=True)
|
240
|
-
|
241
242
|
@staticmethod
|
242
243
|
def _check_on_ru_symbols(seq: pd.Series) -> bool | None:
|
243
|
-
"""
|
244
|
+
""" Check a sequence for Cyrillic characters.
|
244
245
|
|
245
|
-
:param seq:
|
246
|
-
:return:
|
246
|
+
:param seq: Squeezed for verification.
|
247
|
+
:return: True if any value contains Cyrillic characters, otherwise
|
248
|
+
False.
|
247
249
|
"""
|
248
250
|
|
249
|
-
return
|
250
|
-
|
251
|
-
|
251
|
+
return seq.apply(
|
252
|
+
lambda x: bool(re.search('[а-яА-Я]', x)) if x is not nan else x
|
253
|
+
).any()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: snplib
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.1.10
|
4
4
|
Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
|
5
5
|
Author-email: Igor <igor.loschinin@gmail.com>
|
6
6
|
License: GNU
|
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.10
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
-
Requires-Dist: numpy>=
|
14
|
-
Requires-Dist: pandas>=2.
|
15
|
-
Requires-Dist: six>=1.
|
13
|
+
Requires-Dist: numpy>=2.2.3
|
14
|
+
Requires-Dist: pandas>=2.2.3
|
15
|
+
Requires-Dist: six>=1.17.0
|
16
16
|
Requires-Dist: swifter>=1.4.0
|
17
17
|
Requires-Dist: xlrd>=2.0.1
|
18
|
-
Requires-Dist: XlsxWriter>=3.
|
19
|
-
Requires-Dist: openpyxl>=3.1.
|
20
|
-
Requires-Dist: pydantic>=2.
|
18
|
+
Requires-Dist: XlsxWriter>=3.2.2
|
19
|
+
Requires-Dist: openpyxl>=3.1.5
|
20
|
+
Requires-Dist: pydantic>=2.10.6
|
21
21
|
|
22
22
|
# snptools
|
23
23
|
<p align="center">
|
@@ -63,6 +63,10 @@ tests/finalreport/files/fr/file6.txt
|
|
63
63
|
tests/finalreport/files/fr/file6.xlsx
|
64
64
|
tests/finalreport/files/fr/file7.txt
|
65
65
|
tests/finalreport/files/fr/file7.xlsx
|
66
|
+
tests/finalreport/files/fr/file8.txt
|
67
|
+
tests/finalreport/files/fr/file8.xlsx
|
68
|
+
tests/finalreport/files/fr/file9.txt
|
69
|
+
tests/finalreport/files/fr/file9.xlsx
|
66
70
|
tests/format/__init__.py
|
67
71
|
tests/format/test_plink_fam.py
|
68
72
|
tests/format/test_plink_lgen.py
|
@@ -0,0 +1,28 @@
|
|
1
|
+
[Header]
|
2
|
+
GSGT Version 2.0.4
|
3
|
+
Processing Date 10/14/2021 4:02 PM
|
4
|
+
Content BovineSNP50_v3_A1.bpm
|
5
|
+
Num SNPs 53218
|
6
|
+
Total SNPs 53218
|
7
|
+
Num Samples 3
|
8
|
+
Total Samples 3
|
9
|
+
[Data]
|
10
|
+
SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
|
11
|
+
ABCA12 1 A A 0.4048 0.8164
|
12
|
+
APAF1 1 B B 0.9067 0.9155
|
13
|
+
ARS-BFGL-BAC-10172 1 B B 0.9140 0.8767
|
14
|
+
ARS-BFGL-BAC-1020 1 B B 0.9288 0.8919
|
15
|
+
ARS-BFGL-BAC-10245 1 B B 0.7227 0.7447
|
16
|
+
ARS-BFGL-BAC-10345 1 A B 0.9468 0.9127
|
17
|
+
ABCA12 2 A A 0.4048 0.8164
|
18
|
+
APAF1 2 B B 0.9067 0.9155
|
19
|
+
ARS-BFGL-BAC-10172 2 A B 0.9140 0.8767
|
20
|
+
ARS-BFGL-BAC-1020 2 A B 0.9288 0.8919
|
21
|
+
ARS-BFGL-BAC-10245 2 A A 0.7227 0.7447
|
22
|
+
ARS-BFGL-BAC-10345 2 B B 0.9468 0.9127
|
23
|
+
ABCA12 3 A A 0.4048 0.8164
|
24
|
+
APAF1 3 B B 0.9067 0.9155
|
25
|
+
ARS-BFGL-BAC-10172 3 A B 0.9140 0.8767
|
26
|
+
ARS-BFGL-BAC-1020 3 A B 0.9288 0.8919
|
27
|
+
ARS-BFGL-BAC-10245 3 A A 0.7227 0.7447
|
28
|
+
ARS-BFGL-BAC-10345 3 A B 0.9468 0.9127
|
Binary file
|
@@ -0,0 +1,28 @@
|
|
1
|
+
[Header]
|
2
|
+
GSGT Version 2.0.4
|
3
|
+
Processing Date 10/14/2021 4:02 PM
|
4
|
+
Content BovineSNP50_v3_A1.bpm
|
5
|
+
Num SNPs 53218
|
6
|
+
Total SNPs 53218
|
7
|
+
Num Samples 3
|
8
|
+
Total Samples 3
|
9
|
+
[Data]
|
10
|
+
SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
|
11
|
+
ABCA12 1 A A 0.4048 0.8164
|
12
|
+
APAF1 1 B B 0.9067 0.9155
|
13
|
+
ARS-BFGL-BAC-10172 1 B B 0.9140 0.8767
|
14
|
+
ARS-BFGL-BAC-1020 1 B B 0.9288 0.8919
|
15
|
+
ARS-BFGL-BAC-10245 1 B B 0.7227 0.7447
|
16
|
+
ARS-BFGL-BAC-10345 1 A B 0.9468 0.9127
|
17
|
+
ABCA12 2 A A 0.4048 0.8164
|
18
|
+
APAF1 2 B B 0.9067 0.9155
|
19
|
+
ARS-BFGL-BAC-10172 2 A B 0.9140 0.8767
|
20
|
+
ARS-BFGL-BAC-1020 2 A B 0.9288 0.8919
|
21
|
+
ARS-BFGL-BAC-10245 2 A A 0.7227 0.7447
|
22
|
+
ARS-BFGL-BAC-10345 2 B B 0.9468 0.9127
|
23
|
+
ABCA12 3 A A 0.4048 0.8164
|
24
|
+
APAF1 3 B B 0.9067 0.9155
|
25
|
+
ARS-BFGL-BAC-10172 3 A B 0.9140 0.8767
|
26
|
+
ARS-BFGL-BAC-1020 3 A B 0.9288 0.8919
|
27
|
+
ARS-BFGL-BAC-10245 3 A A 0.7227 0.7447
|
28
|
+
ARS-BFGL-BAC-10345 3 A B 0.9468 0.9127
|
Binary file
|
@@ -192,6 +192,7 @@ class TestFinalReport(object):
|
|
192
192
|
|
193
193
|
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
194
194
|
def test_7(self, report: FinalReport) -> None:
|
195
|
+
""" Checks that an error is raised if the unique keys contain Cyrillic characters """
|
195
196
|
|
196
197
|
with pytest.raises(
|
197
198
|
Exception, match="Error. Unique keys contain Cyrillic alphabet."
|
@@ -200,16 +201,29 @@ class TestFinalReport(object):
|
|
200
201
|
DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
|
201
202
|
)
|
202
203
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
204
|
+
assert report.snp_data is None
|
205
|
+
|
206
|
+
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
207
|
+
def test_8(self, report: FinalReport) -> None:
|
208
|
+
""" Checking for processing empty values in SID """
|
209
|
+
|
210
|
+
report.handle(
|
211
|
+
DIR_FILES / "fr/file8.txt",
|
212
|
+
DIR_FILES / "fr/file8.xlsx"
|
213
|
+
)
|
214
|
+
|
215
|
+
assert report.snp_data is not None
|
216
|
+
assert not report.snp_data.empty
|
217
|
+
assert report.snp_data['Sample ID'].isna().any()
|
218
|
+
|
219
|
+
@pytest.mark.parametrize("report", ["AB"], indirect=True)
|
220
|
+
def test_9(self, report: FinalReport) -> None:
|
221
|
+
""" Checking for missing values in SID """
|
222
|
+
report.handle(
|
223
|
+
DIR_FILES / "fr/file9.txt",
|
224
|
+
DIR_FILES / "fr/file9.xlsx"
|
225
|
+
)
|
226
|
+
|
227
|
+
assert report.snp_data is not None
|
228
|
+
assert not report.snp_data.empty
|
229
|
+
assert report.snp_data['Sample ID'].isna().any()
|
snplib-1.0.10/requirements.txt
DELETED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|