snplib 1.0.10__tar.gz → 1.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. {snplib-1.0.10 → snplib-1.1.10}/.gitignore +2 -0
  2. {snplib-1.0.10/src/snplib.egg-info → snplib-1.1.10}/PKG-INFO +7 -7
  3. {snplib-1.0.10 → snplib-1.1.10}/docs/conf.py +1 -1
  4. {snplib-1.0.10 → snplib-1.1.10}/docs/examples.rst +105 -11
  5. snplib-1.1.10/docs/requirements.txt +2 -0
  6. {snplib-1.0.10 → snplib-1.1.10}/pyproject.toml +7 -7
  7. snplib-1.1.10/requirements.txt +10 -0
  8. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/finalreport/_finalreport.py +12 -10
  9. {snplib-1.0.10 → snplib-1.1.10/src/snplib.egg-info}/PKG-INFO +7 -7
  10. {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/SOURCES.txt +4 -0
  11. snplib-1.1.10/src/snplib.egg-info/requires.txt +8 -0
  12. snplib-1.1.10/tests/finalreport/files/fr/file8.txt +28 -0
  13. snplib-1.1.10/tests/finalreport/files/fr/file8.xlsx +0 -0
  14. snplib-1.1.10/tests/finalreport/files/fr/file9.txt +28 -0
  15. snplib-1.1.10/tests/finalreport/files/fr/file9.xlsx +0 -0
  16. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/test_finalreport.py +27 -13
  17. snplib-1.0.10/docs/requirements.txt +0 -2
  18. snplib-1.0.10/requirements.txt +0 -12
  19. snplib-1.0.10/src/snplib.egg-info/requires.txt +0 -8
  20. {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/linux.yml +0 -0
  21. {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/macos.yml +0 -0
  22. {snplib-1.0.10 → snplib-1.1.10}/.github/workflows/windows.yml +0 -0
  23. {snplib-1.0.10 → snplib-1.1.10}/.readthedocs.yaml +0 -0
  24. {snplib-1.0.10 → snplib-1.1.10}/LICENSE +0 -0
  25. {snplib-1.0.10 → snplib-1.1.10}/README.md +0 -0
  26. {snplib-1.0.10 → snplib-1.1.10}/__init__.py +0 -0
  27. {snplib-1.0.10 → snplib-1.1.10}/docs/Makefile +0 -0
  28. {snplib-1.0.10 → snplib-1.1.10}/docs/index.rst +0 -0
  29. {snplib-1.0.10 → snplib-1.1.10}/docs/install.rst +0 -0
  30. {snplib-1.0.10 → snplib-1.1.10}/docs/intro.rst +0 -0
  31. {snplib-1.0.10 → snplib-1.1.10}/docs/logo.png +0 -0
  32. {snplib-1.0.10 → snplib-1.1.10}/docs/make.bat +0 -0
  33. {snplib-1.0.10 → snplib-1.1.10}/docs/modules.rst +0 -0
  34. {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.finalreport.rst +0 -0
  35. {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.format.rst +0 -0
  36. {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.parentage.rst +0 -0
  37. {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.rst +0 -0
  38. {snplib-1.0.10 → snplib-1.1.10}/docs/snplib.statistics.rst +0 -0
  39. {snplib-1.0.10 → snplib-1.1.10}/docs/usage.rst +0 -0
  40. {snplib-1.0.10 → snplib-1.1.10}/iconlib.png +0 -0
  41. {snplib-1.0.10 → snplib-1.1.10}/setup.cfg +0 -0
  42. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/__init__.py +0 -0
  43. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/finalreport/__init__.py +0 -0
  44. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/__init__.py +0 -0
  45. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/__settings.py +0 -0
  46. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/_plink.py +0 -0
  47. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/format/_snp.py +0 -0
  48. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/__init__.py +0 -0
  49. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_discov.py +0 -0
  50. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_isagmark.py +0 -0
  51. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/_verif.py +0 -0
  52. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/isag_disc.pl +0 -0
  53. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/parentage/isag_verif.pl +0 -0
  54. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/__init__.py +0 -0
  55. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_callrate.py +0 -0
  56. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_freq.py +0 -0
  57. {snplib-1.0.10 → snplib-1.1.10}/src/snplib/statistics/_snphwe.py +0 -0
  58. {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/dependency_links.txt +0 -0
  59. {snplib-1.0.10 → snplib-1.1.10}/src/snplib.egg-info/top_level.txt +0 -0
  60. {snplib-1.0.10 → snplib-1.1.10}/tests/__init__.py +0 -0
  61. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/__init__.py +0 -0
  62. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file1.txt +0 -0
  63. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file1.xlsx +0 -0
  64. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file2.txt +0 -0
  65. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file2.xlsx +0 -0
  66. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file3.txt +0 -0
  67. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file3.xlsx +0 -0
  68. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file4.txt +0 -0
  69. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file5.txt +0 -0
  70. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file5.xlsx +0 -0
  71. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file6.txt +0 -0
  72. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file6.xlsx +0 -0
  73. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file7.txt +0 -0
  74. {snplib-1.0.10 → snplib-1.1.10}/tests/finalreport/files/fr/file7.xlsx +0 -0
  75. {snplib-1.0.10 → snplib-1.1.10}/tests/format/__init__.py +0 -0
  76. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file.pl +0 -0
  77. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file2.pl +0 -0
  78. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file3.pl +0 -0
  79. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/fam/file4.pl +0 -0
  80. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/lgen/file.pl +0 -0
  81. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/map/file_bovinesnp50.csv +0 -0
  82. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file.pl +0 -0
  83. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file2.pl +0 -0
  84. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file3.pl +0 -0
  85. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fplink/ped/file4.pl +0 -0
  86. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fsnp/file1.txt +0 -0
  87. {snplib-1.0.10 → snplib-1.1.10}/tests/format/files/fsnp/file2.txt +0 -0
  88. {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_fam.py +0 -0
  89. {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_lgen.py +0 -0
  90. {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_map.py +0 -0
  91. {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_plink_ped.py +0 -0
  92. {snplib-1.0.10 → snplib-1.1.10}/tests/format/test_snp.py +0 -0
  93. {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/__init__.py +0 -0
  94. {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/data/parentage_test_disc.csv +0 -0
  95. {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/data/parentage_test_verf.csv +0 -0
  96. {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/test_discov.py +0 -0
  97. {snplib-1.0.10 → snplib-1.1.10}/tests/parentage/test_verif.py +0 -0
  98. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/__init__.py +0 -0
  99. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/cr/file_cra.pl +0 -0
  100. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/cr/file_crm.pl +0 -0
  101. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/freq/etalon.txt +0 -0
  102. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/data/freq/file.pl +0 -0
  103. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_callrate.py +0 -0
  104. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_freq_allele.py +0 -0
  105. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_freq_maf.py +0 -0
  106. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_hwe_t.py +0 -0
  107. {snplib-1.0.10 → snplib-1.1.10}/tests/statistics/test_snphwe.py +0 -0
@@ -17,6 +17,8 @@
17
17
  !requirements.txt
18
18
  !/tests/*/**
19
19
  !/docs/*/**
20
+ /docs/_build
21
+
20
22
 
21
23
  *.idea*
22
24
  *__pycache__*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.0.10
3
+ Version: 1.1.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
- Requires-Dist: numpy>=1.26.1
14
- Requires-Dist: pandas>=2.1.1
15
- Requires-Dist: six>=1.16.0
13
+ Requires-Dist: numpy>=2.2.3
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: six>=1.17.0
16
16
  Requires-Dist: swifter>=1.4.0
17
17
  Requires-Dist: xlrd>=2.0.1
18
- Requires-Dist: XlsxWriter>=3.1.9
19
- Requires-Dist: openpyxl>=3.1.2
20
- Requires-Dist: pydantic>=2.4.2
18
+ Requires-Dist: XlsxWriter>=3.2.2
19
+ Requires-Dist: openpyxl>=3.1.5
20
+ Requires-Dist: pydantic>=2.10.6
21
21
 
22
22
  # snptools
23
23
  <p align="center">
@@ -8,7 +8,7 @@
8
8
 
9
9
  import os
10
10
  import sys
11
- sys.path.insert(0, os.path.abspath('../src/snplib'))
11
+ sys.path.insert(0, os.path.abspath('../src'))
12
12
 
13
13
  project = 'snptools'
14
14
  copyright = '2025, Igor Locshinin'
@@ -90,7 +90,19 @@ plink - GBLUP, ssGBLUP, GWAS.
90
90
  blupf90 format
91
91
  ______________
92
92
  The input data for obtaining the ``snp.txt`` file used for the genomic
93
- blupf90 evaluation is the data file - processed file ``finalreport.txt``
93
+ blupf90 evaluation is the data file - processed file ``finalreport.txt``.
94
+ The processed file can be seen in the item above - Finalreport.txt processing:
95
+
96
+ Content input *file.txt*::
97
+
98
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score X Y
99
+ ARS-BFGL-BAC-10172 HO840M003135245650 B B 0.9420 0.069 0.801
100
+ ARS-BFGL-BAC-1020 HO840M003135245650 B B 0.9489 0.033 0.700
101
+ ARS-BFGL-BAC-10245 HO840M003135245650 B B 0.7277 0.152 1.504
102
+ ARS-BFGL-BAC-10345 HO840M003135245650 A B 0.9411 0.598 0.572
103
+ ARS-BFGL-BAC-10375 HO840M003135245650 A B 0.9348 0.430 0.494
104
+
105
+ ...
94
106
 
95
107
  **uga**
96
108
 
@@ -99,7 +111,7 @@ blupf90 evaluation is the data file - processed file ``finalreport.txt``
99
111
  import pandas as pd
100
112
  from snplib.format import Snp
101
113
 
102
- data_finalreport = pd.read_csv("file.txt", sep="\t")
114
+ data_finalreport = pd.read_csv("path_to_file/file.txt", sep="\t")
103
115
 
104
116
  obj = Snp(fmt="uga")
105
117
  obj_snp.process(data_finalreport)
@@ -111,7 +123,7 @@ Data after snp processing in ``uga`` (blupf90) format - obj_snp.data::
111
123
  0 14814 02011015010000500
112
124
  1 14815 01110152120222512
113
125
 
114
- Default result::
126
+ Default result - this is what the data looks like if ``fmt=None``::
115
127
 
116
128
  SNP_NAME SAMPLE_ID SNP
117
129
  0 ABCA12 14814 0
@@ -130,16 +142,104 @@ ____________
130
142
 
131
143
  This page describes specialized PLINK input and output file formats which are
132
144
  identifiable by file extension. https://www.cog-genomics.org/plink/1.9/formats
133
- Распространненные фомраты для проведения GWAS анализа - ``ped``, ``map``, ``fam``, ``lgen``...
145
+ Common formats for performing GWAS analysis - ``ped``, ``map``, ``fam``, ``lgen``...
134
146
 
135
147
  **map** - https://www.cog-genomics.org/plink/1.9/formats#map
136
148
 
149
+ To get the ``.map`` file, first you need to download the *manifest file* for the chip
150
+ you are using.
151
+
152
+ .. note::
153
+ *file_bovinesnp50.csv* - The file that is taken from the Illumina website with full
154
+ information about the chip https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
155
+
156
+ Since the make_map function accepts **pd.DataFrame**, the *manifest file* processing is performed
157
+ independently.
158
+
159
+ Input data for make_map::
160
+
161
+ IlmnID ... BeadSetID
162
+ 0 BovineHD0100037694-128_T_F_2278925834 ... 1241
163
+ 1 BovineHD0100037699_dup-128_T_F_2327674593 ... 1241
164
+ 2 BovineHD0100037703_dup-128_B_R_2327674602 ... 1241
165
+ 3 BovineHD0100037704_dup-128_T_F_2327674603 ... 1241
166
+ 4 BovineHD0100037710_dup-128_T_F_2327674613 ... 1241
167
+ 5 BovineHD0100037712_dup-128_B_R_2327674618 ... 1241
168
+ 6 BovineHD0100037716-128_T_F_2255347065 ... 1241
169
+ 7 BovineHD0100037719-128_T_F_2278926219 ... 1241
170
+ 8 BovineHD0100037720-128_B_R_2255342455 ... 1241
171
+ 9 BovineHD0100037722_dup-128_B_R_2327674634 ... 1241
172
+
173
+
174
+ .. note::
175
+ The original file, for example, **BovineSNP50_v3_A1.csv** looks like this::
176
+
177
+ Illumina, Inc.,,,,,,,,,,,,,,,,,
178
+ [Heading],,,,,,,,,,,,,,,,,,
179
+ Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
180
+ Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
181
+ Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
182
+ Loci Count ,53218,,,,,,,,,,,,,,,,,
183
+ [Assay],,,,,,,,,,,,,,,,,,
184
+ IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
185
+ ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
186
+ APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
187
+ ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
188
+ .
189
+ .
190
+ .
191
+ UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
192
+ UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
193
+ UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
194
+ [Controls],,,,,,,,,,,,,,,,,,
195
+ 0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
196
+ 0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
197
+ 0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
198
+ 0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
199
+ 0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
200
+ 0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
201
+
202
+ Therefore, for direct reading via **pd.read_csv()** it is necessary to
203
+ preprocess the file - delete extra lines::
204
+
205
+ Illumina, Inc.,,,,,,,,,,,,,,,,,
206
+ [Heading],,,,,,,,,,,,,,,,,,
207
+ Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
208
+ Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
209
+ Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
210
+ Loci Count ,53218,,,,,,,,,,,,,,,,,
211
+ [Assay],,,,,,,,,,,,,,,,,,
212
+
213
+ and
214
+
215
+ [Controls],,,,,,,,,,,,,,,,,,
216
+ 0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
217
+ 0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
218
+ 0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
219
+ 0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
220
+ 0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
221
+ 0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
222
+
223
+ The file should end up looking like this::
224
+
225
+ IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
226
+ ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
227
+ APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
228
+ ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
229
+ .
230
+ .
231
+ .
232
+ UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
233
+ UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
234
+ UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
235
+
236
+
137
237
  .. code-block:: python
138
238
 
139
239
  import pandas as pd
140
240
  from snplib.format import make_map
141
241
 
142
- input_data = pd.read_csv(DIR_FILES / "./file_bovinesnp50.csv")
242
+ input_data = pd.read_csv("./file_bovinesnp50.csv")
143
243
  data_map = make_map(input_data)
144
244
 
145
245
  Output data view::
@@ -150,12 +250,6 @@ Output data view::
150
250
  0 BovineHD0100037703 0 0
151
251
  0 BovineHD0100037704 0 0
152
252
 
153
- .. note::
154
- file_bovinesnp50.csv - The file that is taken on the Illumina website with full
155
- information about the chip
156
- https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
157
-
158
-
159
253
  **ped** - https://www.cog-genomics.org/plink/1.9/formats#ped
160
254
 
161
255
  .. code-block:: python
@@ -0,0 +1,2 @@
1
+ sphinx==8.1.3
2
+ sphinx_rtd_theme==3.0.2
@@ -17,7 +17,7 @@ snplib = ["*.pl"]
17
17
 
18
18
  [project]
19
19
  name = "snplib"
20
- version = "1.0.10"
20
+ version = "1.1.10"
21
21
  description = "Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing"
22
22
  authors = [
23
23
  {name = "Igor", email = "igor.loschinin@gmail.com"}
@@ -30,14 +30,14 @@ classifiers = [
30
30
  "Operating System :: OS Independent",
31
31
  ]
32
32
  dependencies = [
33
- "numpy>=1.26.1",
34
- "pandas>=2.1.1",
35
- "six>=1.16.0",
33
+ "numpy>=2.2.3",
34
+ "pandas>=2.2.3",
35
+ "six>=1.17.0",
36
36
  "swifter>=1.4.0",
37
37
  "xlrd>=2.0.1",
38
- "XlsxWriter>=3.1.9",
39
- "openpyxl>=3.1.2",
40
- "pydantic>=2.4.2",
38
+ "XlsxWriter>=3.2.2",
39
+ "openpyxl>=3.1.5",
40
+ "pydantic>=2.10.6",
41
41
  ]
42
42
 
43
43
  [project.urls]
@@ -0,0 +1,10 @@
1
+ numpy==2.2.3
2
+ pandas==2.2.3
3
+ six==1.17.0
4
+ swifter==1.4.0
5
+ xlrd==2.0.1
6
+ XlsxWriter==3.2.2
7
+ openpyxl==3.1.5
8
+ pydantic==2.10.6
9
+ pytest==8.3.4
10
+ setuptools-scm==8.1.0
@@ -7,11 +7,15 @@ from pathlib import Path
7
7
  from functools import reduce
8
8
 
9
9
  import re
10
+
11
+ from numpy import nan
10
12
  import pandas as pd
11
13
 
12
14
 
13
15
  class FinalReport(object):
14
- """ File that contains SNP information.
16
+ """ File that contains SNP information. File processing is triggered by the
17
+ handle method. If values in 'SID' or 'UNIQ_KEY' were missing in the xlsx
18
+ conversion file, the processed data will contain NAN values.
15
19
 
16
20
  :argument allele: A variant form of a single nucleotide polymorphism
17
21
  (SNP), a specific polymorphic site or a whole gene detectable at
@@ -235,17 +239,15 @@ class FinalReport(object):
235
239
  if self._check_on_ru_symbols(self._map_rn.UNIQ_KEY):
236
240
  raise Exception("Error. Unique keys contain Cyrillic alphabet.")
237
241
 
238
- if self._map_rn.UNIQ_KEY.isna().any():
239
- self._map_rn.fillna('unknown', inplace=True)
240
-
241
242
  @staticmethod
242
243
  def _check_on_ru_symbols(seq: pd.Series) -> bool | None:
243
- """
244
+ """ Check a series for Cyrillic characters.
244
245
 
245
- :param seq:
246
- :return:
246
+ :param seq: Series of values to check.
247
+ :return: True if any value contains Cyrillic characters, False
248
+ otherwise.
247
249
  """
248
250
 
249
- return any(seq.apply(lambda x: bool(re.search('[а-яА-Я]', x))))
250
-
251
-
251
+ return seq.apply(
252
+ lambda x: bool(re.search('[а-яА-Я]', x)) if x is not nan else x
253
+ ).any()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.0.10
3
+ Version: 1.1.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
- Requires-Dist: numpy>=1.26.1
14
- Requires-Dist: pandas>=2.1.1
15
- Requires-Dist: six>=1.16.0
13
+ Requires-Dist: numpy>=2.2.3
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: six>=1.17.0
16
16
  Requires-Dist: swifter>=1.4.0
17
17
  Requires-Dist: xlrd>=2.0.1
18
- Requires-Dist: XlsxWriter>=3.1.9
19
- Requires-Dist: openpyxl>=3.1.2
20
- Requires-Dist: pydantic>=2.4.2
18
+ Requires-Dist: XlsxWriter>=3.2.2
19
+ Requires-Dist: openpyxl>=3.1.5
20
+ Requires-Dist: pydantic>=2.10.6
21
21
 
22
22
  # snptools
23
23
  <p align="center">
@@ -63,6 +63,10 @@ tests/finalreport/files/fr/file6.txt
63
63
  tests/finalreport/files/fr/file6.xlsx
64
64
  tests/finalreport/files/fr/file7.txt
65
65
  tests/finalreport/files/fr/file7.xlsx
66
+ tests/finalreport/files/fr/file8.txt
67
+ tests/finalreport/files/fr/file8.xlsx
68
+ tests/finalreport/files/fr/file9.txt
69
+ tests/finalreport/files/fr/file9.xlsx
66
70
  tests/format/__init__.py
67
71
  tests/format/test_plink_fam.py
68
72
  tests/format/test_plink_lgen.py
@@ -0,0 +1,8 @@
1
+ numpy>=2.2.3
2
+ pandas>=2.2.3
3
+ six>=1.17.0
4
+ swifter>=1.4.0
5
+ xlrd>=2.0.1
6
+ XlsxWriter>=3.2.2
7
+ openpyxl>=3.1.5
8
+ pydantic>=2.10.6
@@ -0,0 +1,28 @@
1
+ [Header]
2
+ GSGT Version 2.0.4
3
+ Processing Date 10/14/2021 4:02 PM
4
+ Content BovineSNP50_v3_A1.bpm
5
+ Num SNPs 53218
6
+ Total SNPs 53218
7
+ Num Samples 3
8
+ Total Samples 3
9
+ [Data]
10
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
11
+ ABCA12 1 A A 0.4048 0.8164
12
+ APAF1 1 B B 0.9067 0.9155
13
+ ARS-BFGL-BAC-10172 1 B B 0.9140 0.8767
14
+ ARS-BFGL-BAC-1020 1 B B 0.9288 0.8919
15
+ ARS-BFGL-BAC-10245 1 B B 0.7227 0.7447
16
+ ARS-BFGL-BAC-10345 1 A B 0.9468 0.9127
17
+ ABCA12 2 A A 0.4048 0.8164
18
+ APAF1 2 B B 0.9067 0.9155
19
+ ARS-BFGL-BAC-10172 2 A B 0.9140 0.8767
20
+ ARS-BFGL-BAC-1020 2 A B 0.9288 0.8919
21
+ ARS-BFGL-BAC-10245 2 A A 0.7227 0.7447
22
+ ARS-BFGL-BAC-10345 2 B B 0.9468 0.9127
23
+ ABCA12 3 A A 0.4048 0.8164
24
+ APAF1 3 B B 0.9067 0.9155
25
+ ARS-BFGL-BAC-10172 3 A B 0.9140 0.8767
26
+ ARS-BFGL-BAC-1020 3 A B 0.9288 0.8919
27
+ ARS-BFGL-BAC-10245 3 A A 0.7227 0.7447
28
+ ARS-BFGL-BAC-10345 3 A B 0.9468 0.9127
@@ -0,0 +1,28 @@
1
+ [Header]
2
+ GSGT Version 2.0.4
3
+ Processing Date 10/14/2021 4:02 PM
4
+ Content BovineSNP50_v3_A1.bpm
5
+ Num SNPs 53218
6
+ Total SNPs 53218
7
+ Num Samples 3
8
+ Total Samples 3
9
+ [Data]
10
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
11
+ ABCA12 1 A A 0.4048 0.8164
12
+ APAF1 1 B B 0.9067 0.9155
13
+ ARS-BFGL-BAC-10172 1 B B 0.9140 0.8767
14
+ ARS-BFGL-BAC-1020 1 B B 0.9288 0.8919
15
+ ARS-BFGL-BAC-10245 1 B B 0.7227 0.7447
16
+ ARS-BFGL-BAC-10345 1 A B 0.9468 0.9127
17
+ ABCA12 2 A A 0.4048 0.8164
18
+ APAF1 2 B B 0.9067 0.9155
19
+ ARS-BFGL-BAC-10172 2 A B 0.9140 0.8767
20
+ ARS-BFGL-BAC-1020 2 A B 0.9288 0.8919
21
+ ARS-BFGL-BAC-10245 2 A A 0.7227 0.7447
22
+ ARS-BFGL-BAC-10345 2 B B 0.9468 0.9127
23
+ ABCA12 3 A A 0.4048 0.8164
24
+ APAF1 3 B B 0.9067 0.9155
25
+ ARS-BFGL-BAC-10172 3 A B 0.9140 0.8767
26
+ ARS-BFGL-BAC-1020 3 A B 0.9288 0.8919
27
+ ARS-BFGL-BAC-10245 3 A A 0.7227 0.7447
28
+ ARS-BFGL-BAC-10345 3 A B 0.9468 0.9127
@@ -192,6 +192,7 @@ class TestFinalReport(object):
192
192
 
193
193
  @pytest.mark.parametrize("report", ["AB"], indirect=True)
194
194
  def test_7(self, report: FinalReport) -> None:
195
+ """ Check that an error is raised if a unique key contains Cyrillic characters """
195
196
 
196
197
  with pytest.raises(
197
198
  Exception, match="Error. Unique keys contain Cyrillic alphabet."
@@ -200,16 +201,29 @@ class TestFinalReport(object):
200
201
  DIR_FILES / "fr/file7.txt", DIR_FILES / "fr/file7.xlsx"
201
202
  )
202
203
 
203
- # assert not report.snp_data.empty
204
- #
205
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
206
- # def test_8(self, report: FinalReport) -> None:
207
- # ...
208
- #
209
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
210
- # def test_9(self, report: FinalReport) -> None:
211
- # ...
212
- #
213
- # @pytest.mark.parametrize("report", ["AB"], indirect=True)
214
- # def test_10(self, report: FinalReport) -> None:
215
- # ...
204
+ assert report.snp_data is None
205
+
206
+ @pytest.mark.parametrize("report", ["AB"], indirect=True)
207
+ def test_8(self, report: FinalReport) -> None:
208
+ """ Checking for processing empty values in SID """
209
+
210
+ report.handle(
211
+ DIR_FILES / "fr/file8.txt",
212
+ DIR_FILES / "fr/file8.xlsx"
213
+ )
214
+
215
+ assert report.snp_data is not None
216
+ assert not report.snp_data.empty
217
+ assert report.snp_data['Sample ID'].isna().any()
218
+
219
+ @pytest.mark.parametrize("report", ["AB"], indirect=True)
220
+ def test_9(self, report: FinalReport) -> None:
221
+ """ Checking for missing values in SID """
222
+ report.handle(
223
+ DIR_FILES / "fr/file9.txt",
224
+ DIR_FILES / "fr/file9.xlsx"
225
+ )
226
+
227
+ assert report.snp_data is not None
228
+ assert not report.snp_data.empty
229
+ assert report.snp_data['Sample ID'].isna().any()
@@ -1,2 +0,0 @@
1
- sphinx==7.2.6
2
- sphinx_rtd_theme==1.3.0
@@ -1,12 +0,0 @@
1
- numpy==1.26.1
2
- pandas==2.1.1
3
- six==1.16.0
4
- swifter==1.4.0
5
- xlrd==2.0.1
6
- XlsxWriter==3.1.9
7
- openpyxl==3.1.2
8
- pydantic==2.4.2
9
- pytest==7.4.2
10
- twine==6.0.1
11
- build==1.2.2.post1
12
- setuptools-scm==8.1.0
@@ -1,8 +0,0 @@
1
- numpy>=1.26.1
2
- pandas>=2.1.1
3
- six>=1.16.0
4
- swifter>=1.4.0
5
- xlrd>=2.0.1
6
- XlsxWriter>=3.1.9
7
- openpyxl>=3.1.2
8
- pydantic>=2.4.2
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes