snplib 1.0.10__tar.gz → 1.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. {snplib-1.0.10 → snplib-1.2.10}/.gitignore +2 -0
  2. {snplib-1.0.10/src/snplib.egg-info → snplib-1.2.10}/PKG-INFO +7 -7
  3. {snplib-1.0.10 → snplib-1.2.10}/docs/conf.py +1 -1
  4. {snplib-1.0.10 → snplib-1.2.10}/docs/examples.rst +130 -11
  5. snplib-1.2.10/docs/requirements.txt +2 -0
  6. {snplib-1.0.10 → snplib-1.2.10}/pyproject.toml +7 -7
  7. snplib-1.2.10/requirements.txt +10 -0
  8. snplib-1.2.10/src/snplib/finalreport/_finalreport.py +304 -0
  9. {snplib-1.0.10 → snplib-1.2.10/src/snplib.egg-info}/PKG-INFO +7 -7
  10. {snplib-1.0.10 → snplib-1.2.10}/src/snplib.egg-info/SOURCES.txt +4 -0
  11. snplib-1.2.10/src/snplib.egg-info/requires.txt +8 -0
  12. snplib-1.2.10/tests/finalreport/files/fr/file8.txt +28 -0
  13. snplib-1.2.10/tests/finalreport/files/fr/file8.xlsx +0 -0
  14. snplib-1.2.10/tests/finalreport/files/fr/file9.txt +28 -0
  15. snplib-1.2.10/tests/finalreport/files/fr/file9.xlsx +0 -0
  16. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/test_finalreport.py +138 -47
  17. snplib-1.0.10/docs/requirements.txt +0 -2
  18. snplib-1.0.10/requirements.txt +0 -12
  19. snplib-1.0.10/src/snplib/finalreport/_finalreport.py +0 -251
  20. snplib-1.0.10/src/snplib.egg-info/requires.txt +0 -8
  21. {snplib-1.0.10 → snplib-1.2.10}/.github/workflows/linux.yml +0 -0
  22. {snplib-1.0.10 → snplib-1.2.10}/.github/workflows/macos.yml +0 -0
  23. {snplib-1.0.10 → snplib-1.2.10}/.github/workflows/windows.yml +0 -0
  24. {snplib-1.0.10 → snplib-1.2.10}/.readthedocs.yaml +0 -0
  25. {snplib-1.0.10 → snplib-1.2.10}/LICENSE +0 -0
  26. {snplib-1.0.10 → snplib-1.2.10}/README.md +0 -0
  27. {snplib-1.0.10 → snplib-1.2.10}/__init__.py +0 -0
  28. {snplib-1.0.10 → snplib-1.2.10}/docs/Makefile +0 -0
  29. {snplib-1.0.10 → snplib-1.2.10}/docs/index.rst +0 -0
  30. {snplib-1.0.10 → snplib-1.2.10}/docs/install.rst +0 -0
  31. {snplib-1.0.10 → snplib-1.2.10}/docs/intro.rst +0 -0
  32. {snplib-1.0.10 → snplib-1.2.10}/docs/logo.png +0 -0
  33. {snplib-1.0.10 → snplib-1.2.10}/docs/make.bat +0 -0
  34. {snplib-1.0.10 → snplib-1.2.10}/docs/modules.rst +0 -0
  35. {snplib-1.0.10 → snplib-1.2.10}/docs/snplib.finalreport.rst +0 -0
  36. {snplib-1.0.10 → snplib-1.2.10}/docs/snplib.format.rst +0 -0
  37. {snplib-1.0.10 → snplib-1.2.10}/docs/snplib.parentage.rst +0 -0
  38. {snplib-1.0.10 → snplib-1.2.10}/docs/snplib.rst +0 -0
  39. {snplib-1.0.10 → snplib-1.2.10}/docs/snplib.statistics.rst +0 -0
  40. {snplib-1.0.10 → snplib-1.2.10}/docs/usage.rst +0 -0
  41. {snplib-1.0.10 → snplib-1.2.10}/iconlib.png +0 -0
  42. {snplib-1.0.10 → snplib-1.2.10}/setup.cfg +0 -0
  43. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/__init__.py +0 -0
  44. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/finalreport/__init__.py +0 -0
  45. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/format/__init__.py +0 -0
  46. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/format/__settings.py +0 -0
  47. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/format/_plink.py +0 -0
  48. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/format/_snp.py +0 -0
  49. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/__init__.py +0 -0
  50. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/_discov.py +0 -0
  51. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/_isagmark.py +0 -0
  52. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/_verif.py +0 -0
  53. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/isag_disc.pl +0 -0
  54. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/parentage/isag_verif.pl +0 -0
  55. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/statistics/__init__.py +0 -0
  56. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/statistics/_callrate.py +0 -0
  57. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/statistics/_freq.py +0 -0
  58. {snplib-1.0.10 → snplib-1.2.10}/src/snplib/statistics/_snphwe.py +0 -0
  59. {snplib-1.0.10 → snplib-1.2.10}/src/snplib.egg-info/dependency_links.txt +0 -0
  60. {snplib-1.0.10 → snplib-1.2.10}/src/snplib.egg-info/top_level.txt +0 -0
  61. {snplib-1.0.10 → snplib-1.2.10}/tests/__init__.py +0 -0
  62. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/__init__.py +0 -0
  63. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.txt +0 -0
  64. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file1.xlsx +0 -0
  65. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.txt +0 -0
  66. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file2.xlsx +0 -0
  67. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.txt +0 -0
  68. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file3.xlsx +0 -0
  69. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file4.txt +0 -0
  70. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.txt +0 -0
  71. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file5.xlsx +0 -0
  72. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.txt +0 -0
  73. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file6.xlsx +0 -0
  74. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.txt +0 -0
  75. {snplib-1.0.10 → snplib-1.2.10}/tests/finalreport/files/fr/file7.xlsx +0 -0
  76. {snplib-1.0.10 → snplib-1.2.10}/tests/format/__init__.py +0 -0
  77. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file.pl +0 -0
  78. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file2.pl +0 -0
  79. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file3.pl +0 -0
  80. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/fam/file4.pl +0 -0
  81. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/lgen/file.pl +0 -0
  82. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/map/file_bovinesnp50.csv +0 -0
  83. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file.pl +0 -0
  84. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file2.pl +0 -0
  85. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file3.pl +0 -0
  86. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fplink/ped/file4.pl +0 -0
  87. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fsnp/file1.txt +0 -0
  88. {snplib-1.0.10 → snplib-1.2.10}/tests/format/files/fsnp/file2.txt +0 -0
  89. {snplib-1.0.10 → snplib-1.2.10}/tests/format/test_plink_fam.py +0 -0
  90. {snplib-1.0.10 → snplib-1.2.10}/tests/format/test_plink_lgen.py +0 -0
  91. {snplib-1.0.10 → snplib-1.2.10}/tests/format/test_plink_map.py +0 -0
  92. {snplib-1.0.10 → snplib-1.2.10}/tests/format/test_plink_ped.py +0 -0
  93. {snplib-1.0.10 → snplib-1.2.10}/tests/format/test_snp.py +0 -0
  94. {snplib-1.0.10 → snplib-1.2.10}/tests/parentage/__init__.py +0 -0
  95. {snplib-1.0.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_disc.csv +0 -0
  96. {snplib-1.0.10 → snplib-1.2.10}/tests/parentage/data/parentage_test_verf.csv +0 -0
  97. {snplib-1.0.10 → snplib-1.2.10}/tests/parentage/test_discov.py +0 -0
  98. {snplib-1.0.10 → snplib-1.2.10}/tests/parentage/test_verif.py +0 -0
  99. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/__init__.py +0 -0
  100. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/data/cr/file_cra.pl +0 -0
  101. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/data/cr/file_crm.pl +0 -0
  102. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/data/freq/etalon.txt +0 -0
  103. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/data/freq/file.pl +0 -0
  104. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/test_callrate.py +0 -0
  105. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/test_freq_allele.py +0 -0
  106. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/test_freq_maf.py +0 -0
  107. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/test_hwe_t.py +0 -0
  108. {snplib-1.0.10 → snplib-1.2.10}/tests/statistics/test_snphwe.py +0 -0
@@ -17,6 +17,8 @@
17
17
  !requirements.txt
18
18
  !/tests/*/**
19
19
  !/docs/*/**
20
+ /docs/_build
21
+
20
22
 
21
23
  *.idea*
22
24
  *__pycache__*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.0.10
3
+ Version: 1.2.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
- Requires-Dist: numpy>=1.26.1
14
- Requires-Dist: pandas>=2.1.1
15
- Requires-Dist: six>=1.16.0
13
+ Requires-Dist: numpy>=2.2.3
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: six>=1.17.0
16
16
  Requires-Dist: swifter>=1.4.0
17
17
  Requires-Dist: xlrd>=2.0.1
18
- Requires-Dist: XlsxWriter>=3.1.9
19
- Requires-Dist: openpyxl>=3.1.2
20
- Requires-Dist: pydantic>=2.4.2
18
+ Requires-Dist: XlsxWriter>=3.2.2
19
+ Requires-Dist: openpyxl>=3.1.5
20
+ Requires-Dist: pydantic>=2.10.6
21
21
 
22
22
  # snptools
23
23
  <p align="center">
@@ -8,7 +8,7 @@
8
8
 
9
9
  import os
10
10
  import sys
11
- sys.path.insert(0, os.path.abspath('../src/snplib'))
11
+ sys.path.insert(0, os.path.abspath('../src'))
12
12
 
13
13
  project = 'snptools'
14
14
  copyright = '2025, Igor Locshinin'
@@ -73,6 +73,31 @@ Output::
73
73
 
74
74
  ...
75
75
 
76
+ To handle large files, use `usecols` and `dtype`. This reduces memory
77
+ consumption and speeds up processing.
78
+
79
+ .. note::
80
+ `usecols` is used when `allele` is **None**.
81
+
82
+ .. code-block:: python
83
+
84
+ alleles_ab = FinalReport(
85
+ usecols=['SNP Name', 'Sample ID', 'Allele1 - AB', 'Allele2 - AB'],
86
+ dtype={'SNP Name': 'category'}
87
+ )
88
+ alleles_ab.handle("path/to/finalreport.txt")
89
+ data_ab = alleles_ab.snp_data
90
+
91
+ Output::
92
+
93
+ SNP Name Sample ID Allele1 - AB Allele2 - AB
94
+ ARS-BFGL-BAC-10172 HO840M003135245650 B B
95
+ ARS-BFGL-BAC-1020 HO840M003135245650 B B
96
+ ARS-BFGL-BAC-10245 HO840M003135245650 B B
97
+ ARS-BFGL-BAC-10345 HO840M003135245650 A B
98
+ ARS-BFGL-BAC-10375 HO840M003135245650 A B
99
+ ...
100
+
76
101
  Preparation SNP files
77
102
  ---------------------
78
103
 
@@ -90,7 +115,19 @@ plink - GBLUP, ssGBLUP, GWAS.
90
115
  blupf90 format
91
116
  ______________
92
117
  The input data for obtaining the ``snp.txt`` file used for the genomic
93
- blupf90 evaluation is the data file - processed file ``finalreport.txt``
118
+ blupf90 evaluation is the data file - processed file ``finalreport.txt``.
119
+ The processed file can be seen in the item above - Finalreport.txt processing:
120
+
121
+ Content input *file.txt*::
122
+
123
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score X Y
124
+ ARS-BFGL-BAC-10172 HO840M003135245650 B B 0.9420 0.069 0.801
125
+ ARS-BFGL-BAC-1020 HO840M003135245650 B B 0.9489 0.033 0.700
126
+ ARS-BFGL-BAC-10245 HO840M003135245650 B B 0.7277 0.152 1.504
127
+ ARS-BFGL-BAC-10345 HO840M003135245650 A B 0.9411 0.598 0.572
128
+ ARS-BFGL-BAC-10375 HO840M003135245650 A B 0.9348 0.430 0.494
129
+
130
+ ...
94
131
 
95
132
  **uga**
96
133
 
@@ -99,7 +136,7 @@ blupf90 evaluation is the data file - processed file ``finalreport.txt``
99
136
  import pandas as pd
100
137
  from snplib.format import Snp
101
138
 
102
- data_finalreport = pd.read_csv("file.txt", sep="\t")
139
+ data_finalreport = pd.read_csv("path_to_file/file.txt", sep="\t")
103
140
 
104
141
  obj_snp = Snp(fmt="uga")
105
142
  obj_snp.process(data_finalreport)
@@ -111,7 +148,7 @@ Data after snp processing in ``uga`` (blupf90) format - obj_snp.data::
111
148
  0 14814 02011015010000500
112
149
  1 14815 01110152120222512
113
150
 
114
- Default result::
151
+ Default result - this is what the data looks like if ``fmt=None``::
115
152
 
116
153
  SNP_NAME SAMPLE_ID SNP
117
154
  0 ABCA12 14814 0
@@ -130,16 +167,104 @@ ____________
130
167
 
131
168
  This page describes specialized PLINK input and output file formats which are
132
169
  identifiable by file extension. https://www.cog-genomics.org/plink/1.9/formats
133
- Распространненные фомраты для проведения GWAS анализа - ``ped``, ``map``, ``fam``, ``lgen``...
170
+ Common formats for performing GWAS analysis - ``ped``, ``map``, ``fam``, ``lgen``...
134
171
 
135
172
  **map** - https://www.cog-genomics.org/plink/1.9/formats#map
136
173
 
174
+ To get the ``.map`` file, first you need to download the *manifest file* for the chip
175
+ you are using.
176
+
177
+ .. note::
178
+ *file_bovinesnp50.csv* - The file that is taken on the Illumina website with full
179
+ information about the chip https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
180
+
181
+ Since the make_map function accepts **pd.DataFrame**, the *manifest file* processing is performed
182
+ independently.
183
+
184
+ Input data for make_map::
185
+
186
+ IlmnID ... BeadSetID
187
+ 0 BovineHD0100037694-128_T_F_2278925834 ... 1241
188
+ 1 BovineHD0100037699_dup-128_T_F_2327674593 ... 1241
189
+ 2 BovineHD0100037703_dup-128_B_R_2327674602 ... 1241
190
+ 3 BovineHD0100037704_dup-128_T_F_2327674603 ... 1241
191
+ 4 BovineHD0100037710_dup-128_T_F_2327674613 ... 1241
192
+ 5 BovineHD0100037712_dup-128_B_R_2327674618 ... 1241
193
+ 6 BovineHD0100037716-128_T_F_2255347065 ... 1241
194
+ 7 BovineHD0100037719-128_T_F_2278926219 ... 1241
195
+ 8 BovineHD0100037720-128_B_R_2255342455 ... 1241
196
+ 9 BovineHD0100037722_dup-128_B_R_2327674634 ... 1241
197
+
198
+
199
+ .. note::
200
+ The original file, for example, **BovineSNP50_v3_A1.csv** looks like this::
201
+
202
+ Illumina, Inc.,,,,,,,,,,,,,,,,,
203
+ [Heading],,,,,,,,,,,,,,,,,,
204
+ Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
205
+ Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
206
+ Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
207
+ Loci Count ,53218,,,,,,,,,,,,,,,,,
208
+ [Assay],,,,,,,,,,,,,,,,,,
209
+ IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
210
+ ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
211
+ APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
212
+ ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
213
+ .
214
+ .
215
+ .
216
+ UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
217
+ UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
218
+ UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
219
+ [Controls],,,,,,,,,,,,,,,,,,
220
+ 0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
221
+ 0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
222
+ 0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
223
+ 0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
224
+ 0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
225
+ 0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
226
+
227
+ Therefore, for direct reading via **pd.read_csv()** it is necessary to
228
+ preprocess the file - delete extra lines::
229
+
230
+ Illumina, Inc.,,,,,,,,,,,,,,,,,
231
+ [Heading],,,,,,,,,,,,,,,,,,
232
+ Descriptor File Name,BovineSNP50_v3_A1.bpm,,,,,,,,,,,,,,,,,
233
+ Assay Format,Infinium HTS,,,,,,,,,,,,,,,,,
234
+ Date Manufactured,1/14/2016,,,,,,,,,,,,,,,,,
235
+ Loci Count ,53218,,,,,,,,,,,,,,,,,
236
+ [Assay],,,,,,,,,,,,,,,,,,
237
+
238
+ and
239
+
240
+ [Controls],,,,,,,,,,,,,,,,,,
241
+ 0027630314,Staining,Red,DNP (High),,,,,,,,,,,,,,,
242
+ 0029619375,Staining,Purple,DNP (Bgnd),,,,,,,,,,,,,,,
243
+ 0041666334,Staining,Green,Biotin (High),,,,,,,,,,,,,,,
244
+ 0034648333,Staining,Blue,Biotin (Bgnd),,,,,,,,,,,,,,,
245
+ 0017616306,Extension,Red,Extension (A),,,,,,,,,,,,,,,
246
+ 0014607337,Extension,Purple,Extension (T),,,,,,,,,,,,,,,
247
+
248
+ The file should end up looking like this::
249
+
250
+ IlmnID,Name,IlmnStrand,SNP,AddressA_ID,AlleleA_ProbeSeq,AddressB_ID,AlleleB_ProbeSeq,GenomeBuild,Chr,MapInfo,Ploidy,Species,Source,SourceVersion,SourceStrand,SourceSeq,TopGenomicSeq,BeadSetID
251
+ ABCA12_r2-1_T_F_2277749139,ABCA12,TOP,[A/G],0059616496,CTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC,,,0,2,103548215,diploid,Bos taurus,UMD3.1,1,TOP,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,ACTCTGGTGGATGGTTCATAATCTGCTAAGATGAATAAGTTACTGGGGAAACTGGTGCATTTATTTTAAATATAAATTATATAGTCTGTAAGATATAAAGACTGCCTAATTTATTTGAACACCATACTGATCTTGTCTTCTTTTGGAATGTTACAGGTATGGTATGATCCAGAAGGCTATC[A/G]CTCCCTTCCAGCTTACCTCAACAGCCTGAATAATTTCCTCCTGCGAGTTAACATGTCAAAATATGATGCTGCCCGACATGGTAAAGTTATTTACATAGGAGCTCCTTGTATTGAAACTCTTGCTACTCTCCATGTGAAAATATACATTAGACCCCATTTTCCTCCCTGTGGCAGCTAT,1241
252
+ APAF1_dup-1_B_F_2327661418,APAF1,BOT,[T/C],0041654401,ATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG,,,0,5,63150400,diploid,Bos taurus,UMD3.1,1,BOT,CCATTTCCTAATATTGTGCAACTGGGCCTCTGTGAACTGGAAACTTCAGAGGTTTATCGG[T/C]AAGCTAAGCTGCAGGCCAAGCAGGAGGTCGATAACGGAATGCTTTACCTGGAGTGGGTGT,ACACCCACTCCAGGTAAAGCATTCCGTTATCGACCTCCTGCTTGGCCTGCAGCTTAGCTT[A/G]CCGATAAACCTCTGAAGTTTCCAGTTCACAGAGGCCCAGTTGCACAATATTAGGAAATGG,1241
253
+ ARS-BFGL-BAC-10172_dup-0_T_F_2328966397,ARS-BFGL-BAC-10172,TOP,[A/G],0072620471,GGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT,,,3,14,6371334,diploid,Bos taurus,UM3,0,TOP,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,CTCAGAAGTTGGTCCCCAAAGTATGTGGTAGCACTTACTTATGTAAGTCATCACTCAAGT[A/G]ATCCAGAATATTCTTTTAGTAATATTTTTGTTAATATTGAAATTTTTAAAACAATTGAAA,1241
254
+ .
255
+ .
256
+ .
257
+ UA-IFASA-9812_dup-0_B_F_2329051536,UA-IFASA-9812,BOT,[T/C],0031677304,ACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA,,,3,29,48012818,diploid,Bos taurus,UM3,0,BOT,GTAAAAACAAACCTCCATAGCTGATAGGAATGGTCTCAACTTGCAGCCCCATTATACTAA[T/C]GATGATCTGAAGTTTCTCAAGCACGCAGAGAAACGTAAGAGAAACGTTCCAGCAAAGGGA,TCCCTTTGCTGGAACGTTTCTCTTACGTTTCTCTGCGTGCTTGAGAAACTTCAGATCATC[A/G]TTAGTATAATGGGGCTGCAAGTTGAGACCATTCCTATCAGCTATGGAGGTTTGTTTTTAC,1241
258
+ UA-IFASA-9813_dup-0_B_F_2329051538,UA-IFASA-9813,BOT,[T/C],0011661313,ACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT,,,3,19,32508700,diploid,Bos taurus,UM3,0,BOT,AATAAAACCAACCTTTGCACTCGCTAACGGTTCAGCATTAATCAGACTTCCTCAGGAATT[T/C]AGGGGTCAATTCCCCCATGTCTAAAATTGAACCTCAACGTCCTTTCTGTTTTCAAAACTC,GAGTTTTGAAAACAGAAAGGACGTTGAGGTTCAATTTTAGACATGGGGGAATTGACCCCT[A/G]AATTCCTGAGGAAGTCTGATTAATGCTGAACCGTTAGCGAGTGCAAAGGTTGGTTTTATT,1241
259
+ UMPS_dup-1_T_R_2327737250,UMPS,TOP,[A/G],0073777348,TAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC,,,0,1,69756880,diploid,Bos taurus,UMD3.1,1,BOT,TCATCTGTTGATTACATTCCATTCAGGTGCAAATGGCTGAAGAACATTCTGAATTTGTGATTGGTTTTATTTCTGGCTCC[T/C]GAGTAAGCATGAAACCAGAATTTCTTCACTTGACTCCAGGAGTTCAGTTAGAAGCAGGAGGTAAGCCTATTGATTGGTAA,TTACCAATCAATAGGCTTACCTCCTGCTTCTAACTGAACTCCTGGAGTCAAGTGAAGAAATTCTGGTTTCATGCTTACTC[A/G]GGAGCCAGAAATAAAACCAATCACAAATTCAGAATGTTCTTCAGCCATTTGCACCTGAATGGAATGTAATCAACAGATGA,1241
260
+
261
+
137
262
  .. code-block:: python
138
263
 
139
264
  import pandas as pd
140
265
  from snplib.format import make_map
141
266
 
142
- input_data = pd.read_csv(DIR_FILES / "./file_bovinesnp50.csv")
267
+ input_data = pd.read_csv("./file_bovinesnp50.csv")
143
268
  data_map = make_map(input_data)
144
269
 
145
270
  Output data view::
@@ -150,12 +275,6 @@ Output data view::
150
275
  0 BovineHD0100037703 0 0
151
276
  0 BovineHD0100037704 0 0
152
277
 
153
- .. note::
154
- file_bovinesnp50.csv - The file that is taken on the Illumina website with full
155
- information about the chip
156
- https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
157
-
158
-
159
278
  **ped** - https://www.cog-genomics.org/plink/1.9/formats#ped
160
279
 
161
280
  .. code-block:: python
@@ -0,0 +1,2 @@
1
+ sphinx==8.1.3
2
+ sphinx_rtd_theme==3.0.2
@@ -17,7 +17,7 @@ snplib = ["*.pl"]
17
17
 
18
18
  [project]
19
19
  name = "snplib"
20
- version = "1.0.10"
20
+ version = "1.2.10"
21
21
  description = "Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing"
22
22
  authors = [
23
23
  {name = "Igor", email = "igor.loschinin@gmail.com"}
@@ -30,14 +30,14 @@ classifiers = [
30
30
  "Operating System :: OS Independent",
31
31
  ]
32
32
  dependencies = [
33
- "numpy>=1.26.1",
34
- "pandas>=2.1.1",
35
- "six>=1.16.0",
33
+ "numpy>=2.2.3",
34
+ "pandas>=2.2.3",
35
+ "six>=1.17.0",
36
36
  "swifter>=1.4.0",
37
37
  "xlrd>=2.0.1",
38
- "XlsxWriter>=3.1.9",
39
- "openpyxl>=3.1.2",
40
- "pydantic>=2.4.2",
38
+ "XlsxWriter>=3.2.2",
39
+ "openpyxl>=3.1.5",
40
+ "pydantic>=2.10.6",
41
41
  ]
42
42
 
43
43
  [project.urls]
@@ -0,0 +1,10 @@
1
+ numpy==2.2.3
2
+ pandas==2.2.3
3
+ six==1.17.0
4
+ swifter==1.4.0
5
+ xlrd==2.0.1
6
+ XlsxWriter==3.2.2
7
+ openpyxl==3.1.5
8
+ pydantic==2.10.6
9
+ pytest==8.3.4
10
+ setuptools-scm==8.1.0
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ __author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
4
+ __all__ = ("FinalReport",)
5
+
6
+ import re
7
+ from functools import reduce
8
+ from pathlib import Path
9
+
10
+ import pandas as pd
11
+ from numpy import nan
12
+
13
+
14
+ class FinalReport(object):
15
+ """ File that contains SNP information. File processing is triggered by the
16
+ handle method. If values in 'SID' or 'UNIQ_KEY' were missing in the xlsx
17
+ conversion file, the processed data will contain NAN values.
18
+
19
+ :param allele: A variant form of a single nucleotide polymorphism (SNP), a
20
+ specific polymorphic site or a whole gene detectable at a locus. Type:
21
+ 'AB', 'Forward', 'Top', 'Plus', 'Design'.
22
+ :param sep: Delimiter to use. Default value: "\\t".
23
+ :param usecols: Selection of fields for reading. Accelerates processing
24
+ and reduces memory.
25
+ :param dtype: Data type(s) to apply to either the whole dataset or
26
+ individual columns. E.g., {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
27
+
28
+ Example:
29
+ [Header]
30
+ GSGT Version 2.0.4
31
+ Processing Date 10/14/2021 4:02 PM
32
+ Content BovineSNP50_v3_A1.bpm
33
+ Num SNPs 53218
34
+ Total SNPs 53218
35
+ Num Samples 3
36
+ Total Samples 3
37
+ [Data]
38
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
39
+ ABCA12 1 A A 0.4048 0.8164
40
+ APAF1 1 B B 0.9067 0.9155
41
+ ...
42
+ """
43
+
44
+ __PATTERN_HEADER = re.compile(r'(^\[Header])')
45
+ __PATTERN_DATA = re.compile(r'(^\[Data])')
46
+
47
+ __slots__ = (
48
+ "_delimiter",
49
+ "__allele",
50
+ "__usecols",
51
+ "__dtype",
52
+ "__snp_data",
53
+ "__header",
54
+ "_map_rn",
55
+ )
56
+
57
+ def __init__(
58
+ self,
59
+ allele: str | list | None = None,
60
+ usecols: list[str] | None = None,
61
+ dtype: dict | None = None,
62
+ sep: str = "\t"
63
+ ) -> None:
64
+ self._delimiter = sep
65
+ self.__allele = allele
66
+ self.__usecols = usecols
67
+ self.__dtype = dtype
68
+
69
+ # self._full_data = None
70
+ self.__snp_data: pd.DataFrame | None = None
71
+ self.__header = {}
72
+ self._map_rn = None
73
+
74
+ @property
75
+ def header(self) -> dict:
76
+ return self.__header
77
+
78
+ @property
79
+ def snp_data(self) -> pd.DataFrame | None:
80
+ return self.__snp_data
81
+
82
+ def handle(
83
+ self, file_rep: Path | str, conv_file: Path | str = None
84
+ ) -> bool:
85
+ """ Processes the FinalReport.txt file. Highlights meta information
86
+ and data.
87
+
88
+ :param file_rep: The file FinalReport.txt or another name.
89
+ :param conv_file: The file that contains IDs of registration numbers
90
+ of animals.
91
+ :return: Returns true if file processing was successful, false if
92
+ there were errors.
93
+ """
94
+
95
+ try:
96
+
97
+ if self.__allele is not None and self.__usecols is not None:
98
+ raise Exception("Error. Usecols is used for allele is none.")
99
+
100
+ if isinstance(file_rep, str):
101
+ file_rep = Path(file_rep)
102
+
103
+ if not file_rep.is_file() and not file_rep.exists():
104
+ return False
105
+
106
+ # Processing conversion file
107
+ if conv_file is not None:
108
+ if isinstance(conv_file, str):
109
+ conv_file = Path(conv_file)
110
+
111
+ if not conv_file.is_file() and not conv_file.exists():
112
+ return False
113
+
114
+ self.__convert_s_id(conv_file)
115
+
116
+ # # Processing report file
117
+ self.__handler_header(file_rep)
118
+ self.__handler_data(file_rep)
119
+
120
+ if not self.__snp_data.empty and self._map_rn is not None:
121
+ self.__snp_data['Sample ID'] = \
122
+ self.__snp_data['Sample ID'].map(
123
+ dict(zip(self._map_rn.SID, self._map_rn.UNIQ_KEY))
124
+ )
125
+
126
+ except Exception as e:
127
+ raise e
128
+
129
+ return True
130
+
131
+ def __handler_header(self, file_rep: Path) -> None:
132
+ """ Processes data from a file, selects meta-information.
133
+
134
+ :param file_rep: path, pointer to the file to be read.
135
+ """
136
+
137
+ with open(file_rep, 'r') as file:
138
+
139
+ for line in file:
140
+ if self.__class__.__PATTERN_DATA.findall(line.strip()):
141
+ return
142
+
143
+ if self.__class__.__PATTERN_HEADER.findall(line.strip()) or\
144
+ len(line.strip()) == 0:
145
+ continue
146
+
147
+ key = line.strip().split("\t")[0]
148
+ value = line.strip().split("\t")[1]
149
+
150
+ self.__header[key] = value
151
+
152
+ def __handler_data(self, file_rep: Path) -> None:
153
+ """ Processes data and forms an array for further processing.
154
+
155
+ :param file_rep: path, pointer to the file to be read.
156
+ """
157
+
158
+ with open(file_rep, 'r') as file:
159
+
160
+ # Search for the data start index and skip
161
+ for line in file:
162
+ if self.__class__.__PATTERN_DATA.findall(line.strip()):
163
+ break
164
+
165
+ # line column
166
+ orig_name_col = file.readline().strip().split(self._delimiter)
167
+
168
+ if self.__allele is None and self.__usecols is None:
169
+ self.__snp_data = pd.read_csv(
170
+ file,
171
+ sep=self._delimiter,
172
+ header=None,
173
+ names=orig_name_col,
174
+ dtype=self.__dtype,
175
+ low_memory=True,
176
+ na_filter=True
177
+ )
178
+
179
+ return
180
+
181
+ sub_n_col = self.__processing_columns(orig_name_col)
182
+ self.__snp_data = pd.read_csv(
183
+ file,
184
+ sep=self._delimiter,
185
+ header=None,
186
+ names=orig_name_col,
187
+ usecols=sub_n_col,
188
+ dtype=self.__dtype,
189
+ low_memory=True,
190
+ na_filter=True
191
+ )
192
+
193
+ return
194
+
195
+ def __processing_columns(self, lst_col: list[str]) -> list[str] | None:
196
+ """ Processing the line with all the names of the fields and the
197
+ sample of them.
198
+
199
+ :param lst_col: List of all fields.
200
+ :return: Returns a tuple with a list of names of selected fields.
201
+ """
202
+
203
+ if self.__usecols is not None:
204
+ check_n_col = [
205
+ item for item in self.__usecols if item in lst_col
206
+ ]
207
+
208
+ # Check on empty list
209
+ if check_n_col:
210
+ return self.__usecols
211
+
212
+ raise Exception(
213
+ f"Error. The USECOLS list contains not true fields."
214
+ )
215
+
216
+ # processing alleles
217
+ sample_n_col = self.__sample_by_allele(lst_col)
218
+ if sample_n_col is None:
219
+ raise Exception(
220
+ f"Error. Allele {self.__allele} not in data."
221
+ )
222
+
223
+ return sample_n_col
224
+
225
+ def __sample_by_allele(self, names: list[str]) -> list[str] | None:
226
+ """ Method that generates a list of field names choosing which alleles
227
+ to keep
228
+
229
+ :param names: List of field names in the report file.
230
+ :return: Returns a filtered list of fields by alleles.
231
+ """
232
+
233
+ allele_templ = r'(^Allele\d\s[:-]\s{}\b)'
234
+
235
+ match self.__allele:
236
+ case None:
237
+ return names
238
+
239
+ case str():
240
+ allele_pattern = re.compile(
241
+ allele_templ.format(self.__allele)
242
+ )
243
+
244
+ case list() | tuple() | set():
245
+ allele_pattern = re.compile(
246
+ allele_templ.format("|".join(self.__allele))
247
+ )
248
+ case _:
249
+ return None
250
+
251
+ lst_allele = reduce(
252
+ lambda i, j: i + j,
253
+ [allele_pattern.findall(item) for item in names]
254
+ )
255
+
256
+ if len(lst_allele) == 0:
257
+ return None
258
+
259
+ exclude_alleles = [
260
+ item for item in names
261
+ if item.startswith("Allele") and item not in lst_allele
262
+ ]
263
+
264
+ return list(filter(
265
+ lambda x: True if x not in exclude_alleles else False, names
266
+ ))
267
+
268
+ def __convert_s_id(self, path_file: Path) -> None:
269
+ """Converts sample id which is in FinalReport to animal registration
270
+ number.
271
+
272
+ :param path_file: xlsx file with animal numbers label
273
+ """
274
+
275
+ self._map_rn = pd.read_excel(
276
+ path_file,
277
+ header=None,
278
+ names=['SID', 'UNIQ_KEY', 'SEX'],
279
+ dtype={'SID': str},
280
+ index_col=False
281
+ )
282
+
283
+ if self._map_rn.empty:
284
+ self._map_rn = None
285
+ return
286
+
287
+ self._map_rn.SID = self._map_rn.SID.str.strip()
288
+ self._map_rn.UNIQ_KEY = self._map_rn.UNIQ_KEY.str.strip()
289
+
290
+ if self._check_on_ru_symbols(self._map_rn.UNIQ_KEY):
291
+ raise Exception("Error. Unique keys contain Cyrillic alphabet.")
292
+
293
+ @staticmethod
294
+ def _check_on_ru_symbols(seq: pd.Series) -> bool | None:
295
+ """ Checial verification of the Cyrillic
296
+
297
+ :param seq: Squeezed for verification.
298
+ :return: Truth if there are no symbols of Cyril and there is a lie if
299
+ there is.
300
+ """
301
+
302
+ return seq.apply(
303
+ lambda x: bool(re.search('[а-яА-Я]', x)) if x is not nan else x
304
+ ).any()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.0.10
3
+ Version: 1.2.10
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -10,14 +10,14 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
- Requires-Dist: numpy>=1.26.1
14
- Requires-Dist: pandas>=2.1.1
15
- Requires-Dist: six>=1.16.0
13
+ Requires-Dist: numpy>=2.2.3
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: six>=1.17.0
16
16
  Requires-Dist: swifter>=1.4.0
17
17
  Requires-Dist: xlrd>=2.0.1
18
- Requires-Dist: XlsxWriter>=3.1.9
19
- Requires-Dist: openpyxl>=3.1.2
20
- Requires-Dist: pydantic>=2.4.2
18
+ Requires-Dist: XlsxWriter>=3.2.2
19
+ Requires-Dist: openpyxl>=3.1.5
20
+ Requires-Dist: pydantic>=2.10.6
21
21
 
22
22
  # snptools
23
23
  <p align="center">
@@ -63,6 +63,10 @@ tests/finalreport/files/fr/file6.txt
63
63
  tests/finalreport/files/fr/file6.xlsx
64
64
  tests/finalreport/files/fr/file7.txt
65
65
  tests/finalreport/files/fr/file7.xlsx
66
+ tests/finalreport/files/fr/file8.txt
67
+ tests/finalreport/files/fr/file8.xlsx
68
+ tests/finalreport/files/fr/file9.txt
69
+ tests/finalreport/files/fr/file9.xlsx
66
70
  tests/format/__init__.py
67
71
  tests/format/test_plink_fam.py
68
72
  tests/format/test_plink_lgen.py
@@ -0,0 +1,8 @@
1
+ numpy>=2.2.3
2
+ pandas>=2.2.3
3
+ six>=1.17.0
4
+ swifter>=1.4.0
5
+ xlrd>=2.0.1
6
+ XlsxWriter>=3.2.2
7
+ openpyxl>=3.1.5
8
+ pydantic>=2.10.6
@@ -0,0 +1,28 @@
1
+ [Header]
2
+ GSGT Version 2.0.4
3
+ Processing Date 10/14/2021 4:02 PM
4
+ Content BovineSNP50_v3_A1.bpm
5
+ Num SNPs 53218
6
+ Total SNPs 53218
7
+ Num Samples 3
8
+ Total Samples 3
9
+ [Data]
10
+ SNP Name Sample ID Allele1 - AB Allele2 - AB GC Score GT Score
11
+ ABCA12 1 A A 0.4048 0.8164
12
+ APAF1 1 B B 0.9067 0.9155
13
+ ARS-BFGL-BAC-10172 1 B B 0.9140 0.8767
14
+ ARS-BFGL-BAC-1020 1 B B 0.9288 0.8919
15
+ ARS-BFGL-BAC-10245 1 B B 0.7227 0.7447
16
+ ARS-BFGL-BAC-10345 1 A B 0.9468 0.9127
17
+ ABCA12 2 A A 0.4048 0.8164
18
+ APAF1 2 B B 0.9067 0.9155
19
+ ARS-BFGL-BAC-10172 2 A B 0.9140 0.8767
20
+ ARS-BFGL-BAC-1020 2 A B 0.9288 0.8919
21
+ ARS-BFGL-BAC-10245 2 A A 0.7227 0.7447
22
+ ARS-BFGL-BAC-10345 2 B B 0.9468 0.9127
23
+ ABCA12 3 A A 0.4048 0.8164
24
+ APAF1 3 B B 0.9067 0.9155
25
+ ARS-BFGL-BAC-10172 3 A B 0.9140 0.8767
26
+ ARS-BFGL-BAC-1020 3 A B 0.9288 0.8919
27
+ ARS-BFGL-BAC-10245 3 A A 0.7227 0.7447
28
+ ARS-BFGL-BAC-10345 3 A B 0.9468 0.9127