snplib 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
snplib/format/_plink.py CHANGED
@@ -14,23 +14,20 @@ def make_map(manifest: pd.DataFrame) -> pd.DataFrame:
14
14
  """ PLINK text fileset variant information file
15
15
  https://www.cog-genomics.org/plink/1.9/formats#map
16
16
 
17
- A text file with no header line, and one line per variant with the following 3-4 fields:
17
+ A text file with no header line, and one line per variant with the
18
+ following 3-4 fields:
18
19
 
19
- 1. Chromosome code. PLINK 1.9 also permits contig names here, but most
20
- older programs do not.
21
- 2. Variant identifier
22
- 3. Position in morgans or centimorgans (optional; also safe to use
23
- dummy value of '0')
24
- 4. Base-pair coordinate
20
+ 1. Chromosome code. PLINK 1.9 also permits contig names here, but most older programs do not.
21
+ 2. Variant identifier.
22
+ 3. Position in morgans or centimorgans (optional; also safe to use dummy value of '0').
23
+ 4. Base-pair coordinate.
25
24
 
26
- All lines must have the same number of columns (so either no lines
27
- contain the morgans/centimorgans column, or all of them do).
25
+ All lines must have the same number of columns (so either no lines contain
26
+ the morgans/centimorgans column, or all of them do).
28
27
 
29
- :param manifest: The file that is taken on the Illumina website with full
30
- information about the chip
31
- https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html
28
+ :param manifest: The manifest file downloaded from the Illumina website, containing full information about the chip (see https://support.illumina.com/downloads/bovinesnp50-v3-0-product-files.html).
32
29
 
33
- :return: Return data in formate .map
30
+ :return: Return data in .map format.
34
31
  """
35
32
 
36
33
  fields = ['Chr', 'Name', 'MapInfo']
@@ -69,12 +66,11 @@ def make_ped(
69
66
  genotype calls. Normally must be accompanied by a .map file.
70
67
  https://www.cog-genomics.org/plink/1.9/formats#ped
71
68
 
72
- The PED file has 6 fixed columns at the beginning followed by the SNP
69
+ The PED file has 6 fixed columns at the beginning followed by the SNP
73
70
  information. The columns should be separated by a whitespace or a tab. The
74
71
  first six columns hold the following information:
75
72
 
76
- 1. Family ID (if unknown use the same id as for the sample id in
77
- column two)
73
+ 1. Family ID (if unknown use the same id as for the sample id in column two)
78
74
  2. Sample ID
79
75
  3. Paternal ID (if unknown use 0)
80
76
  4. Maternal ID (if unknown use 0)
@@ -82,25 +78,23 @@ def make_ped(
82
78
  6. Affection (0=unknown; 1=unaffected; 2=affected)
83
79
  7. Genotypes (space or tab separated, 2 for each marker. 0/-9=missing)
84
80
 
85
- Here is a brief example of a genotype PED file containing 5 samples
81
+ Here is a brief example of a genotype PED file containing 5 samples
86
82
  with 10 homozygous SNPs:
87
- 4304 4304 0 0 0 0 C C C C G G G G G G C C G G C C T T T T
88
- 6925 6925 0 0 0 0 C C C C T T G G A A C C G G C C T T T T
89
- 7319 7319 0 0 0 0 C C C C G G G G G G C C G G C C T T T T
90
- 6963 6963 0 0 0 0 A A C C T T G G A A C C G G C C T T T T
91
- 6968 6968 0 0 0 0 C C C C G G G G G G G G G G C C T T T T
92
-
93
- :param data: Snp data that contain full or partial information on the
94
- animal
95
- :param sid_col: Sample ID. Column name in data
96
- :param snp_col: Snp column name in data
97
- :param fid_col: Family ID column name in data (if unknown use the same
98
- id as for the sample id in column two)
99
- :param father_col: Paternal ID column name in data (if unknown use 0)
100
- :param mother_col: Maternal ID column name in data (if unknown use 0)
101
- :param sex_col: Sex column name in data (if unknown use 0)
102
- :return: Returns an array of data in ped format to work with the plink
103
- program
83
+
84
+ 4304 4304 0 0 0 0 C C C C G G G G G G C C G G C C T T T T
85
+ 6925 6925 0 0 0 0 C C C C T T G G A A C C G G C C T T T T
86
+ 7319 7319 0 0 0 0 C C C C G G G G G G C C G G C C T T T T
87
+ 6963 6963 0 0 0 0 A A C C T T G G A A C C G G C C T T T T
88
+ 6968 6968 0 0 0 0 C C C C G G G G G G G G G G C C T T T T
89
+
90
+ :param data: Snp data that contain full or partial information on the animal.
91
+ :param sid_col: Sample ID. Column name in data.
92
+ :param snp_col: Snp column name in data.
93
+ :param fid_col: Family ID column name in data (if unknown use the same id as for the sample id in column two).
94
+ :param father_col: Paternal ID column name in data (if unknown use 0).
95
+ :param mother_col: Maternal ID column name in data (if unknown use 0).
96
+ :param sex_col: Sex column name in data (if unknown use 0).
97
+ :return: Returns an array of data in ped format to work with the plink program.
104
98
  """
105
99
 
106
100
  _fields = ["fid", "sid", "father", "mother", "sex", "not_used", "snp"]
@@ -153,37 +147,29 @@ def make_fam(
153
147
  sex_val: int = 0,
154
148
  pheno_col: str = None,
155
149
  pheno_val: int = -9
156
-
157
150
  ) -> pd.DataFrame | None:
158
151
  """ PLINK sample information file
159
152
  https://www.cog-genomics.org/plink/1.9/formats#fam
160
153
 
161
- A text file with no header line, and one line per sample with the
162
- following six fields:
154
+ A text file with no header line, and one line per sample with the following
155
+ six fields:
163
156
 
164
157
  1. Family ID ('FID')
165
158
  2. Within-family ID ('IID'; cannot be '0')
166
159
  3. Within-family ID of father ('0' if father isn't in dataset)
167
160
  4. Within-family ID of mother ('0' if mother isn't in dataset)
168
161
  5. Sex code ('1' = male, '2' = female, '0' = unknown)
169
- 6. Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric =
170
- missing data if case/control)
171
-
172
- :param data: Snp data that contain full or partial information on the
173
- animal
174
- :param fid_col: Family ID, default value "1". Must not contain
175
- underline - "_"
176
- :param sid_col: Within-family ID ('IID'; cannot be '0'). Must not contain
177
- underline - "_"
178
- :param father_col: Within-family ID of father ('0' if father isn't in
179
- dataset)
180
- :param mother_col: Within-family ID of mother ('0' if mother isn't in
181
- dataset)
162
+ 6. Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric = missing data if case/control)
163
+
164
+ :param data: Snp data that contain full or partial information on the animal
165
+ :param fid_col: Family ID, default value "1". Must not contain an underscore - "_"
166
+ :param sid_col: Within-family ID ('IID'; cannot be '0'). Must not contain an underscore - "_"
167
+ :param father_col: Within-family ID of father ('0' if father isn't in dataset)
168
+ :param mother_col: Within-family ID of mother ('0' if mother isn't in dataset)
182
169
  :param sex_col: Sex column name in data
183
170
  :param sex_val: Sex code ('1' = male, '2' = female, '0' = unknown)
184
171
  :param pheno_col: Pheno column name in data
185
- :param pheno_val: Phenotype value ('1' = control, '2' = case,
186
- '-9'/'0'/non-numeric = missing data if case/control)
172
+ :param pheno_val: Phenotype value ('1' = control, '2' = case,'-9'/'0'/non-numeric = missing data if case/control)
187
173
  :return: Return data in .fam format
188
174
  """
189
175
 
@@ -236,7 +222,7 @@ def make_lgen(
236
222
  """ PLINK long-format genotype file
237
223
  https://www.cog-genomics.org/plink/1.9/formats#lgen
238
224
 
239
- A text file with no header line, and one line per genotype call (or
225
+ A text file with no header line, and one line per genotype call (or
240
226
  just not-homozygous-major calls if 'lgen-ref' was invoked) usually with
241
227
  the following five fields:
242
228
 
@@ -246,7 +232,7 @@ def make_lgen(
246
232
  4. Allele call 1 ('0' for missing)
247
233
  5. Allele call 2
248
234
 
249
- There are several variations which are also handled by PLINK; see the
235
+ There are several variations which are also handled by PLINK; see the
250
236
  original discussion for details.
251
237
 
252
238
  :param data: Data obtained after parsing FinalReport.txt
@@ -254,7 +240,7 @@ def make_lgen(
254
240
  :param snp_name:
255
241
  :param fid_col: Family ID, default value "1"
256
242
  :param alleles:
257
- :return: - Return data in formate .lgen
243
+ :return: Return data in .lgen format
258
244
  """
259
245
  _fields = ['fid', 'sid', 'snp_name', 'allele1', 'allele2']
260
246
  _f_dtype = dict(zip(_fields, (str for _ in range(len(_fields)))))
@@ -295,7 +281,7 @@ def _check_underscore(value: str) -> bool:
295
281
  """ Checking for underscore in a string
296
282
 
297
283
  :param value: String to be checked
298
- :return: Return True if there is an underline in the string, False if not
284
+ :return: Return True if there is an underscore in the string, False if not.
299
285
  """
300
286
  _under_l = re.compile(r"_")
301
287
 
@@ -15,11 +15,12 @@ def call_rate(
15
15
  not missing. In the following example, we filter using a call rate of 95%,
16
16
  meaning we retain SNPs for which there is less than 5% missing data.
17
17
 
18
- Of the say, 54K markers in the chip, 50K have been genotyped for a
19
- particular animal, the “call rate animal” is 50K/54K=93%
20
- Of the say, 900 animals genotyped for marker CL635944_160.1, how many
18
+ If, of say 54K markers on the chip, 50K have been genotyped for a
19
+ particular animal, the “call rate animal” is 50K/54K=93%.
20
+
21
+ Of, say, 900 animals genotyped for marker CL635944_160.1, how many
21
22
  have actually been successfully read? Assume that 600 have been read, then
22
- the “call rate marker” is 600/900 = 67%
23
+ the “call rate marker” is 600/900 = 67%.
23
24
 
24
25
  :param data: Pre-processed data on which the call rate is calculated.
25
26
  :param id_col: The name of the column with the id of the animals or
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: snplib
3
- Version: 1.0.8
3
+ Version: 1.0.9
4
4
  Summary: Snptools is a tool for Single Nucleotide Polymorphism (SNP) data processing
5
5
  Author-email: Igor <igor.loschinin@gmail.com>
6
6
  License: GNU
@@ -73,7 +73,7 @@ from snplib import (
73
73
  ```
74
74
 
75
75
  ## Documentation
76
- Detailed documentation on how to use SNPTools is available see the [docs](./docs/_build/index.html).
76
+ Detailed documentation on how to use SNPTools is available; see the [docs](docs/_build/index.html).
77
77
 
78
78
  ## License
79
79
  This project is licensed under the GNU General Public License - see the
@@ -3,7 +3,7 @@ snplib/finalreport/__init__.py,sha256=Yk49x8t-STIfsdP6QLMtaGm1gTj_n-XS8kchPguvW1
3
3
  snplib/finalreport/_finalreport.py,sha256=el_d8MVmpic3wKCRJ-J52VZYSmMuNSf4p_tmPkgh0Z0,5876
4
4
  snplib/format/__init__.py,sha256=3W_l_sP1u9HV3HWwnsJxPGw9anrVknstqLaJmWQaG0k,261
5
5
  snplib/format/__settings.py,sha256=kyAVZ4tiU61sNr3jQhjXbLXRyBA3pjFfCw3fOfSkY14,289
6
- snplib/format/_plink.py,sha256=Z09IOPACOt3n8CKEVRkE4tLT16I8e_6ZoMaWRxSImrA,10529
6
+ snplib/format/_plink.py,sha256=cjT6PkvDJr8KwvQo76i7_Hm1Og4bASYCDN9G7CHsQ00,10372
7
7
  snplib/format/_snp.py,sha256=oI-V4-_w28aX-VxoimywLDDnX6owhdjLbqt9a54_ouU,3172
8
8
  snplib/parentage/__init__.py,sha256=bN3mWTxmaFQ1qzRtyMLaAoxfomz6jnoWa-kmnJ9q_fE,280
9
9
  snplib/parentage/_discov.py,sha256=qGlNzpl4xKOWRr6-fi1osylzizgiPO8vCus8VE56nec,3180
@@ -12,11 +12,11 @@ snplib/parentage/_verif.py,sha256=VbX46dC4tl4Qeuw65aRg1s6hSn0FI25Hy9-3U_jxmrg,30
12
12
  snplib/parentage/isag_disc.pl,sha256=XzjcsnO_kwPg4WaE2YMuZXBNHQ9ixi6pg5n2mfGOuJU,14219
13
13
  snplib/parentage/isag_verif.pl,sha256=e_c4YGd5_JXGWqFQwmcxjp6hEkdcqpK_5y5MqJ8J9YY,8254
14
14
  snplib/statistics/__init__.py,sha256=XJFU7mEwAJJ2M187jEkO8rFNYKoxF-g9KF_stS7eFFw,302
15
- snplib/statistics/_callrate.py,sha256=ghB1EXT5JLQeIEIzh8LjWpqAnhCtCOk6l5ecNMLtQa0,1865
15
+ snplib/statistics/_callrate.py,sha256=yfHxnNVpcDfV3qxZVwrk2RWPgy9dTf7NHWczDUORwtY,1866
16
16
  snplib/statistics/_freq.py,sha256=ZPZBZM3xq9EseOxuMzRVvzkjjFfaaA4ZvF7XI8ctON0,1623
17
17
  snplib/statistics/_snphwe.py,sha256=KcoRGwovMCc53-GJ8VfYs_3ZEHObgt8B0EvrW5nFnmM,3353
18
- snplib-1.0.8.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
- snplib-1.0.8.dist-info/METADATA,sha256=Imm2fupAtiH61erPCanyjqNv50ssw0nLTEDwHJ92KE0,2165
20
- snplib-1.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
21
- snplib-1.0.8.dist-info/top_level.txt,sha256=CGCrLXuCSyXPCTwMFQjPxQR7b93FFFft56sAPPun_2g,7
22
- snplib-1.0.8.dist-info/RECORD,,
18
+ snplib-1.0.9.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
19
+ snplib-1.0.9.dist-info/METADATA,sha256=tgDJRi81nc66_LSZtTTkvPENK6r9OZ6WKf_MGPk7hcg,2163
20
+ snplib-1.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
21
+ snplib-1.0.9.dist-info/top_level.txt,sha256=CGCrLXuCSyXPCTwMFQjPxQR7b93FFFft56sAPPun_2g,7
22
+ snplib-1.0.9.dist-info/RECORD,,
File without changes