PyPI - snplib - Versions diffs - 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

snplib/__init__.py +8 -8
snplib/finalreport/__init__.py +7 -7
snplib/finalreport/_finalreport.py +251 -251
snplib/format/__init__.py +19 -19
snplib/format/__settings.py +7 -7
snplib/format/_plink.py +305 -305
snplib/format/_snp.py +113 -113
snplib/parentage/__init__.py +15 -15
snplib/parentage/_discov.py +102 -102
snplib/parentage/_isagmark.py +15 -15
snplib/parentage/_verif.py +91 -91
snplib/parentage/isag_disc.pl +0 -0
snplib/parentage/isag_verif.pl +0 -0
snplib/statistics/__init__.py +16 -16
snplib/statistics/_callrate.py +59 -59
snplib/statistics/_freq.py +67 -67
snplib/statistics/_snphwe.py +132 -132
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/LICENSE +674 -674
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/METADATA +80 -97
snplib-1.0.8.dist-info/RECORD +22 -0
snplib/finalreport/tests/__init__.py +0 -7
snplib/finalreport/tests/test_finalreport.py +0 -215
snplib/format/tests/__init__.py +0 -7
snplib/format/tests/test_plink_fam.py +0 -121
snplib/format/tests/test_plink_lgen.py +0 -106
snplib/format/tests/test_plink_map.py +0 -42
snplib/format/tests/test_plink_ped.py +0 -136
snplib/format/tests/test_snp.py +0 -128
snplib/parentage/tests/__init__.py +0 -7
snplib/parentage/tests/test_discov.py +0 -164
snplib/parentage/tests/test_verif.py +0 -160
snplib/statistics/tests/__init__.py +0 -7
snplib/statistics/tests/test_callrate.py +0 -171
snplib/statistics/tests/test_freq_allele.py +0 -87
snplib/statistics/tests/test_freq_maf.py +0 -17
snplib/statistics/tests/test_hwe_t.py +0 -41
snplib/statistics/tests/test_snphwe.py +0 -41
snplib-1.0.7.dist-info/RECORD +0 -37
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/WHEEL +0 -0
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/top_level.txt +0 -0

snplib/parentage/_verif.py CHANGED Viewed

@@ -1,91 +1,91 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-import numpy as np
-import pandas as pd
-"""
-https://www.icar.org/Documents/GenoEx/ICAR%20Guidelines%20for%20Parentage%20Verification%20and%20Parentage%20Discovery%20based%20on%20SNP.pdf
-"""
-class Verification(object):
-    """
-    Verification of paternity according to ICAR recommendations.
-    :argument isag_marks: Fixed sample of markers to confirm paternity.
-    """
-    def __init__(
-            self, isag_marks: pd.Series | list | set | None = None
-    ) -> None:
-        self.__isag_marks = isag_marks
-        # The minimum number of SNP available in the profile
-        # of each animal and potential parent must be scaled (i.e.: 95%
-        # truncated down)
-        self.__min_num_snp = 0.95
-        self.__num_conflicts = None  # Number of conflicts
-    @property
-    def status(self) -> None | str:
-        if self.__num_conflicts is not None:
-            if self.__num_conflicts <= 2:
-                return 'Accept'
-            elif 3 <= self.__num_conflicts <= 5:
-                return 'Doubtful'
-            elif self.__num_conflicts > 5:
-                return 'Excluded'
-            else:
-                return None
-    @property
-    def num_conflicts(self) -> None | int:
-        return self.__num_conflicts
-    def check_on(
-            self,
-            data: pd.DataFrame,
-            descendant: str,
-            parent: str,
-            snp_name_col: str
-    ) -> None:
-        """ Verification of paternity according to ICAR recommendations.
-        :param data: SNP data for descendant and parent.
-        :param descendant: Columns name of the descendant in the data.
-        :param parent: Columns name of the parent in the data.
-        :param snp_name_col: SNP column name in data.
-        """
-        if self.__isag_marks is None:
-            raise ValueError('Error. No array of snp names to verify')
-        num_isag_mark = len(self.__isag_marks)
-        min_num_comm_snp = int(num_isag_mark - (2 * (num_isag_mark * 0.05)))
-        sample_mark = data.loc[
-            data[snp_name_col].isin(self.__isag_marks), [descendant, parent]
-        ]
-        # The number of markers is not 5ok
-        desc_n_markers = (sample_mark[descendant] < 5).sum()
-        parent_n_markers = (sample_mark[parent] < 5).sum()
-        # According to ICAR, the number of markers not 5ok should be more
-        # than 95%
-        if (desc_n_markers < num_isag_mark * self.__min_num_snp) and \
-                (parent_n_markers < num_isag_mark * self.__min_num_snp):
-            raise Exception('Calf and parent have low call rate')
-        comm_snp_no_missing = sample_mark.replace(5, np.nan).dropna()
-        num_comm_markers = len(comm_snp_no_missing)
-        if num_comm_markers < min_num_comm_snp:
-            raise Exception('Pair call rate is low')
-        self.__num_conflicts = (abs(
-            comm_snp_no_missing[descendant] - comm_snp_no_missing[parent]
-        ) == 2).sum()
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+import numpy as np
+import pandas as pd
+"""
+https://www.icar.org/Documents/GenoEx/ICAR%20Guidelines%20for%20Parentage%20Verification%20and%20Parentage%20Discovery%20based%20on%20SNP.pdf
+"""
+class Verification(object):
+    """
+    Verification of paternity according to ICAR recommendations.
+    :argument isag_marks: Fixed sample of markers to confirm paternity.
+    """
+    def __init__(
+            self, isag_marks: pd.Series | list | set | None = None
+    ) -> None:
+        self.__isag_marks = isag_marks
+        # The minimum number of SNP available in the profile
+        # of each animal and potential parent must be scaled (i.e.: 95%
+        # truncated down)
+        self.__min_num_snp = 0.95
+        self.__num_conflicts = None  # Number of conflicts
+    @property
+    def status(self) -> None | str:
+        if self.__num_conflicts is not None:
+            if self.__num_conflicts <= 2:
+                return 'Accept'
+            elif 3 <= self.__num_conflicts <= 5:
+                return 'Doubtful'
+            elif self.__num_conflicts > 5:
+                return 'Excluded'
+            else:
+                return None
+    @property
+    def num_conflicts(self) -> None | int:
+        return self.__num_conflicts
+    def check_on(
+            self,
+            data: pd.DataFrame,
+            descendant: str,
+            parent: str,
+            snp_name_col: str
+    ) -> None:
+        """ Verification of paternity according to ICAR recommendations.
+        :param data: SNP data for descendant and parent.
+        :param descendant: Columns name of the descendant in the data.
+        :param parent: Columns name of the parent in the data.
+        :param snp_name_col: SNP column name in data.
+        """
+        if self.__isag_marks is None:
+            raise ValueError('Error. No array of snp names to verify')
+        num_isag_mark = len(self.__isag_marks)
+        min_num_comm_snp = int(num_isag_mark - (2 * (num_isag_mark * 0.05)))
+        sample_mark = data.loc[
+            data[snp_name_col].isin(self.__isag_marks), [descendant, parent]
+        ]
+        # The number of markers is not 5ok
+        desc_n_markers = (sample_mark[descendant] < 5).sum()
+        parent_n_markers = (sample_mark[parent] < 5).sum()
+        # According to ICAR, the number of markers not 5ok should be more
+        # than 95%
+        if (desc_n_markers < num_isag_mark * self.__min_num_snp) and \
+                (parent_n_markers < num_isag_mark * self.__min_num_snp):
+            raise Exception('Calf and parent have low call rate')
+        comm_snp_no_missing = sample_mark.replace(5, np.nan).dropna()
+        num_comm_markers = len(comm_snp_no_missing)
+        if num_comm_markers < min_num_comm_snp:
+            raise Exception('Pair call rate is low')
+        self.__num_conflicts = (abs(
+            comm_snp_no_missing[descendant] - comm_snp_no_missing[parent]
+        ) == 2).sum()

snplib/parentage/isag_disc.pl ADDED Viewed

Binary file

snplib/parentage/isag_verif.pl ADDED Viewed

Binary file

snplib/statistics/__init__.py CHANGED Viewed

@@ -1,16 +1,16 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-from ._snphwe import hwe, hwe_test
-from ._callrate import call_rate
-from ._freq import allele_freq, minor_allele_freq
-__all__ = [
-	"call_rate",
-	"allele_freq",
-	"minor_allele_freq",
-	"hwe",
-	"hwe_test"
-]
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+from ._snphwe import hwe, hwe_test
+from ._callrate import call_rate
+from ._freq import allele_freq, minor_allele_freq
+__all__ = [
+	"call_rate",
+	"allele_freq",
+	"minor_allele_freq",
+	"hwe",
+	"hwe_test"
+]

snplib/statistics/_callrate.py CHANGED Viewed

@@ -1,59 +1,59 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-import pandas as pd
-def call_rate(
-		data: pd.DataFrame | str,
-		id_col: str = None,
-		snp_col: str = None
-) -> pd.DataFrame | float | None:
-	""" The call rate for a given SNP is defined as the proportion of
-	individuals in the study for which the corresponding SNP information is
-	not missing. In the following example, we filter using a call rate of 95%,
-	meaning we retain SNPs for which there is less than 5% missing data.
-		Of the say, 54K markers in the chip, 50K have been genotyped for a
-	particular animal, the “call rate animal” is 50K/54K=93%
-		Of the say, 900 animals genotyped for marker CL635944_160.1, how many
-	have actually been successfully read? Assume that 600 have been read, then
-	the “call rate marker” is 600/900 = 67%
-	:param data: Pre-processed data on which the call rate is calculated.
-	:param id_col: The name of the column with the id of the animals or
-		markers.
-	:param snp_col: The name of the column with the snp sequence.
-	:return: Return dataframe with call rates for each animal if a dataframe
-		is transmitted. The number if the snp sequence is passed as a string.
-		None if there were errors.
-	"""
-	if isinstance(data, pd.DataFrame):
-		try:
-			if data[snp_col].dtype.hasobject:
-				if not data[snp_col].str.isdigit().all():
-					return None
-				return data[[id_col, snp_col]].\
-					groupby(by=id_col)[snp_col].\
-					apply(lambda x: 1 - ((x == "5").sum() / len(x))).\
-					reset_index()
-			return data[[id_col, snp_col]]. \
-				groupby(by=id_col)[snp_col]. \
-				apply(lambda x: 1 - ((x == 5).sum() / len(x))). \
-				reset_index()
-		except Exception as e:
-			raise e
-	elif isinstance(data, str):
-		if not data.isdigit():
-			return None
-		return round(1 - (data.count('5') / len(data)), 6)
-	else:
-		return None
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+import pandas as pd
+def call_rate(
+		data: pd.DataFrame | str,
+		id_col: str = None,
+		snp_col: str = None
+) -> pd.DataFrame | float | None:
+	""" The call rate for a given SNP is defined as the proportion of
+	individuals in the study for which the corresponding SNP information is
+	not missing. In the following example, we filter using a call rate of 95%,
+	meaning we retain SNPs for which there is less than 5% missing data.
+		Of the say, 54K markers in the chip, 50K have been genotyped for a
+	particular animal, the “call rate animal” is 50K/54K=93%
+		Of the say, 900 animals genotyped for marker CL635944_160.1, how many
+	have actually been successfully read? Assume that 600 have been read, then
+	the “call rate marker” is 600/900 = 67%
+	:param data: Pre-processed data on which the call rate is calculated.
+	:param id_col: The name of the column with the id of the animals or
+		markers.
+	:param snp_col: The name of the column with the snp sequence.
+	:return: Return dataframe with call rates for each animal if a dataframe
+		is transmitted. The number if the snp sequence is passed as a string.
+		None if there were errors.
+	"""
+	if isinstance(data, pd.DataFrame):
+		try:
+			if data[snp_col].dtype.hasobject:
+				if not data[snp_col].str.isdigit().all():
+					return None
+				return data[[id_col, snp_col]].\
+					groupby(by=id_col)[snp_col].\
+					apply(lambda x: 1 - ((x == "5").sum() / len(x))).\
+					reset_index()
+			return data[[id_col, snp_col]]. \
+				groupby(by=id_col)[snp_col]. \
+				apply(lambda x: 1 - ((x == 5).sum() / len(x))). \
+				reset_index()
+		except Exception as e:
+			raise e
+	elif isinstance(data, str):
+		if not data.isdigit():
+			return None
+		return round(1 - (data.count('5') / len(data)), 6)
+	else:
+		return None

snplib/statistics/_freq.py CHANGED Viewed

@@ -1,67 +1,67 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-import pandas as pd
-def allele_freq(
-		data: pd.DataFrame | str, id_col: str = None, seq_col: str = None
-) -> pd.DataFrame | float | None:
-	""" The allele frequency represents the incidence of a gene variant in a
-	population.
-	:param data: Data array.
-	:param id_col: Columns with snp names.
-	:param seq_col: Columns with value snp in format ucg - 0, 1, 2, 5.
-	:return: Return the alleles frequency.
-	"""
-	if isinstance(data, pd.DataFrame):
-		try:
-			if data[seq_col].dtype.hasobject:
-				if not data[seq_col].str.isdigit().all():
-					return None
-				return data.\
-					loc[data[seq_col] != "5", [id_col, seq_col]]. \
-					groupby(by=id_col)[seq_col]. \
-					apply(lambda x: x.astype("int8").sum() / (2 * x.count())).\
-					reset_index().\
-					round(3)
-			return data.\
-				loc[data[seq_col] != 5, [id_col, seq_col]].\
-				groupby(by=id_col)[seq_col].\
-				apply(lambda x: x.sum() / (2 * x.count())).\
-				reset_index().\
-				round(3)
-		except Exception as e:
-			raise e
-	elif isinstance(data, str):
-		if not data.isdigit():
-			return None
-		sam_seq = tuple(
-			map(int, filter(lambda x: x if x != "5" else None, data))
-		)
-		return round(sum(sam_seq) / (2 * len(sam_seq)), 3)
-	else:
-		return None
-def minor_allele_freq(value: float) -> float:
-	""" The minor allele frequency is therefore the frequency at which the
-	minor allele occurs within a population.
-	:param value: Allele frequency
-	:return: Return the minor alleles frequency
-	"""
-	if value > 0.5:
-		return round(1 - value, 3)
-	return round(value, 3)
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+import pandas as pd
+def allele_freq(
+		data: pd.DataFrame | str, id_col: str = None, seq_col: str = None
+) -> pd.DataFrame | float | None:
+	""" The allele frequency represents the incidence of a gene variant in a
+	population.
+	:param data: Data array.
+	:param id_col: Columns with snp names.
+	:param seq_col: Columns with value snp in format ucg - 0, 1, 2, 5.
+	:return: Return the alleles frequency.
+	"""
+	if isinstance(data, pd.DataFrame):
+		try:
+			if data[seq_col].dtype.hasobject:
+				if not data[seq_col].str.isdigit().all():
+					return None
+				return data.\
+					loc[data[seq_col] != "5", [id_col, seq_col]]. \
+					groupby(by=id_col)[seq_col]. \
+					apply(lambda x: x.astype("int8").sum() / (2 * x.count())).\
+					reset_index().\
+					round(3)
+			return data.\
+				loc[data[seq_col] != 5, [id_col, seq_col]].\
+				groupby(by=id_col)[seq_col].\
+				apply(lambda x: x.sum() / (2 * x.count())).\
+				reset_index().\
+				round(3)
+		except Exception as e:
+			raise e
+	elif isinstance(data, str):
+		if not data.isdigit():
+			return None
+		sam_seq = tuple(
+			map(int, filter(lambda x: x if x != "5" else None, data))
+		)
+		return round(sum(sam_seq) / (2 * len(sam_seq)), 3)
+	else:
+		return None
def minor_allele_freq(value: float) -> float:
	""" Fold an allele frequency onto the minor allele.

	A frequency above 0.5 belongs to the major allele, so its complement
	``1 - value`` is reported instead; any other value is kept as is.

	:param value: Allele frequency
	:return: Return the minor alleles frequency, rounded to 3 digits
	"""
	folded = 1 - value if value > 0.5 else value
	return round(folded, 3)

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl