PyPI - snplib - Versions diffs - 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

snplib/__init__.py +8 -8
snplib/finalreport/__init__.py +7 -7
snplib/finalreport/_finalreport.py +251 -251
snplib/format/__init__.py +19 -19
snplib/format/__settings.py +7 -7
snplib/format/_plink.py +305 -305
snplib/format/_snp.py +113 -113
snplib/parentage/__init__.py +15 -15
snplib/parentage/_discov.py +102 -102
snplib/parentage/_isagmark.py +15 -15
snplib/parentage/_verif.py +91 -91
snplib/parentage/isag_disc.pl +0 -0
snplib/parentage/isag_verif.pl +0 -0
snplib/statistics/__init__.py +16 -16
snplib/statistics/_callrate.py +59 -59
snplib/statistics/_freq.py +67 -67
snplib/statistics/_snphwe.py +132 -132
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/LICENSE +674 -674
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/METADATA +80 -97
snplib-1.0.8.dist-info/RECORD +22 -0
snplib/finalreport/tests/__init__.py +0 -7
snplib/finalreport/tests/test_finalreport.py +0 -215
snplib/format/tests/__init__.py +0 -7
snplib/format/tests/test_plink_fam.py +0 -121
snplib/format/tests/test_plink_lgen.py +0 -106
snplib/format/tests/test_plink_map.py +0 -42
snplib/format/tests/test_plink_ped.py +0 -136
snplib/format/tests/test_snp.py +0 -128
snplib/parentage/tests/__init__.py +0 -7
snplib/parentage/tests/test_discov.py +0 -164
snplib/parentage/tests/test_verif.py +0 -160
snplib/statistics/tests/__init__.py +0 -7
snplib/statistics/tests/test_callrate.py +0 -171
snplib/statistics/tests/test_freq_allele.py +0 -87
snplib/statistics/tests/test_freq_maf.py +0 -17
snplib/statistics/tests/test_hwe_t.py +0 -41
snplib/statistics/tests/test_snphwe.py +0 -41
snplib-1.0.7.dist-info/RECORD +0 -37
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/WHEEL +0 -0
{snplib-1.0.7.dist-info → snplib-1.0.8.dist-info}/top_level.txt +0 -0

snplib/format/_snp.py CHANGED Viewed

@@ -1,113 +1,113 @@
-# !/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-from pathlib import Path
-from .__settings import FIELDS_ILLUMIN, MAP_FIELDS
-import pandas as pd
-class Snp(object):
-	""" The process of converting genomic map data - FinalReport.txt obtained
-	from Illumin. Recoding allele data into quantitative data, saving in the
-	format necessary for calculating gblup on blupf90.
-	:argument fmt: Data format to use snp in plink and blupf90. Default
-		value "uga". """
-	_ALLELE_CODE = {
-		'AA': 0, 'AB': 1, 'BA': 1, 'BB': 2, '--': 5
-	}
-	_FIELDS = ['SNP_NAME', 'SAMPLE_ID', 'SNP']
-	_F_DTYPE = dict(zip(_FIELDS, (str for _ in range(len(_FIELDS)))))
-	def __init__(self, fmt: str | None = "uga") -> None:
-		self._format_data = fmt
-		self.__data_snp = None
-	@property
-	def data(self) -> pd.DataFrame | None:
-		return self.__data_snp
-	def process(self, data: pd.DataFrame) -> None:
-		""" Data processing and formatting. Calculation of statistical
-		information
-		:param data: Data from FinalReport file. Example:
-			SNP Name  Sample ID  Allele1 - AB  Allele2 - AB  GC Score  GT Score
-			ABCA12	14814	A	A	0.4048	0.8164
-			ARS-BFGL-BAC-13031	14814	B	B	0.9083	0.8712
-			ARS-BFGL-BAC-13039	14814	A	A	0.9005	0.9096
-			ARS-BFGL-BAC-13049	14814	A	B	0.9295	0.8926
-		:return: Returns true if the data was formatted successfully and
-			statistical information was calculated, false if an error.
-		"""
-		if not all(list(map(lambda x: x in data.columns, FIELDS_ILLUMIN))):
-			raise KeyError(
-				'The name of the fields does not match the finalreport.txt '
-				'file from Illumina'
-			)
-		self.__data_snp = data.rename(columns=MAP_FIELDS)
-		self.__data_snp['SNP'] = \
-			self.__data_snp[['ALLELE1', 'ALLELE2']].\
-			sum(axis=1).\
-			map(Snp._ALLELE_CODE)
-		self.__data_snp = self.__data_snp[Snp._FIELDS].astype(Snp._F_DTYPE)
-		if self._format_data is not None and self._format_data == "uga":
-			self.__data_snp = self._format_uga(
-				self.__data_snp[['SAMPLE_ID', 'SNP']]
-			)
-	@staticmethod
-	def _format_uga(data: pd.DataFrame) -> pd.DataFrame:
-		""" Data format to use snp in plink and blupf90. """
-		return data.groupby(by='SAMPLE_ID').sum().reset_index()
-	def to_file(self, file_path: str | Path) -> None:
-		""" Saving data to a file.
-		:param file_path: Path to file
-		"""
-		if isinstance(file_path, str):
-			file_path = Path(file_path)
-		if self._format_data is not None and self._format_data == "uga":
-			max_len = self.__data_snp["SAMPLE_ID"].str.len().max()
-			self.__data_snp.\
-				apply(
-					lambda x: " ".join([
-						self._add_space(x.iloc[0], max_len), x.iloc[1]
-					]),
-					axis=1
-				).\
-				to_csv(file_path, index=False, header=False)
-			self.__data_snp["SAMPLE_ID"] = \
-				self.__data_snp["SAMPLE_ID"].str.strip()
-			return None
-		self.__data_snp.to_csv(file_path, sep=" ", index=False)
-	@staticmethod
-	def _add_space(value: str, max_len: int) -> str:
-		""" Adding spaces up to the maximum length of the value in the
-		sample_id data.
-		:param value: Sample_id value
-		:param max_len: Max len sample_id value
-		:return: Return replacing value
-		"""
-		return "".join([value, " " * (max_len - len(value))])
+# !/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+from pathlib import Path
+from .__settings import FIELDS_ILLUMIN, MAP_FIELDS
+import pandas as pd
+class Snp(object):
+	""" The process of converting genomic map data - FinalReport.txt obtained
+	from Illumin. Recoding allele data into quantitative data, saving in the
+	format necessary for calculating gblup on blupf90.
+	:argument fmt: Data format to use snp in plink and blupf90. Default
+		value "uga". """
+	_ALLELE_CODE = {
+		'AA': 0, 'AB': 1, 'BA': 1, 'BB': 2, '--': 5
+	}
+	_FIELDS = ['SNP_NAME', 'SAMPLE_ID', 'SNP']
+	_F_DTYPE = dict(zip(_FIELDS, (str for _ in range(len(_FIELDS)))))
+	def __init__(self, fmt: str | None = "uga") -> None:
+		self._format_data = fmt
+		self.__data_snp = None
+	@property
+	def data(self) -> pd.DataFrame | None:
+		return self.__data_snp
+	def process(self, data: pd.DataFrame) -> None:
+		""" Data processing and formatting. Calculation of statistical
+		information
+		:param data: Data from FinalReport file. Example:
+			SNP Name  Sample ID  Allele1 - AB  Allele2 - AB  GC Score  GT Score
+			ABCA12	14814	A	A	0.4048	0.8164
+			ARS-BFGL-BAC-13031	14814	B	B	0.9083	0.8712
+			ARS-BFGL-BAC-13039	14814	A	A	0.9005	0.9096
+			ARS-BFGL-BAC-13049	14814	A	B	0.9295	0.8926
+		:return: Returns true if the data was formatted successfully and
+			statistical information was calculated, false if an error.
+		"""
+		if not all(list(map(lambda x: x in data.columns, FIELDS_ILLUMIN))):
+			raise KeyError(
+				'The name of the fields does not match the finalreport.txt '
+				'file from Illumina'
+			)
+		self.__data_snp = data.rename(columns=MAP_FIELDS)
+		self.__data_snp['SNP'] = \
+			self.__data_snp[['ALLELE1', 'ALLELE2']].\
+			sum(axis=1).\
+			map(Snp._ALLELE_CODE)
+		self.__data_snp = self.__data_snp[Snp._FIELDS].astype(Snp._F_DTYPE)
+		if self._format_data is not None and self._format_data == "uga":
+			self.__data_snp = self._format_uga(
+				self.__data_snp[['SAMPLE_ID', 'SNP']]
+			)
+	@staticmethod
+	def _format_uga(data: pd.DataFrame) -> pd.DataFrame:
+		""" Data format to use snp in plink and blupf90. """
+		return data.groupby(by='SAMPLE_ID').sum().reset_index()
+	def to_file(self, file_path: str | Path) -> None:
+		""" Saving data to a file.
+		:param file_path: Path to file
+		"""
+		if isinstance(file_path, str):
+			file_path = Path(file_path)
+		if self._format_data is not None and self._format_data == "uga":
+			max_len = self.__data_snp["SAMPLE_ID"].str.len().max()
+			self.__data_snp.\
+				apply(
+					lambda x: " ".join([
+						self._add_space(x.iloc[0], max_len), x.iloc[1]
+					]),
+					axis=1
+				).\
+				to_csv(file_path, index=False, header=False)
+			self.__data_snp["SAMPLE_ID"] = \
+				self.__data_snp["SAMPLE_ID"].str.strip()
+			return None
+		self.__data_snp.to_csv(file_path, sep=" ", index=False)
+	@staticmethod
+	def _add_space(value: str, max_len: int) -> str:
+		""" Adding spaces up to the maximum length of the value in the
+		sample_id data.
+		:param value: Sample_id value
+		:param max_len: Max len sample_id value
+		:return: Return replacing value
+		"""
+		return "".join([value, " " * (max_len - len(value))])

snplib/parentage/__init__.py CHANGED Viewed

@@ -1,15 +1,15 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-from ._discov import Discovery
-from ._verif import Verification
-from ._isagmark import isag_verif, isag_disc
-__all__ = [
-	"Discovery",
-	"Verification",
-	"isag_disc",
-	"isag_verif"
-]
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+from ._discov import Discovery
+from ._verif import Verification
+from ._isagmark import isag_verif, isag_disc
+__all__ = [
+	"Discovery",
+	"Verification",
+	"isag_disc",
+	"isag_verif"
+]

snplib/parentage/_discov.py CHANGED Viewed

@@ -1,102 +1,102 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-import pandas as pd
-"""
-Search for paternity according to ICAR recommendations
-https://www.icar.org/Documents/GenoEx/ICAR%20Guidelines%20for%20Parentage%20Verification%20and%20Parentage%20Discovery%20based%20on%20SNP.pdf
-"""
-class Discovery(object):
-    """ Search for paternity according to ICAR recommendations
-    :argument isag_markers: Fixed sample of markers to confirm paternity.
-    """
-    def __init__(
-            self, isag_markers: pd.Series | list | set | None = None
-    ) -> None:
-        self.__isag_markers = isag_markers
-        self.__num_conflicts = None  # Number of conflicts
-        self.__perc_conflicts = None
-    @property
-    def status(self) -> None | str:
-        """ The status of each parent discovered. """
-        if self.__perc_conflicts is not None:
-            if 0 <= self.__perc_conflicts < 1:
-                return 'Discovered'
-            elif 1 < self.__perc_conflicts < 3:
-                return 'Doubtful'
-            elif self.__perc_conflicts >= 3:
-                return 'Excluded'
-            else:
-                return None
-    @property
-    def num_conflicts(self) -> None | int:
-        return self.__num_conflicts
-    @property
-    def perc_conflicts(self) -> None | float:
-        return self.__perc_conflicts
-    def search_parent(
-            self,
-            data: pd.DataFrame,
-            descendant: str,
-            parents: str,
-            snp_name_col: str
-    ) -> None:
-        """ Search for paternity.
-        :param data: SNP data for descendant and parent.
-        :param descendant: Columns name of the descendant in the data.
-        :param parents: Columns name or list name of the parents in the data.
-        :param snp_name_col: SNP columns name is data.
-        """
-        if self.__isag_markers is None:
-            raise ValueError("Error. No array of snp names to verify")
-        sample_by_markers = data.loc[
-            data[snp_name_col].isin(self.__isag_markers),
-            [snp_name_col, descendant, parents]
-        ]
-        # Filtering 5s from a descendent
-        desc_marks = sample_by_markers.loc[
-            sample_by_markers[descendant] != 5, [snp_name_col, descendant]
-        ]
-        # According to ICAR, the number of available markers must be
-        # above 450
-        if len(desc_marks) < 450:
-            raise Exception("Calf call rate is low.")
-        # Common after filtering markers of potential ancestors
-        sample_parents = sample_by_markers.loc[
-            sample_by_markers[snp_name_col].isin(desc_marks[snp_name_col]),
-            parents
-        ]
-        # Number of available markers in potential ancestors
-        prob_parents_same_n_markers = (sample_parents < 5).sum()
-        # number of conflicts
-        self.__num_conflicts = (
-            abs(sample_parents.sub(desc_marks[descendant], axis=0)) == 2
-        ).sum()
-        # Percentage of conflicts
-        self.__perc_conflicts = (
-            (self.__num_conflicts / prob_parents_same_n_markers) * 100
-        ).round(2)
-    def __status_define(self) -> None:
-        ...
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+import pandas as pd
+"""
+Search for paternity according to ICAR recommendations
+https://www.icar.org/Documents/GenoEx/ICAR%20Guidelines%20for%20Parentage%20Verification%20and%20Parentage%20Discovery%20based%20on%20SNP.pdf
+"""
+class Discovery(object):
+    """ Search for paternity according to ICAR recommendations
+    :argument isag_markers: Fixed sample of markers to confirm paternity.
+    """
+    def __init__(
+            self, isag_markers: pd.Series | list | set | None = None
+    ) -> None:
+        self.__isag_markers = isag_markers
+        self.__num_conflicts = None  # Number of conflicts
+        self.__perc_conflicts = None
+    @property
+    def status(self) -> None | str:
+        """ The status of each parent discovered. """
+        if self.__perc_conflicts is not None:
+            if 0 <= self.__perc_conflicts < 1:
+                return 'Discovered'
+            elif 1 < self.__perc_conflicts < 3:
+                return 'Doubtful'
+            elif self.__perc_conflicts >= 3:
+                return 'Excluded'
+            else:
+                return None
+    @property
+    def num_conflicts(self) -> None | int:
+        return self.__num_conflicts
+    @property
+    def perc_conflicts(self) -> None | float:
+        return self.__perc_conflicts
+    def search_parent(
+            self,
+            data: pd.DataFrame,
+            descendant: str,
+            parents: str,
+            snp_name_col: str
+    ) -> None:
+        """ Search for paternity.
+        :param data: SNP data for descendant and parent.
+        :param descendant: Columns name of the descendant in the data.
+        :param parents: Columns name or list name of the parents in the data.
+        :param snp_name_col: SNP columns name is data.
+        """
+        if self.__isag_markers is None:
+            raise ValueError("Error. No array of snp names to verify")
+        sample_by_markers = data.loc[
+            data[snp_name_col].isin(self.__isag_markers),
+            [snp_name_col, descendant, parents]
+        ]
+        # Filtering 5s from a descendent
+        desc_marks = sample_by_markers.loc[
+            sample_by_markers[descendant] != 5, [snp_name_col, descendant]
+        ]
+        # According to ICAR, the number of available markers must be
+        # above 450
+        if len(desc_marks) < 450:
+            raise Exception("Calf call rate is low.")
+        # Common after filtering markers of potential ancestors
+        sample_parents = sample_by_markers.loc[
+            sample_by_markers[snp_name_col].isin(desc_marks[snp_name_col]),
+            parents
+        ]
+        # Number of available markers in potential ancestors
+        prob_parents_same_n_markers = (sample_parents < 5).sum()
+        # number of conflicts
+        self.__num_conflicts = (
+            abs(sample_parents.sub(desc_marks[descendant], axis=0)) == 2
+        ).sum()
+        # Percentage of conflicts
+        self.__perc_conflicts = (
+            (self.__num_conflicts / prob_parents_same_n_markers) * 100
+        ).round(2)
+    def __status_define(self) -> None:
+        ...

snplib/parentage/_isagmark.py CHANGED Viewed

@@ -1,15 +1,15 @@
-#!/usr/bin/env python
-# coding: utf-8
-__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
-from pathlib import Path
-import pandas as pd
-def isag_disc() -> pd.DataFrame:
-	return pd.read_pickle(Path(__file__).parent.joinpath("isag_disc.pl"))
-def isag_verif() -> pd.DataFrame:
-	return pd.read_pickle(Path(__file__).parent.joinpath("isag_verif.pl"))
+#!/usr/bin/env python
+# coding: utf-8
+__author__ = "Igor Loschinin (igor.loschinin@gmail.com)"
+from pathlib import Path
+import pandas as pd
+def isag_disc() -> pd.DataFrame:
+	return pd.read_pickle(Path(__file__).parent.joinpath("isag_disc.pl"))
+def isag_verif() -> pd.DataFrame:
+	return pd.read_pickle(Path(__file__).parent.joinpath("isag_verif.pl"))

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl

snplib 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl