PyPI - ssi-analysis-result-parsers - Versions diffs - 0.0.1__tar.gz → 0.0.3__tar.gz - Mend

ssi-analysis-result-parsers 0.0.1 (tar.gz) → 0.0.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info → ssi_analysis_result_parsers-0.0.3}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: ssi_analysis_result_parsers
-Version: 0.0.1
+Version: 0.0.3
 Summary: TODO
 Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
-Author: thej-ssi
+Author: Thor Bech Johannesen
 Author-email: thej@ssi.dk
 License: MIT License
 Keywords: nbdev jupyter notebook python

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3}/settings.ini RENAMED Viewed

@@ -5,7 +5,7 @@
 ### Python library ###
 repo = ssi_analysis_result_parsers
 lib_name = %(repo)s
-version = 0.0.1
+version = 0.0.3
 min_python = 3.9
 license = MIT
 black_formatting = True
@@ -29,7 +29,7 @@ title = %(lib_name)s
 ### PyPI ###
 audience = Developers
-author = thej-ssi
+author = Thor Bech Johannesen
 author_email = thej@ssi.dk
 copyright = 2025 onwards, %(author)s
 description = TODO
@@ -49,4 +49,5 @@ pip_requirements = python_dotenv envyaml pandas black
 console_scripts =
     blast_parser_presence_absence=ssi_analysis_result_parsers.blast_parser:presence_absence
     blast_parser_allele_matches=ssi_analysis_result_parsers.blast_parser:allele_matches
-    legionella_parser=ssi_analysis_result_parsers.Legionella_parser:legionella_parser
+    get_leg_results=ssi_analysis_result_parsers.Legionella_parser:legionella_parser
+    get_leg_results_batch=ssi_analysis_result_parsers.Legionella_parser:legionella_batch_parser

ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers/Legionella_parser.py ADDED Viewed

@@ -0,0 +1,219 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
+# %% auto 0
+__all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_batch_from_sheet', 'LegionellaResults',
+           'legionella_batch_from_dict', 'legionella_parser', 'legionella_batch_parser']
+# %% ../nbs/39_Legionella_parser.ipynb 3
+# standard libs
+import os
+import re
+# Common to template
+# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
+import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
+import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
+import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
+from fastcore import (
+    test,
+)
+from fastcore.script import (
+    call_parse,
+)  # for @call_parse, https://fastcore.fast.ai/script
+import json  # for nicely printing json and yaml
+# import functions from core module (optional, but most likely needed).
+from ssi_analysis_result_parsers import (
+    core,
+)
+from .blast_parser import extract_presence_absence
+# Project specific libraries
+from pathlib import Path
+import pandas
+import sys
+# %% ../nbs/39_Legionella_parser.ipynb 6
+def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
+    """
+    Returns dictionary of results found in the Legionella SBT summary output
+    """
+    if os.path.exists(legionella_sbt_results_tsv):
+        df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
+        df.set_index("sample", inplace=True, drop=True)
+        d = df.to_dict(orient="index")
+        fname = next(iter(d))
+        return d[fname]
+    else:
+        print(
+            f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
+            file=sys.stderr,
+        )
+        return None
+def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
+    sbt_results_dict = extract_legionella_sbt(
+        legionella_sbt_results_tsv=legionella_sbt_results_tsv
+    )
+    lag1_blast_dict = extract_presence_absence(
+        blast_output_tsv=lag1_blast_tsv,
+        hits_as_string=False,
+        include_match_stats=False,
+        gene_names=["lag-1"],
+    )
+    results_dict = core.update_results_dict(
+        sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
+    )
+    return results_dict
+def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
+    results_dict = {}
+    for sample_name, path_dict in file_paths.items():
+        legionella_results = legionella_summary(
+            legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+            lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
+        )
+        results_dict[sample_name] = legionella_results
+    if output_file is not None:
+        df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
+            names="sample_name"
+        )
+        df.to_csv(output_file, sep="\t", index=False)
+    return results_dict
+class LegionellaResults(core.PipelineResults):
+    @classmethod
+    def from_tool_paths(
+        cls, legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path, sample_name=None
+    ):
+        legionella_results = cls.legionella_summary(
+            legionella_sbt_results_tsv=legionella_sbt_results_tsv,
+            lag1_blast_tsv=lag1_blast_tsv,
+        )
+        return cls({sample_name: legionella_results})
+    @classmethod
+    def from_tool_paths_dict(cls, file_paths: dict):
+        results_dict = {}
+        for sample_name, path_dict in file_paths.items():
+            legionella_results = cls.legionella_summary(
+                legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+                lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
+            )
+            results_dict[sample_name] = legionella_results
+        return cls(results_dict)
+    @classmethod
+    def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
+        file_paths = file_paths_df.to_dict(orient="index")
+        results_dict = {}
+        for sample_name, path_dict in file_paths.items():
+            legionella_results = cls.legionella_summary(
+                legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+                lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
+            )
+            print(legionella_results)
+            results_dict[sample_name] = legionella_results
+        return cls(results_dict)
+    @classmethod
+    def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
+        file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
+        file_paths_df.set_index("sample_name", inplace=True, drop=True)
+        # return_cls =
+        # results_dict = file_paths_df.to_dict(orient="index")
+        return cls.from_tool_paths_dataframe(file_paths_df)
+        """for sample_name, path_dict in file_paths.items():
+            legionella_results = cls.legionella_summary(legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+                                                    lag1_blast_tsv=Path(path_dict["lag1_blast_results"]))
+            results_dict[sample_name] = legionella_results
+        return cls(results_dict)"""
+    def legionella_summary(
+        legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
+    ) -> dict:
+        sbt_results_dict = extract_legionella_sbt(
+            legionella_sbt_results_tsv=legionella_sbt_results_tsv
+        )
+        lag1_blast_dict = extract_presence_absence(
+            blast_output_tsv=lag1_blast_tsv,
+            hits_as_string=False,
+            include_match_stats=False,
+            gene_names=["lag-1"],
+        )
+        print(lag1_blast_dict)
+        results_dict = core.update_results_dict(
+            sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
+        )
+        return results_dict
+    def __repr__(self):
+        return f"< Legionella analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
+def legionella_batch_from_dict(file_paths: dict, output_file: Path = None):
+    results_dict = {}
+    for sample_name, path_dict in file_paths.items():
+        legionella_results = legionella_summary(
+            legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+            lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
+        )
+        results_dict[sample_name] = legionella_results
+    if output_file is not None:
+        df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
+            names="sample_name"
+        )
+        df.to_csv(output_file, sep="\t", index=False)
+    return results_dict
+def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
+    results_dict = {}
+    for sample_name, path_dict in file_paths.items():
+        legionella_results = legionella_summary(
+            legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
+            lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
+        )
+        results_dict[sample_name] = legionella_results
+    if output_file is not None:
+        df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
+            names="sample_name"
+        )
+        df.to_csv(output_file, sep="\t", index=False)
+    return results_dict
+# %% ../nbs/39_Legionella_parser.ipynb 9
+@call_parse
+def legionella_parser(
+    legionella_sbt_file: Path = None,  # Path "*.sbt.tsv from legionella_sbt program"
+    lag_1_blast_output: Path = None,  #  Path to output from lag1_blast. Generated with blastn -query lag-1.fasta -subject assembly.fasta -outfmt "6 qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore"
+    output_file: Path = None,  # Path to output tsv
+    sample_name: str = None,
+    config_file: str = None,  # config file to set env vars from
+) -> None:
+    """ """
+    # config = core.get_config(config_file)  # Set env vars and get config variables
+    legionella_results = LegionellaResults.from_tool_paths(
+        legionella_sbt_results_tsv=legionella_sbt_file,
+        lag1_blast_tsv=lag_1_blast_output,
+        sample_name=sample_name,
+    )
+    legionella_results.write_tsv(output_file=output_file)
+@call_parse
+def legionella_batch_parser(
+    file_path_tsv: Path = None,  # Path to tsv containing file paths to the outputs from tools to be parsed. Must contain headers "sample_name", "sbt_results", and "lag1_blast_results"
+    output_file: Path = None,  # Path to output tsv
+    config_file: str = None,  # config file to set env vars from
+) -> None:
+    """ """
+    # config = core.get_config(config_file)  # Set env vars and get config variables
+    legionella_results = LegionellaResults.from_tool_paths_tsv(
+        tool_paths_tsv=file_path_tsv
+    )
+    legionella_results.write_tsv(output_file)

ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.0.2"

ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers/_modidx.py ADDED Viewed

@@ -0,0 +1,78 @@
+# Autogenerated by nbdev
+d = { 'settings': { 'branch': 'main',
+                'doc_baseurl': '/ssi_analysis_result_parsers',
+                'doc_host': 'https://thej-ssi.github.io',
+                'git_url': 'https://github.com/thej-ssi/ssi_analysis_result_parsers',
+                'lib_path': 'ssi_analysis_result_parsers'},
+  'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults': ( 'legionella_parser.html#legionellaresults',
+                                                                                                                                    'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.__repr__': ( 'legionella_parser.html#legionellaresults.__repr__',
+                                                                                                                                             'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths': ( 'legionella_parser.html#legionellaresults.from_tool_paths',
+                                                                                                                                                    'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_dataframe': ( 'legionella_parser.html#legionellaresults.from_tool_paths_dataframe',
+                                                                                                                                                              'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_dict': ( 'legionella_parser.html#legionellaresults.from_tool_paths_dict',
+                                                                                                                                                         'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_tsv': ( 'legionella_parser.html#legionellaresults.from_tool_paths_tsv',
+                                                                                                                                                        'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.legionella_summary': ( 'legionella_parser.html#legionellaresults.legionella_summary',
+                                                                                                                                                       'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
+                                                                                                                                         'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_dict': ( 'legionella_parser.html#legionella_batch_from_dict',
+                                                                                                                                             'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_sheet': ( 'legionella_parser.html#legionella_batch_from_sheet',
+                                                                                                                                              'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_parser': ( 'legionella_parser.html#legionella_batch_parser',
+                                                                                                                                          'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
+                                                                                                                                    'ssi_analysis_result_parsers/Legionella_parser.py'),
+                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
+                                                                                                                                     'ssi_analysis_result_parsers/Legionella_parser.py')},
+            'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
+                                                                                                                       'ssi_analysis_result_parsers/blast_parser.py'),
+                                                          'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
+                                                                                                                               'ssi_analysis_result_parsers/blast_parser.py'),
+                                                          'ssi_analysis_result_parsers.blast_parser.extract_presence_absence': ( 'blast_parser.html#extract_presence_absence',
+                                                                                                                                 'ssi_analysis_result_parsers/blast_parser.py'),
+                                                          'ssi_analysis_result_parsers.blast_parser.presence_absence': ( 'blast_parser.html#presence_absence',
+                                                                                                                         'ssi_analysis_result_parsers/blast_parser.py')},
+            'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.PipelineResults': ( 'core.html#pipelineresults',
+                                                                                                        'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.__init__': ( 'core.html#pipelineresults.__init__',
+                                                                                                                 'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.__iter__': ( 'core.html#pipelineresults.__iter__',
+                                                                                                                 'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.__len__': ( 'core.html#pipelineresults.__len__',
+                                                                                                                'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.__repr__': ( 'core.html#pipelineresults.__repr__',
+                                                                                                                 'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.from_results_dataframe': ( 'core.html#pipelineresults.from_results_dataframe',
+                                                                                                                               'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.from_results_tsv': ( 'core.html#pipelineresults.from_results_tsv',
+                                                                                                                         'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.items': ( 'core.html#pipelineresults.items',
+                                                                                                              'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.results': ( 'core.html#pipelineresults.results',
+                                                                                                                'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.PipelineResults.write_tsv': ( 'core.html#pipelineresults.write_tsv',
+                                                                                                                  'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.get_config': ( 'core.html#get_config',
+                                                                                                   'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.get_samplesheet': ( 'core.html#get_samplesheet',
+                                                                                                        'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.print_results_dict_to_tsv': ( 'core.html#print_results_dict_to_tsv',
+                                                                                                                  'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.set_env_variables': ( 'core.html#set_env_variables',
+                                                                                                          'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.show_project_env_vars': ( 'core.html#show_project_env_vars',
+                                                                                                              'ssi_analysis_result_parsers/core.py'),
+                                                  'ssi_analysis_result_parsers.core.update_results_dict': ( 'core.html#update_results_dict',
+                                                                                                            'ssi_analysis_result_parsers/core.py')},
+            'ssi_analysis_result_parsers.hello_world': { 'ssi_analysis_result_parsers.hello_world.cli': ( 'hello_world.html#cli',
+                                                                                                          'ssi_analysis_result_parsers/hello_world.py'),
+                                                         'ssi_analysis_result_parsers.hello_world.hello_world': ( 'hello_world.html#hello_world',
+                                                                                                                  'ssi_analysis_result_parsers/hello_world.py')},
+            'ssi_analysis_result_parsers.some_string': {}}}

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3}/ssi_analysis_result_parsers/blast_parser.py RENAMED Viewed

@@ -49,36 +49,38 @@ def extract_presence_absence(
     """
     if os.path.exists(blast_output_tsv):
-        blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
-        blast_df.columns = tsv_header.split(" ")
-        blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
-        blast_df_unique = (
-            blast_df.sort_values(by=["bitscore"], ascending=False)
-            .groupby("qseqid")
-            .first()
-        )
-        blast_df_filtered = blast_df_unique.query(
-            "plen > @plen_threshold and pident > @pident_threshold"
-        )
+        try:
+            blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
+            blast_df.columns = tsv_header.split(" ")
+            blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
+            blast_df_unique = (
+                blast_df.sort_values(by=["bitscore"], ascending=False)
+                .groupby("qseqid")
+                .first()
+            )
+            blast_df_filtered = blast_df_unique.query(
+                "plen > @plen_threshold and pident > @pident_threshold"
+            )
+            blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
+        except pandas.errors.EmptyDataError:
+            blast_dict = {}
+            print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
         if hits_as_string:
-            if include_match_stats:
-                results = []
-                for gene, d in blast_df_filtered.to_dict(orient="index").items():
-                    results.append(f"{gene}__{d['pident']}__{d['plen']}")
-                result_dict = {"genes_found": ", ".join(results)}
-                return result_dict
-            else:
-                result_dict = {
-                    "genes_found": ", ".join(list(blast_df_filtered.index.values))
-                }
-                return result_dict
+            results = []
+            for gene, d in blast_dict.items():
+                if include_match_stats:
+                    results.append(f"{gene}__{d['pident']}__{d['plen']}")
+                else:
+                    results.append(gene)
+            result_dict = {"genes_found": ", ".join(results)}
+            return result_dict
         else:
             result_dict = {}
-            blast_dict = dict(blast_df_filtered.to_dict(orient="index").items())
             if gene_names is None:
-                gene_names = blast_dict.keys()
+                gene_names = list(blast_dict.keys())
             for gene in gene_names:
                 if gene in blast_dict:
                     if include_match_stats:

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3}/ssi_analysis_result_parsers/core.py RENAMED Viewed

@@ -2,7 +2,8 @@
 # %% auto 0
 __all__ = ['PACKAGE_NAME', 'DEV_MODE', 'PACKAGE_DIR', 'PROJECT_DIR', 'config', 'set_env_variables', 'get_config',
-           'show_project_env_vars', 'get_samplesheet', 'update_results_dict', 'print_results_dict_to_tsv']
+           'show_project_env_vars', 'get_samplesheet', 'PipelineResults', 'update_results_dict',
+           'print_results_dict_to_tsv']
 # %% ../nbs/00_core.ipynb 4
 # Need the ssi_analysis_result_parsers for a few functions, this can be considered a static var
@@ -192,48 +193,97 @@ def get_samplesheet(sample_sheet_config: dict) -> pd.DataFrame:
     return df
 # %% ../nbs/00_core.ipynb 24
+class PipelineResults:
+    def __init__(self, results_dict):
+        print(results_dict)
+        self.results_dict = results_dict
+        self.results_df = pandas.DataFrame.from_dict(results_dict, orient="index")
+    def write_tsv(self, output_file: Path) -> None:
+        print_df = self.results_df.reset_index(names="sample_name")
+        print_df.to_csv(output_file, sep="\t", index=False)
+        return None
+    @classmethod
+    def from_results_dataframe(cls, results_df: pandas.DataFrame):
+        # results_df = results_df.set_index("sample_name")
+        results_dict = results_df.to_dict(orient="index")
+        return cls(results_dict)
+    @classmethod
+    def from_results_tsv(cls, results_tsv: Path):
+        results_df = pandas.read_csv(results_tsv, sep="\t")
+        results_df.set_index("sample_name", inplace=True, drop=True)
+        results_dict = results_df.to_dict(orient="index")
+        return cls(results_dict)
+    def __repr__(self):
+        return f"< Generic analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
+    def __len__(self):
+        return len(self.results_dict)
+    def __iter__(self):
+        for sample_name in self.results_dict:
+            yield sample_name
+    def items(self):
+        for sample_name, results_d in self.results_dict:
+            yield sample_name, results_d
+    def results(self):
+        for results_d in self.results_dict.values():
+            yield results_d
 def update_results_dict(
     old_results: dict,
     new_results: dict,
     old_duplicate_key_prefix: str = None,
     new_duplicate_key_prefix: str = None,
 ):
-    duplicate_keys = list(set(old_results.keys()) & set(new_results.keys()))
-    if len(duplicate_keys) == 0:
-        old_results.update(new_results)
+    if old_results is None:
+        return new_results
+    elif new_results is None:
         return old_results
     else:
-        if old_duplicate_key_prefix is None and new_duplicate_key_prefix is None:
-            raise ValueError(
-                "Provided dictionaries contain duplicate keys. old_duplicate_key_prefix and/or new_duplicate_key_prefix must be provided"
-            )
-        elif old_duplicate_key_prefix == new_duplicate_key_prefix:
-            raise ValueError(
-                "old_duplicate_key_prefix and new_duplicate_key_prefix cannot be identical"
-            )
+        duplicate_keys = list(set(old_results.keys()) & set(new_results.keys()))
+        if len(duplicate_keys) == 0:
+            old_results.update(new_results)
+            return old_results
         else:
-            combined_dict = {}
-            if old_duplicate_key_prefix is None:
-                combined_dict.update(old_results)
-            else:
-                for key, value in old_results.items():
-                    if key in duplicate_keys:
-                        combined_dict.update(
-                            {f"{old_duplicate_key_prefix}{key}": value}
-                        )
-                    else:
-                        combined_dict.update({key: value})
-            if new_duplicate_key_prefix is None:
-                combined_dict.update(new_results)
+            if old_duplicate_key_prefix is None and new_duplicate_key_prefix is None:
+                raise ValueError(
+                    "Provided dictionaries contain duplicate keys. Old_duplicate_key_prefix and/or new_duplicate_key_prefix must be provided"
+                )
+            elif old_duplicate_key_prefix == new_duplicate_key_prefix:
+                raise ValueError(
+                    "old_duplicate_key_prefix and new_duplicate_key_prefix cannot be identical"
+                )
             else:
-                for key, value in new_results.items():
-                    if key in duplicate_keys:
-                        combined_dict.update(
-                            {f"{new_duplicate_key_prefix}{key}": value}
-                        )
-                    else:
-                        combined_dict.update({key: value})
-        return combined_dict
+                combined_dict = {}
+                if old_duplicate_key_prefix is None:
+                    combined_dict.update(old_results)
+                else:
+                    for key, value in old_results.items():
+                        if key in duplicate_keys:
+                            combined_dict.update(
+                                {f"{old_duplicate_key_prefix}{key}": value}
+                            )
+                        else:
+                            combined_dict.update({key: value})
+                if new_duplicate_key_prefix is None:
+                    combined_dict.update(new_results)
+                else:
+                    for key, value in new_results.items():
+                        if key in duplicate_keys:
+                            combined_dict.update(
+                                {f"{new_duplicate_key_prefix}{key}": value}
+                            )
+                        else:
+                            combined_dict.update({key: value})
+            return combined_dict
 def print_results_dict_to_tsv(

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: ssi_analysis_result_parsers
-Version: 0.0.1
+Version: 0.0.3
 Summary: TODO
 Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
-Author: thej-ssi
+Author: Thor Bech Johannesen
 Author-email: thej@ssi.dk
 License: MIT License
 Keywords: nbdev jupyter notebook python

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3}/ssi_analysis_result_parsers.egg-info/SOURCES.txt RENAMED Viewed

@@ -21,8 +21,14 @@ ssi_analysis_result_parsers.egg-info/top_level.txt
 ssi_analysis_result_parsers/config/config.default.env
 ssi_analysis_result_parsers/config/config.default.yaml
 test_input/.DS_Store
+test_input/Legionella/batch_parser_file_paths.tsv
 test_input/Legionella/lag-1_blast.tsv
+test_input/Legionella/lag-1_blast_2.tsv
 test_input/Legionella/test.sbt.tsv
+test_input/Legionella/test2.sbt.tsv
 test_input/blast_parser/allele_matches_test.tsv
+test_input/blast_parser/empty_gene_presence_absense_test.tsv
 test_input/blast_parser/gene_presence_absence_test.tsv
-test_output/output_with_sample_name.tsv
+test_output/output_with_sample_name.tsv
+test_output/test.tsv
+test_output/test_batch_output.tsv

{ssi_analysis_result_parsers-0.0.1 → ssi_analysis_result_parsers-0.0.3}/ssi_analysis_result_parsers.egg-info/entry_points.txt RENAMED Viewed

@@ -1,7 +1,8 @@
 [console_scripts]
 blast_parser_allele_matches = ssi_analysis_result_parsers.blast_parser:allele_matches
 blast_parser_presence_absence = ssi_analysis_result_parsers.blast_parser:presence_absence
-legionella_parser = ssi_analysis_result_parsers.Legionella_parser:legionella_parser
+get_leg_results = ssi_analysis_result_parsers.Legionella_parser:legionella_parser
+get_leg_results_batch = ssi_analysis_result_parsers.Legionella_parser:legionella_batch_parser
 [nbdev]
 ssi_analysis_result_parsers = ssi_analysis_result_parsers._modidx:d

ssi_analysis_result_parsers-0.0.3/test_input/Legionella/batch_parser_file_paths.tsv ADDED Viewed

@@ -0,0 +1,3 @@
+sample_name	sbt_results	lag1_blast_results
+sample_1	test_input/Legionella/test.sbt.tsv	test_input/Legionella/lag-1_blast.tsv
+sample_2	test_input/Legionella/test2.sbt.tsv	test_input/Legionella/lag-1_blast_2.tsv

ssi_analysis_result_parsers-0.0.3/test_input/Legionella/lag-1_blast_2.tsv ADDED Viewed

File without changes

ssi_analysis_result_parsers-0.0.3/test_input/Legionella/test2.sbt.tsv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ sample ST flaA pilE asd mip mompS proA neuA notes
2	+ LEG-2024-R11031.fasta 182 3 4 1 3 35 9 11 Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call

ssi_analysis_result_parsers-0.0.3/test_input/blast_parser/empty_gene_presence_absense_test.tsv ADDED Viewed

File without changes

ssi_analysis_result_parsers-0.0.3/test_output/test.tsv ADDED Viewed

@@ -0,0 +1,3 @@
+sample_name	ST	flaA	pilE	asd	mip	mompS	proA	neuA	notes	lag-1
+sample_1	23	2	3	9	10	2	1	6	Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call	1
+sample_2	182	3	4	1	3	35	9	11	Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call	0

ssi_analysis_result_parsers-0.0.3/test_output/test_batch_output.tsv ADDED Viewed

@@ -0,0 +1,3 @@
+sample_name	ST	flaA	pilE	asd	mip	mompS	proA	neuA	notes	lag-1
+sample_1	23	2	3	9	10	2	1	6	Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call	1
+sample_2	182	3	4	1	3	35	9	11	Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call	0

ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/Legionella_parser.py DELETED Viewed

@@ -1,88 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
-# %% auto 0
-__all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_parser']
-# %% ../nbs/39_Legionella_parser.ipynb 3
-# standard libs
-import os
-import re
-# Common to template
-# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
-import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
-import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
-import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
-from fastcore import (
-    test,
-)
-from fastcore.script import (
-    call_parse,
-)  # for @call_parse, https://fastcore.fast.ai/script
-import json  # for nicely printing json and yaml
-# import functions from core module (optional, but most likely needed).
-from ssi_analysis_result_parsers import (
-    core,
-)
-from .blast_parser import extract_presence_absence
-# Project specific libraries
-from pathlib import Path
-import pandas
-import sys
-# %% ../nbs/39_Legionella_parser.ipynb 6
-def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
-    """
-    Returns dictionary of results found in the Legionella SBT summary output
-    """
-    if os.path.exists(legionella_sbt_results_tsv):
-        df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
-        df.set_index("sample", inplace=True, drop=True)
-        d = df.to_dict(orient="index")
-        fname = next(iter(d))
-        return d[fname]
-    else:
-        print(
-            f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
-            file=sys.stderr,
-        )
-        return None
-def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
-    sbt_results_dict = extract_legionella_sbt(
-        legionella_sbt_results_tsv=legionella_sbt_results_tsv
-    )
-    lag1_blast_dict = extract_presence_absence(
-        blast_output_tsv=lag1_blast_tsv,
-        hits_as_string=False,
-        include_match_stats=False,
-        gene_names=["lag-1"],
-    )
-    results_dict = core.update_results_dict(
-        sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
-    )
-    return results_dict
-# %% ../nbs/39_Legionella_parser.ipynb 9
-@call_parse
-def legionella_parser(
-    legionella_sbt_file: Path = None,  # Path "*.sbt.tsv from legionella_sbt program"
-    lag_1_blast_output: Path = None,  #  Path to output from lag1_blast. Generated with blastn -query lag-1.fasta -subject assembly.fasta -outfmt "6 qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore"
-    output_file: Path = None,  # Path to output tsv
-    sample_name: str = None,
-    config_file: str = None,  # config file to set env vars from
-) -> None:
-    """ """
-    # config = core.get_config(config_file)  # Set env vars and get config variables
-    legionella_summary_dict = legionella_summary(
-        legionella_sbt_results_tsv=legionella_sbt_file,
-        lag1_blast_tsv=lag_1_blast_output,
-    )
-    core.print_results_dict_to_tsv(
-        results_dict=legionella_summary_dict,
-        output_file=output_file,
-        sample_name=sample_name,
-    )

ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- __version__ = "0.0.1"

ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/_modidx.py DELETED Viewed

@@ -1,38 +0,0 @@
-# Autogenerated by nbdev
-d = { 'settings': { 'branch': 'main',
-                'doc_baseurl': '/ssi_analysis_result_parsers',
-                'doc_host': 'https://$GIT_USER_NAME.github.io',
-                'git_url': 'https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers',
-                'lib_path': 'ssi_analysis_result_parsers'},
-  'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
-                                                                                                                                         'ssi_analysis_result_parsers/Legionella_parser.py'),
-                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
-                                                                                                                                    'ssi_analysis_result_parsers/Legionella_parser.py'),
-                                                               'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
-                                                                                                                                     'ssi_analysis_result_parsers/Legionella_parser.py')},
-            'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
-                                                                                                                       'ssi_analysis_result_parsers/blast_parser.py'),
-                                                          'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
-                                                                                                                               'ssi_analysis_result_parsers/blast_parser.py'),
-                                                          'ssi_analysis_result_parsers.blast_parser.extract_presence_absence': ( 'blast_parser.html#extract_presence_absence',
-                                                                                                                                 'ssi_analysis_result_parsers/blast_parser.py'),
-                                                          'ssi_analysis_result_parsers.blast_parser.presence_absence': ( 'blast_parser.html#presence_absence',
-                                                                                                                         'ssi_analysis_result_parsers/blast_parser.py')},
-            'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.get_config': ( 'core.html#get_config',
-                                                                                                   'ssi_analysis_result_parsers/core.py'),
-                                                  'ssi_analysis_result_parsers.core.get_samplesheet': ( 'core.html#get_samplesheet',
-                                                                                                        'ssi_analysis_result_parsers/core.py'),
-                                                  'ssi_analysis_result_parsers.core.print_results_dict_to_tsv': ( 'core.html#print_results_dict_to_tsv',
-                                                                                                                  'ssi_analysis_result_parsers/core.py'),
-                                                  'ssi_analysis_result_parsers.core.set_env_variables': ( 'core.html#set_env_variables',
-                                                                                                          'ssi_analysis_result_parsers/core.py'),
-                                                  'ssi_analysis_result_parsers.core.show_project_env_vars': ( 'core.html#show_project_env_vars',
-                                                                                                              'ssi_analysis_result_parsers/core.py'),
-                                                  'ssi_analysis_result_parsers.core.update_results_dict': ( 'core.html#update_results_dict',
-                                                                                                            'ssi_analysis_result_parsers/core.py')},
-            'ssi_analysis_result_parsers.hello_world': { 'ssi_analysis_result_parsers.hello_world.cli': ( 'hello_world.html#cli',
-                                                                                                          'ssi_analysis_result_parsers/hello_world.py'),
-                                                         'ssi_analysis_result_parsers.hello_world.hello_world': ( 'hello_world.html#hello_world',
-                                                                                                                  'ssi_analysis_result_parsers/hello_world.py')},
-            'ssi_analysis_result_parsers.some_string': {}}}