ssi-analysis-result-parsers 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. ssi_analysis_result_parsers-0.0.1/LICENSE +8 -0
  2. ssi_analysis_result_parsers-0.0.1/MANIFEST.in +9 -0
  3. ssi_analysis_result_parsers-0.0.1/PKG-INFO +109 -0
  4. ssi_analysis_result_parsers-0.0.1/README.md +69 -0
  5. ssi_analysis_result_parsers-0.0.1/pyproject.toml +3 -0
  6. ssi_analysis_result_parsers-0.0.1/settings.ini +52 -0
  7. ssi_analysis_result_parsers-0.0.1/setup.cfg +4 -0
  8. ssi_analysis_result_parsers-0.0.1/setup.py +64 -0
  9. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/Legionella_parser.py +88 -0
  10. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/__init__.py +1 -0
  11. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/_modidx.py +38 -0
  12. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/blast_parser.py +178 -0
  13. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/config/config.default.env +24 -0
  14. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/config/config.default.yaml +9 -0
  15. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/core.py +252 -0
  16. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/hello_world.py +61 -0
  17. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/some_string.py +27 -0
  18. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/PKG-INFO +109 -0
  19. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/SOURCES.txt +28 -0
  20. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/dependency_links.txt +1 -0
  21. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/entry_points.txt +7 -0
  22. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/not-zip-safe +1 -0
  23. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/requires.txt +7 -0
  24. ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/top_level.txt +8 -0
  25. ssi_analysis_result_parsers-0.0.1/test_input/.DS_Store +0 -0
  26. ssi_analysis_result_parsers-0.0.1/test_input/Legionella/lag-1_blast.tsv +1 -0
  27. ssi_analysis_result_parsers-0.0.1/test_input/Legionella/test.sbt.tsv +2 -0
  28. ssi_analysis_result_parsers-0.0.1/test_input/blast_parser/allele_matches_test.tsv +536 -0
  29. ssi_analysis_result_parsers-0.0.1/test_input/blast_parser/gene_presence_absence_test.tsv +3 -0
  30. ssi_analysis_result_parsers-0.0.1/test_output/output_with_sample_name.tsv +2 -0
@@ -0,0 +1,8 @@
1
+ The MIT License (MIT)
2
+ Copyright © 2023 Statens Serum Institut
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5
+
6
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7
+
8
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include settings.ini
2
+ include LICENSE
3
+ include CONTRIBUTING.md
4
+ include README.md
5
+ recursive-exclude * __pycache__
6
+ include ssi_analysis_result_parsers/config/config.default.env
7
+ include ssi_analysis_result_parsers/config/config.default.yaml
8
+ recursive-include test_input *
9
+ recursive-include test_output *
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.4
2
+ Name: ssi_analysis_result_parsers
3
+ Version: 0.0.1
4
+ Summary: TODO
5
+ Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
+ Author: thej-ssi
7
+ Author-email: thej@ssi.dk
8
+ License: MIT License
9
+ Keywords: nbdev jupyter notebook python
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Natural Language :: English
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastcore
22
+ Requires-Dist: python_dotenv
23
+ Requires-Dist: envyaml
24
+ Requires-Dist: pandas
25
+ Requires-Dist: black
26
+ Provides-Extra: dev
27
+ Dynamic: author
28
+ Dynamic: author-email
29
+ Dynamic: classifier
30
+ Dynamic: description
31
+ Dynamic: description-content-type
32
+ Dynamic: home-page
33
+ Dynamic: keywords
34
+ Dynamic: license
35
+ Dynamic: license-file
36
+ Dynamic: provides-extra
37
+ Dynamic: requires-dist
38
+ Dynamic: requires-python
39
+ Dynamic: summary
40
+
41
+ # ssi_analysis_result_parsers
42
+
43
+
44
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
45
+
46
+ This file will become your README and also the index of your
47
+ documentation.
48
+
49
+ ## Developer Guide
50
+
51
+ If you are new to using `nbdev` here are some useful pointers to get you
52
+ started.
53
+
54
+ ### Install ssi_analysis_result_parsers in Development mode
55
+
56
+ ``` sh
57
+ # make sure ssi_analysis_result_parsers package is installed in development mode
58
+ $ pip install -e .
59
+
60
+ # make changes under nbs/ directory
61
+ # ...
62
+
63
+ # compile to have changes apply to ssi_analysis_result_parsers
64
+ $ nbdev_prepare
65
+ ```
66
+
67
+ ## Usage
68
+
69
+ ### Installation
70
+
71
+ Install latest from the GitHub
72
+ [repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers):
73
+
74
+ ``` sh
75
+ $ pip install git+https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers.git
76
+ ```
77
+
78
+ or from
79
+ [conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
80
+
81
+ ``` sh
82
+ $ conda install -c $GIT_USER_NAME ssi_analysis_result_parsers
83
+ ```
84
+
85
+ or from [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
86
+
87
+ ``` sh
88
+ $ pip install ssi_analysis_result_parsers
89
+ ```
90
+
91
+ ### Documentation
92
+
93
+ Documentation can be found hosted on this GitHub
94
+ [repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers)’s
95
+ [pages](https://$GIT_USER_NAME.github.io/ssi_analysis_result_parsers/).
96
+ Additionally you can find package manager specific guidelines on
97
+ [conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
98
+ and [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
99
+ respectively.
100
+
101
+ ## How to use
102
+
103
+ Fill me in please! Don’t forget code examples:
104
+
105
+ ``` python
106
+ 1+1
107
+ ```
108
+
109
+ 2
@@ -0,0 +1,69 @@
1
+ # ssi_analysis_result_parsers
2
+
3
+
4
+ <!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
5
+
6
+ This file will become your README and also the index of your
7
+ documentation.
8
+
9
+ ## Developer Guide
10
+
11
+ If you are new to using `nbdev` here are some useful pointers to get you
12
+ started.
13
+
14
+ ### Install ssi_analysis_result_parsers in Development mode
15
+
16
+ ``` sh
17
+ # make sure ssi_analysis_result_parsers package is installed in development mode
18
+ $ pip install -e .
19
+
20
+ # make changes under nbs/ directory
21
+ # ...
22
+
23
+ # compile to have changes apply to ssi_analysis_result_parsers
24
+ $ nbdev_prepare
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ### Installation
30
+
31
+ Install latest from the GitHub
32
+ [repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers):
33
+
34
+ ``` sh
35
+ $ pip install git+https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers.git
36
+ ```
37
+
38
+ or from
39
+ [conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
40
+
41
+ ``` sh
42
+ $ conda install -c $GIT_USER_NAME ssi_analysis_result_parsers
43
+ ```
44
+
45
+ or from [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
46
+
47
+ ``` sh
48
+ $ pip install ssi_analysis_result_parsers
49
+ ```
50
+
51
+ ### Documentation
52
+
53
+ Documentation can be found hosted on this GitHub
54
+ [repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers)’s
55
+ [pages](https://$GIT_USER_NAME.github.io/ssi_analysis_result_parsers/).
56
+ Additionally you can find package manager specific guidelines on
57
+ [conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
58
+ and [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
59
+ respectively.
60
+
61
+ ## How to use
62
+
63
+ Fill me in please! Don’t forget code examples:
64
+
65
+ ``` python
66
+ 1+1
67
+ ```
68
+
69
+ 2
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64.0"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,52 @@
1
+ [DEFAULT]
2
+ # All sections below are required unless otherwise specified.
3
+ # See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
4
+
5
+ ### Python library ###
6
+ repo = ssi_analysis_result_parsers
7
+ lib_name = %(repo)s
8
+ version = 0.0.1
9
+ min_python = 3.9
10
+ license = MIT
11
+ black_formatting = True
12
+
13
+ ### nbdev ###
14
+ doc_path = _docs
15
+ lib_path = ssi_analysis_result_parsers
16
+ nbs_path = nbs
17
+ recursive = True
18
+ tst_flags = notest
19
+ put_version_in_init = True
20
+ update_pyproject = True
21
+
22
+ ### Docs ###
23
+ branch = main
24
+ custom_sidebar = False
25
+ doc_host = https://%(user)s.github.io
26
+ doc_baseurl = /%(repo)s
27
+ git_url = https://github.com/%(user)s/%(repo)s
28
+ title = %(lib_name)s
29
+
30
+ ### PyPI ###
31
+ audience = Developers
32
+ author = thej-ssi
33
+ author_email = thej@ssi.dk
34
+ copyright = 2025 onwards, %(author)s
35
+ description = TODO
36
+ keywords = nbdev jupyter notebook python
37
+ language = English
38
+ status = 3
39
+ user = thej-ssi
40
+
41
+ ### Optional ###
42
+ # requirements = fastcore pandas
43
+ # dev_requirements =
44
+ # console_scripts =
45
+ # conda_user =
46
+ # package_data =
47
+ requirements = fastcore
48
+ pip_requirements = python_dotenv envyaml pandas black
49
+ console_scripts =
50
+ blast_parser_presence_absence=ssi_analysis_result_parsers.blast_parser:presence_absence
51
+ blast_parser_allele_matches=ssi_analysis_result_parsers.blast_parser:allele_matches
52
+ legionella_parser=ssi_analysis_result_parsers.Legionella_parser:legionella_parser
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,64 @@
1
# nbdev-generated setup.py: all package metadata is read from settings.ini.
from pkg_resources import parse_version
from configparser import ConfigParser
import setuptools, shlex
# setuptools >= 36.2 is required by nbdev's packaging template
assert parse_version(setuptools.__version__)>=parse_version('36.2')

# note: all settings are in settings.ini; edit there, not here
config = ConfigParser(delimiters=['='])
config.read('settings.ini', encoding='utf-8')
cfg = config['DEFAULT']

# Settings forwarded verbatim to setuptools.setup(**setup_cfg)
cfg_keys = 'version description keywords author author_email'.split()
expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
setup_cfg = {o:cfg[o] for o in cfg_keys}

# Map of settings.ini `license` keys to (license name, trove classifier)
licenses = {
    'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
    'mit': ('MIT License', 'OSI Approved :: MIT License'),
    'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
    'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
    'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
}
# "Development Status" classifier suffixes, indexed by the integer `status` setting
statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
    '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
py_versions = '3.6 3.7 3.8 3.9 3.10 3.11 3.12'.split()

requirements = shlex.split(cfg.get('requirements', ''))
if cfg.get('pip_requirements'): requirements += shlex.split(cfg.get('pip_requirements', ''))
min_python = cfg['min_python']
# Unknown license keys fall back to the raw string with no trove classifier
lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
dev_requirements = (cfg.get('dev_requirements') or '').split()

package_data = dict()
pkg_data = cfg.get('package_data', None)
if pkg_data:
    package_data[cfg['lib_name']] = pkg_data.split()  # split as multiple files might be listed
# Add package data to setup_cfg for setuptools.setup(..., **setup_cfg)
setup_cfg['package_data'] = package_data

setuptools.setup(
    name = cfg['lib_name'],
    license = lic[0],
    classifiers = [
        'Development Status :: ' + statuses[int(cfg['status'])],
        'Intended Audience :: ' + cfg['audience'].title(),
        'Natural Language :: ' + cfg['language'].title(),
    ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
    url = cfg['git_url'],
    packages = setuptools.find_namespace_packages(),
    include_package_data = True,
    install_requires = requirements,
    extras_require={ 'dev': dev_requirements },
    dependency_links = cfg.get('dep_links','').split(),
    python_requires = '>=' + cfg['min_python'],
    long_description = open('README.md', encoding='utf-8').read(),
    long_description_content_type = 'text/markdown',
    zip_safe = False,
    entry_points = {
        'console_scripts': cfg.get('console_scripts','').split(),
        # nbdev uses this entry point to locate the module index (_modidx.d)
        'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
    },
    **setup_cfg)
63
+
64
+
@@ -0,0 +1,88 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_parser']
5
+
6
+ # %% ../nbs/39_Legionella_parser.ipynb 3
7
+ # standard libs
8
+ import os
9
+ import re
10
+
11
+ # Common to template
12
+ # add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
13
+ import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
14
+ import envyaml  # Allows loading env vars into a yaml file, https://github.com/thesimj/envyaml
15
+ import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
16
+ from fastcore import (
17
+ test,
18
+ )
19
+ from fastcore.script import (
20
+ call_parse,
21
+ ) # for @call_parse, https://fastcore.fast.ai/script
22
+ import json # for nicely printing json and yaml
23
+
24
+ # import functions from core module (optional, but most likely needed).
25
+ from ssi_analysis_result_parsers import (
26
+ core,
27
+ )
28
+ from .blast_parser import extract_presence_absence
29
+
30
+ # Project specific libraries
31
+ from pathlib import Path
32
+ import pandas
33
+ import sys
34
+
35
+ # %% ../nbs/39_Legionella_parser.ipynb 6
36
def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
    """
    Read a Legionella SBT summary tsv and return the result row for the
    first sample as a {column: value} dict.

    Returns None (after printing a message to stderr) when the file does
    not exist.
    """
    if not os.path.exists(legionella_sbt_results_tsv):
        print(
            f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
            file=sys.stderr,
        )
        return None
    sbt_table = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
    sbt_table.set_index("sample", inplace=True, drop=True)
    # one dict per sample, keyed by sample name; keep only the first sample's row
    rows_by_sample = sbt_table.to_dict(orient="index")
    first_sample = next(iter(rows_by_sample))
    return rows_by_sample[first_sample]
52
+
53
+
54
def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
    """
    Combine Legionella SBT typing results with lag-1 presence/absence from a
    blast search into one results dict. Duplicate keys between the two sources
    are disambiguated with an "SBT: " prefix (see core.update_results_dict).
    """
    sbt_results = extract_legionella_sbt(
        legionella_sbt_results_tsv=legionella_sbt_results_tsv
    )
    # presence/absence of the single lag-1 marker gene, reported as "1"/"0"
    lag1_presence = extract_presence_absence(
        blast_output_tsv=lag1_blast_tsv,
        hits_as_string=False,
        include_match_stats=False,
        gene_names=["lag-1"],
    )
    merged = core.update_results_dict(
        sbt_results, lag1_presence, old_duplicate_key_prefix="SBT: "
    )
    return merged
68
+
69
+ # %% ../nbs/39_Legionella_parser.ipynb 9
70
@call_parse
def legionella_parser(
    legionella_sbt_file: Path = None,  # Path "*.sbt.tsv from legionella_sbt program"
    lag_1_blast_output: Path = None,  # Path to output from lag1_blast. Generated with blastn -query lag-1.fasta -subject assembly.fasta -outfmt "6 qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore"
    output_file: Path = None,  # Path to output tsv
    sample_name: str = None,  # sample name to include in the output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """Command line entry point: summarise Legionella SBT and lag-1 blast results to a tsv."""
    # config = core.get_config(config_file) # Set env vars and get config variables
    summary = legionella_summary(
        legionella_sbt_results_tsv=legionella_sbt_file,
        lag1_blast_tsv=lag_1_blast_output,
    )
    core.print_results_dict_to_tsv(
        results_dict=summary,
        output_file=output_file,
        sample_name=sample_name,
    )
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,38 @@
1
+ # Autogenerated by nbdev
2
+
3
+ d = { 'settings': { 'branch': 'main',
4
+ 'doc_baseurl': '/ssi_analysis_result_parsers',
5
+ 'doc_host': 'https://$GIT_USER_NAME.github.io',
6
+ 'git_url': 'https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers',
7
+ 'lib_path': 'ssi_analysis_result_parsers'},
8
+ 'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
9
+ 'ssi_analysis_result_parsers/Legionella_parser.py'),
10
+ 'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
11
+ 'ssi_analysis_result_parsers/Legionella_parser.py'),
12
+ 'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
13
+ 'ssi_analysis_result_parsers/Legionella_parser.py')},
14
+ 'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
15
+ 'ssi_analysis_result_parsers/blast_parser.py'),
16
+ 'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
17
+ 'ssi_analysis_result_parsers/blast_parser.py'),
18
+ 'ssi_analysis_result_parsers.blast_parser.extract_presence_absence': ( 'blast_parser.html#extract_presence_absence',
19
+ 'ssi_analysis_result_parsers/blast_parser.py'),
20
+ 'ssi_analysis_result_parsers.blast_parser.presence_absence': ( 'blast_parser.html#presence_absence',
21
+ 'ssi_analysis_result_parsers/blast_parser.py')},
22
+ 'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.get_config': ( 'core.html#get_config',
23
+ 'ssi_analysis_result_parsers/core.py'),
24
+ 'ssi_analysis_result_parsers.core.get_samplesheet': ( 'core.html#get_samplesheet',
25
+ 'ssi_analysis_result_parsers/core.py'),
26
+ 'ssi_analysis_result_parsers.core.print_results_dict_to_tsv': ( 'core.html#print_results_dict_to_tsv',
27
+ 'ssi_analysis_result_parsers/core.py'),
28
+ 'ssi_analysis_result_parsers.core.set_env_variables': ( 'core.html#set_env_variables',
29
+ 'ssi_analysis_result_parsers/core.py'),
30
+ 'ssi_analysis_result_parsers.core.show_project_env_vars': ( 'core.html#show_project_env_vars',
31
+ 'ssi_analysis_result_parsers/core.py'),
32
+ 'ssi_analysis_result_parsers.core.update_results_dict': ( 'core.html#update_results_dict',
33
+ 'ssi_analysis_result_parsers/core.py')},
34
+ 'ssi_analysis_result_parsers.hello_world': { 'ssi_analysis_result_parsers.hello_world.cli': ( 'hello_world.html#cli',
35
+ 'ssi_analysis_result_parsers/hello_world.py'),
36
+ 'ssi_analysis_result_parsers.hello_world.hello_world': ( 'hello_world.html#hello_world',
37
+ 'ssi_analysis_result_parsers/hello_world.py')},
38
+ 'ssi_analysis_result_parsers.some_string': {}}}
@@ -0,0 +1,178 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/11_blast_parser.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['extract_presence_absence', 'extract_allele_matches', 'presence_absence', 'allele_matches']
5
+
6
+ # %% ../nbs/11_blast_parser.ipynb 3
7
+ # standard libs
8
+ import os
9
+ import re
10
+
11
+ # Common to template
12
+ # add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
13
+ import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
14
+ import envyaml  # Allows loading env vars into a yaml file, https://github.com/thesimj/envyaml
15
+ import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
16
+ from fastcore import (
17
+ test,
18
+ )
19
+ from fastcore.script import (
20
+ call_parse,
21
+ ) # for @call_parse, https://fastcore.fast.ai/script
22
+ import json # for nicely printing json and yaml
23
+
24
+ # import functions from core module (optional, but most likely needed).
25
+ from . import core
26
+
27
+ # Project specific libraries
28
+ from pathlib import Path
29
+ import pandas
30
+ import sys
31
+
32
+ # %% ../nbs/11_blast_parser.ipynb 6
33
def extract_presence_absence(
    blast_output_tsv: Path,
    tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",
    hits_as_string: bool = True,
    include_match_stats=False,
    pident_threshold: float = 90,
    plen_threshold: float = 60,
    gene_names: list = None,
) -> dict:
    """
    Parse a blast tabular output file (-outfmt 6) for gene presence/absence.

    For each query sequence (qseqid) only the hit with the highest bitscore
    is kept; a gene counts as present when that hit exceeds both the percent
    identity and percent query-length thresholds.

    Returns:
        If hits_as_string is True:
            {"genes_found": "<gene1>, <gene2>, ..."}; each entry becomes
            "<gene>__<pident>__<plen>" when include_match_stats is True.
        If hits_as_string is False:
            one key per gene in gene_names mapping to "1"/"0"
            (or "<pident>__<plen>" / "0" with include_match_stats). When
            gene_names is None, only genes present in the filtered hits
            are reported (all "1"/stats).
        None when blast_output_tsv does not exist (message on stderr).
    """
    if not os.path.exists(blast_output_tsv):
        print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)
        # explicit None (was an implicit fall-through): callers must handle it
        return None
    blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
    blast_df.columns = tsv_header.split(" ")
    # percent of the query sequence covered by the alignment
    blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
    # best (highest-bitscore) hit per query sequence
    blast_df_unique = (
        blast_df.sort_values(by=["bitscore"], ascending=False)
        .groupby("qseqid")
        .first()
    )
    blast_df_filtered = blast_df_unique.query(
        "plen > @plen_threshold and pident > @pident_threshold"
    )
    if hits_as_string:
        if include_match_stats:
            results = [
                f"{gene}__{d['pident']}__{d['plen']}"
                for gene, d in blast_df_filtered.to_dict(orient="index").items()
            ]
            return {"genes_found": ", ".join(results)}
        return {"genes_found": ", ".join(list(blast_df_filtered.index.values))}
    result_dict = {}
    blast_dict = dict(blast_df_filtered.to_dict(orient="index").items())
    if gene_names is None:
        gene_names = blast_dict.keys()
    for gene in gene_names:
        if gene in blast_dict:
            if include_match_stats:
                result_dict[gene] = (
                    f"{blast_dict[gene]['pident']}__{blast_dict[gene]['plen']}"
                )
            else:
                result_dict[gene] = "1"
        else:
            result_dict[gene] = "0"
    return result_dict
96
+
97
+
98
def extract_allele_matches(
    blast_output_tsv: Path, tsv_header: str, include_match_stats=False
) -> dict:
    """
    Parse blast tabular output for the best-matching allele per gene.

    Query sequence ids are expected to be "<gene>_<allele>". The split is on
    the LAST underscore (previously a plain split, which raised ValueError
    for ids with more than one underscore), so gene names may themselves
    contain underscores. For each gene, only the hit with the highest
    bitscore is kept.

    Returns:
        include_match_stats True:  {<gene>: "<allele>__<pident>__<plen>", ...}
        include_match_stats False: {<gene>: "<allele>", ...}
        An empty dict when blast_output_tsv does not exist (message on stderr).
    """
    allele_dict = {}
    detailed_dict = {}
    if os.path.exists(blast_output_tsv):
        blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
        blast_df.columns = tsv_header.split(" ")
        # (removed: a no-op `blast_df.set_index(...)` whose result was discarded)
        # percent of the query sequence covered by the alignment
        blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
        # split on the last "_" so gene names containing underscores still parse
        blast_df[["gene", "allele"]] = blast_df["qseqid"].str.rsplit(
            "_", n=1, expand=True
        )
        best_hits = (
            blast_df.sort_values(by=["bitscore"], ascending=False)
            .groupby("gene")
            .first()
        )
        for gene, d in best_hits.to_dict(orient="index").items():
            allele_dict[gene] = d["allele"]
            detailed_dict[gene] = f"{d['allele']}__{d['pident']}__{d['plen']}"
    else:
        print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)

    if include_match_stats:
        return detailed_dict
    else:
        return allele_dict
133
+
134
+ # %% ../nbs/11_blast_parser.ipynb 9
135
+ from fastcore.script import call_parse
136
+
137
+
138
@call_parse
def presence_absence(
    blast_output: Path = None,  # Path to blast output file. Generated with --outfmt 6 option
    blast_tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",  # headers in blast output
    hits_as_string: bool = True,  # True to print a comma separated list of found genes on a single line. False to return a key: value pair for each gene
    include_match_stats: bool = False,  # True to include percent identity and percent length in output, false to only include present/absent
    percent_identityt: float = 90,  # percent identity threshold for considering a gene present (NOTE: flag name keeps its historical typo for CLI compatibility)
    percent_length: float = 60,  # percent length threshold for considering a gene present
    gene_names: list = None,  # name of genes to look for when hits_as_string = False
    output_file: Path = None,  # Path to output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """Command line entry point: report gene presence/absence from blast output as a tsv."""
    # config = core.get_config(config_file) # Set env vars and get config variables
    gene_presence_dict = extract_presence_absence(
        blast_output_tsv=blast_output,
        tsv_header=blast_tsv_header,
        hits_as_string=hits_as_string,
        include_match_stats=include_match_stats,
        pident_threshold=percent_identityt,
        plen_threshold=percent_length,
        gene_names=gene_names,
    )
    # Bug fix: results were previously computed and then discarded, leaving
    # output_file unused. Write them out like the other CLI entry points do.
    # NOTE(review): sample_name=None mirrors legionella_parser's call; confirm
    # core.print_results_dict_to_tsv handles output_file=None as stdout.
    core.print_results_dict_to_tsv(
        results_dict=gene_presence_dict,
        output_file=output_file,
        sample_name=None,
    )
161
+
162
+
163
@call_parse
def allele_matches(
    blast_output: Path = None,  # Path to blast output file. Generated with --outfmt 6 option
    blast_tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",  # headers in blast output
    include_match_stats: bool = False,  # True to include percent identity and percent length in output, false to only include allele number
    output_file: Path = None,  # Path to output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """Command line entry point: report best-matching alleles from blast output as a tsv."""
    # config = core.get_config(config_file) # Set env vars and get config variables
    # Bug fix: extract_allele_matches() takes no `output_file` parameter, so the
    # previous call raised TypeError on every invocation (and the result was
    # discarded). Parse first, then write the results out.
    allele_dict = extract_allele_matches(
        blast_output_tsv=blast_output,
        tsv_header=blast_tsv_header,
        include_match_stats=include_match_stats,
    )
    # NOTE(review): sample_name=None mirrors legionella_parser's call; confirm
    # core.print_results_dict_to_tsv handles output_file=None as stdout.
    core.print_results_dict_to_tsv(
        results_dict=allele_dict,
        output_file=output_file,
        sample_name=None,
    )
@@ -0,0 +1,24 @@
1
+ # environmental (ENV) variables. These are written as SSI_ANALYSIS_RESULT_PARSERS_VARIABLENAME=VALUE to avoid conflicts with other ENV variables.
2
+ # Using the standard template these values can be overwritten by:
3
+ # - defining SSI_ANALYSIS_RESULT_PARSERS_CONFIG_FILE pointing to a similar file with a subset of values
4
+ # - setting the values as environmental variables.
5
+ # The priority goes env variables > config file > default file.
6
+ # All config files other than config.default.env are in .gitignore
7
+ # All .env config files should have an associated .yaml config file with it which the program interacts with.
8
+
9
+ # NOTE: remember if referencing another ENV var as a variable it needs to be defined first
10
+
11
+ # If more structured variables are needed use config.default.yaml or another of your own creation
12
+ # This file path is stored as CORE_YAML_CONFIG_FILE when overriding
13
+ # It is commented out because of the default use case, but should be included for all non default cases.
14
+ # CORE_YAML_CONFIG_FILE=
15
+ CORE_PROJECT_VARIABLE_PREFIX=SSI_ANALYSIS_RESULT_PARSERS_
16
+ # For testing purposes
17
+ CORE_TEST_VAR="Test"
18
+
19
+ # Example variable please exchange with relevant variables
20
+ SSI_ANALYSIS_RESULT_PARSERS_INPUT_DIR=./input
21
+ SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_DIR=./output
22
+ SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_FILE=${SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_DIR}/output.txt
23
+ SSI_ANALYSIS_RESULT_PARSERS_USER_INPUT_NAME=Kim
24
+
@@ -0,0 +1,9 @@
1
+ # When accessing this in the code you'll work with it as a dict.
2
+ # ENV variables will be replaced with their values. This is done with the envyaml package that is in the code template `load_config`.
3
+ # By convention all variables for the project should have the SSI_ANALYSIS_RESULT_PARSERS_* prefix.
4
+ # e.g
5
+ # name: ${SSI_ANALYSIS_RESULT_PARSERS_NAME}
6
+ example:
7
+ input:
8
+ name: ${SSI_ANALYSIS_RESULT_PARSERS_USER_INPUT_NAME}
9
+ alternative_name: Lee