ssi-analysis-result-parsers 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssi_analysis_result_parsers-0.0.1/LICENSE +8 -0
- ssi_analysis_result_parsers-0.0.1/MANIFEST.in +9 -0
- ssi_analysis_result_parsers-0.0.1/PKG-INFO +109 -0
- ssi_analysis_result_parsers-0.0.1/README.md +69 -0
- ssi_analysis_result_parsers-0.0.1/pyproject.toml +3 -0
- ssi_analysis_result_parsers-0.0.1/settings.ini +52 -0
- ssi_analysis_result_parsers-0.0.1/setup.cfg +4 -0
- ssi_analysis_result_parsers-0.0.1/setup.py +64 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/Legionella_parser.py +88 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/__init__.py +1 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/_modidx.py +38 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/blast_parser.py +178 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/config/config.default.env +24 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/config/config.default.yaml +9 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/core.py +252 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/hello_world.py +61 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers/some_string.py +27 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/PKG-INFO +109 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/SOURCES.txt +28 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/dependency_links.txt +1 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/entry_points.txt +7 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/not-zip-safe +1 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/requires.txt +7 -0
- ssi_analysis_result_parsers-0.0.1/ssi_analysis_result_parsers.egg-info/top_level.txt +8 -0
- ssi_analysis_result_parsers-0.0.1/test_input/.DS_Store +0 -0
- ssi_analysis_result_parsers-0.0.1/test_input/Legionella/lag-1_blast.tsv +1 -0
- ssi_analysis_result_parsers-0.0.1/test_input/Legionella/test.sbt.tsv +2 -0
- ssi_analysis_result_parsers-0.0.1/test_input/blast_parser/allele_matches_test.tsv +536 -0
- ssi_analysis_result_parsers-0.0.1/test_input/blast_parser/gene_presence_absence_test.tsv +3 -0
- ssi_analysis_result_parsers-0.0.1/test_output/output_with_sample_name.tsv +2 -0
@@ -0,0 +1,8 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
Copyright © 2023 Statens Serum Institut
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
5
|
+
|
6
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
7
|
+
|
8
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,9 @@
|
|
1
|
+
include settings.ini
|
2
|
+
include LICENSE
|
3
|
+
include CONTRIBUTING.md
|
4
|
+
include README.md
|
5
|
+
recursive-exclude * __pycache__
|
6
|
+
include ssi_analysis_result_parsers/config/config.default.env
|
7
|
+
include ssi_analysis_result_parsers/config/config.default.yaml
|
8
|
+
recursive-include test_input *
|
9
|
+
recursive-include test_output *
|
@@ -0,0 +1,109 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: ssi_analysis_result_parsers
|
3
|
+
Version: 0.0.1
|
4
|
+
Summary: TODO
|
5
|
+
Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
|
6
|
+
Author: thej-ssi
|
7
|
+
Author-email: thej@ssi.dk
|
8
|
+
License: MIT License
|
9
|
+
Keywords: nbdev jupyter notebook python
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: Natural Language :: English
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
18
|
+
Requires-Python: >=3.9
|
19
|
+
Description-Content-Type: text/markdown
|
20
|
+
License-File: LICENSE
|
21
|
+
Requires-Dist: fastcore
|
22
|
+
Requires-Dist: python_dotenv
|
23
|
+
Requires-Dist: envyaml
|
24
|
+
Requires-Dist: pandas
|
25
|
+
Requires-Dist: black
|
26
|
+
Provides-Extra: dev
|
27
|
+
Dynamic: author
|
28
|
+
Dynamic: author-email
|
29
|
+
Dynamic: classifier
|
30
|
+
Dynamic: description
|
31
|
+
Dynamic: description-content-type
|
32
|
+
Dynamic: home-page
|
33
|
+
Dynamic: keywords
|
34
|
+
Dynamic: license
|
35
|
+
Dynamic: license-file
|
36
|
+
Dynamic: provides-extra
|
37
|
+
Dynamic: requires-dist
|
38
|
+
Dynamic: requires-python
|
39
|
+
Dynamic: summary
|
40
|
+
|
41
|
+
# ssi_analysis_result_parsers
|
42
|
+
|
43
|
+
|
44
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
45
|
+
|
46
|
+
This file will become your README and also the index of your
|
47
|
+
documentation.
|
48
|
+
|
49
|
+
## Developer Guide
|
50
|
+
|
51
|
+
If you are new to using `nbdev` here are some useful pointers to get you
|
52
|
+
started.
|
53
|
+
|
54
|
+
### Install ssi_analysis_result_parsers in Development mode
|
55
|
+
|
56
|
+
``` sh
|
57
|
+
# make sure ssi_analysis_result_parsers package is installed in development mode
|
58
|
+
$ pip install -e .
|
59
|
+
|
60
|
+
# make changes under nbs/ directory
|
61
|
+
# ...
|
62
|
+
|
63
|
+
# compile to have changes apply to ssi_analysis_result_parsers
|
64
|
+
$ nbdev_prepare
|
65
|
+
```
|
66
|
+
|
67
|
+
## Usage
|
68
|
+
|
69
|
+
### Installation
|
70
|
+
|
71
|
+
Install latest from the GitHub
|
72
|
+
[repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers):
|
73
|
+
|
74
|
+
``` sh
|
75
|
+
$ pip install git+https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers.git
|
76
|
+
```
|
77
|
+
|
78
|
+
or from
|
79
|
+
[conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
|
80
|
+
|
81
|
+
``` sh
|
82
|
+
$ conda install -c $GIT_USER_NAME ssi_analysis_result_parsers
|
83
|
+
```
|
84
|
+
|
85
|
+
or from [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
|
86
|
+
|
87
|
+
``` sh
|
88
|
+
$ pip install ssi_analysis_result_parsers
|
89
|
+
```
|
90
|
+
|
91
|
+
### Documentation
|
92
|
+
|
93
|
+
Documentation can be found hosted on this GitHub
|
94
|
+
[repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers)’s
|
95
|
+
[pages](https://$GIT_USER_NAME.github.io/ssi_analysis_result_parsers/).
|
96
|
+
Additionally you can find package manager specific guidelines on
|
97
|
+
[conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
|
98
|
+
and [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
|
99
|
+
respectively.
|
100
|
+
|
101
|
+
## How to use
|
102
|
+
|
103
|
+
Fill me in please! Don’t forget code examples:
|
104
|
+
|
105
|
+
``` python
|
106
|
+
1+1
|
107
|
+
```
|
108
|
+
|
109
|
+
2
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# ssi_analysis_result_parsers
|
2
|
+
|
3
|
+
|
4
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
5
|
+
|
6
|
+
This file will become your README and also the index of your
|
7
|
+
documentation.
|
8
|
+
|
9
|
+
## Developer Guide
|
10
|
+
|
11
|
+
If you are new to using `nbdev` here are some useful pointers to get you
|
12
|
+
started.
|
13
|
+
|
14
|
+
### Install ssi_analysis_result_parsers in Development mode
|
15
|
+
|
16
|
+
``` sh
|
17
|
+
# make sure ssi_analysis_result_parsers package is installed in development mode
|
18
|
+
$ pip install -e .
|
19
|
+
|
20
|
+
# make changes under nbs/ directory
|
21
|
+
# ...
|
22
|
+
|
23
|
+
# compile to have changes apply to ssi_analysis_result_parsers
|
24
|
+
$ nbdev_prepare
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
### Installation
|
30
|
+
|
31
|
+
Install latest from the GitHub
|
32
|
+
[repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers):
|
33
|
+
|
34
|
+
``` sh
|
35
|
+
$ pip install git+https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers.git
|
36
|
+
```
|
37
|
+
|
38
|
+
or from
|
39
|
+
[conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
|
40
|
+
|
41
|
+
``` sh
|
42
|
+
$ conda install -c $GIT_USER_NAME ssi_analysis_result_parsers
|
43
|
+
```
|
44
|
+
|
45
|
+
or from [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
|
46
|
+
|
47
|
+
``` sh
|
48
|
+
$ pip install ssi_analysis_result_parsers
|
49
|
+
```
|
50
|
+
|
51
|
+
### Documentation
|
52
|
+
|
53
|
+
Documentation can be found hosted on this GitHub
|
54
|
+
[repository](https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers)’s
|
55
|
+
[pages](https://$GIT_USER_NAME.github.io/ssi_analysis_result_parsers/).
|
56
|
+
Additionally you can find package manager specific guidelines on
|
57
|
+
[conda](https://anaconda.org/$GIT_USER_NAME/ssi_analysis_result_parsers)
|
58
|
+
and [pypi](https://pypi.org/project/ssi_analysis_result_parsers/)
|
59
|
+
respectively.
|
60
|
+
|
61
|
+
## How to use
|
62
|
+
|
63
|
+
Fill me in please! Don’t forget code examples:
|
64
|
+
|
65
|
+
``` python
|
66
|
+
1+1
|
67
|
+
```
|
68
|
+
|
69
|
+
2
|
@@ -0,0 +1,52 @@
|
|
1
|
+
[DEFAULT]
|
2
|
+
# All sections below are required unless otherwise specified.
|
3
|
+
# See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
|
4
|
+
|
5
|
+
### Python library ###
|
6
|
+
repo = ssi_analysis_result_parsers
|
7
|
+
lib_name = %(repo)s
|
8
|
+
version = 0.0.1
|
9
|
+
min_python = 3.9
|
10
|
+
license = MIT
|
11
|
+
black_formatting = True
|
12
|
+
|
13
|
+
### nbdev ###
|
14
|
+
doc_path = _docs
|
15
|
+
lib_path = ssi_analysis_result_parsers
|
16
|
+
nbs_path = nbs
|
17
|
+
recursive = True
|
18
|
+
tst_flags = notest
|
19
|
+
put_version_in_init = True
|
20
|
+
update_pyproject = True
|
21
|
+
|
22
|
+
### Docs ###
|
23
|
+
branch = main
|
24
|
+
custom_sidebar = False
|
25
|
+
doc_host = https://%(user)s.github.io
|
26
|
+
doc_baseurl = /%(repo)s
|
27
|
+
git_url = https://github.com/%(user)s/%(repo)s
|
28
|
+
title = %(lib_name)s
|
29
|
+
|
30
|
+
### PyPI ###
|
31
|
+
audience = Developers
|
32
|
+
author = thej-ssi
|
33
|
+
author_email = thej@ssi.dk
|
34
|
+
copyright = 2025 onwards, %(author)s
|
35
|
+
description = TODO
|
36
|
+
keywords = nbdev jupyter notebook python
|
37
|
+
language = English
|
38
|
+
status = 3
|
39
|
+
user = thej-ssi
|
40
|
+
|
41
|
+
### Optional ###
|
42
|
+
# requirements = fastcore pandas
|
43
|
+
# dev_requirements =
|
44
|
+
# console_scripts =
|
45
|
+
# conda_user =
|
46
|
+
# package_data =
|
47
|
+
requirements = fastcore
|
48
|
+
pip_requirements = python_dotenv envyaml pandas black
|
49
|
+
console_scripts =
|
50
|
+
blast_parser_presence_absence=ssi_analysis_result_parsers.blast_parser:presence_absence
|
51
|
+
blast_parser_allele_matches=ssi_analysis_result_parsers.blast_parser:allele_matches
|
52
|
+
legionella_parser=ssi_analysis_result_parsers.Legionella_parser:legionella_parser
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# nbdev-generated install script: every project setting is read from
# settings.ini and forwarded to setuptools.setup(). Edit settings.ini, not here.
from pkg_resources import parse_version
from configparser import ConfigParser
import setuptools, shlex
# Refuse to build with a setuptools older than the minimum nbdev supports.
assert parse_version(setuptools.__version__)>=parse_version('36.2')

# note: all settings are in settings.ini; edit there, not here
config = ConfigParser(delimiters=['='])
config.read('settings.ini', encoding='utf-8')
cfg = config['DEFAULT']

# Settings copied verbatim into the setuptools.setup(**setup_cfg) call below;
# `expected` is the superset that must be present in settings.ini.
cfg_keys = 'version description keywords author author_email'.split()
expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
setup_cfg = {o:cfg[o] for o in cfg_keys}

# Map of settings.ini `license` keys to (license name, trove classifier suffix).
licenses = {
    'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
    'mit': ('MIT License', 'OSI Approved :: MIT License'),
    'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
    'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
    'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
}
# Development-status classifiers, indexed by the numeric `status` setting.
statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
    '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
py_versions = '3.6 3.7 3.8 3.9 3.10 3.11 3.12'.split()

# `requirements` and pip-only `pip_requirements` are merged into install_requires.
requirements = shlex.split(cfg.get('requirements', ''))
if cfg.get('pip_requirements'): requirements += shlex.split(cfg.get('pip_requirements', ''))
min_python = cfg['min_python']
# Unknown license keys fall back to (verbatim name, no classifier).
lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
dev_requirements = (cfg.get('dev_requirements') or '').split()

# Optional package_data setting: space-separated file list under lib_name.
package_data = dict()
pkg_data = cfg.get('package_data', None)
if pkg_data:
    package_data[cfg['lib_name']] = pkg_data.split()  # split as multiple files might be listed
# Add package data to setup_cfg for setuptools.setup(..., **setup_cfg)
setup_cfg['package_data'] = package_data

setuptools.setup(
    name = cfg['lib_name'],
    license = lic[0],
    # Classifiers are assembled from status, audience, language, every Python
    # version from min_python upward, and the license classifier when known.
    classifiers = [
        'Development Status :: ' + statuses[int(cfg['status'])],
        'Intended Audience :: ' + cfg['audience'].title(),
        'Natural Language :: ' + cfg['language'].title(),
    ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
    url = cfg['git_url'],
    packages = setuptools.find_namespace_packages(),
    include_package_data = True,
    install_requires = requirements,
    extras_require={ 'dev': dev_requirements },
    dependency_links = cfg.get('dep_links','').split(),
    python_requires = '>=' + cfg['min_python'],
    long_description = open('README.md', encoding='utf-8').read(),
    long_description_content_type = 'text/markdown',
    zip_safe = False,
    entry_points = {
        # console_scripts come straight from settings.ini; the `nbdev` entry
        # point exposes the module index (_modidx.d) to nbdev's doc tooling.
        'console_scripts': cfg.get('console_scripts','').split(),
        'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
    },
    **setup_cfg)
|
63
|
+
|
64
|
+
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
|
2
|
+
|
3
|
+
# %% auto 0
|
4
|
+
__all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_parser']
|
5
|
+
|
6
|
+
# %% ../nbs/39_Legionella_parser.ipynb 3
|
7
|
+
# standard libs
|
8
|
+
import os
|
9
|
+
import re
|
10
|
+
|
11
|
+
# Common to template
|
12
|
+
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
|
13
|
+
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
|
14
|
+
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
|
15
|
+
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
|
16
|
+
from fastcore import (
|
17
|
+
test,
|
18
|
+
)
|
19
|
+
from fastcore.script import (
|
20
|
+
call_parse,
|
21
|
+
) # for @call_parse, https://fastcore.fast.ai/script
|
22
|
+
import json # for nicely printing json and yaml
|
23
|
+
|
24
|
+
# import functions from core module (optional, but most likely needed).
|
25
|
+
from ssi_analysis_result_parsers import (
|
26
|
+
core,
|
27
|
+
)
|
28
|
+
from .blast_parser import extract_presence_absence
|
29
|
+
|
30
|
+
# Project specific libraries
|
31
|
+
from pathlib import Path
|
32
|
+
import pandas
|
33
|
+
import sys
|
34
|
+
|
35
|
+
# %% ../nbs/39_Legionella_parser.ipynb 6
|
36
|
+
def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
    """
    Return a dictionary of the results in a Legionella SBT summary file.

    Reads the tab-separated summary, indexes it by its "sample" column and
    returns the first sample's row as a {column: value} dict. If the file
    does not exist, a message is printed to stderr and None is returned.
    """
    if not os.path.exists(legionella_sbt_results_tsv):
        print(
            f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
            file=sys.stderr,
        )
        return None
    sbt_table = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
    sbt_table.set_index("sample", inplace=True, drop=True)
    per_sample = sbt_table.to_dict(orient="index")
    # The summary is expected to hold a single sample; take the first row.
    first_sample = next(iter(per_sample))
    return per_sample[first_sample]
|
52
|
+
|
53
|
+
|
54
|
+
def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
    """
    Combine Legionella SBT typing results with lag-1 presence/absence.

    Merges the SBT summary dict with the lag-1 blast hit dict via
    core.update_results_dict; on key collisions the SBT value is kept
    under an "SBT: "-prefixed key.
    """
    sbt_part = extract_legionella_sbt(
        legionella_sbt_results_tsv=legionella_sbt_results_tsv
    )
    lag1_part = extract_presence_absence(
        blast_output_tsv=lag1_blast_tsv,
        hits_as_string=False,
        include_match_stats=False,
        gene_names=["lag-1"],
    )
    return core.update_results_dict(
        sbt_part, lag1_part, old_duplicate_key_prefix="SBT: "
    )
|
68
|
+
|
69
|
+
# %% ../nbs/39_Legionella_parser.ipynb 9
|
70
|
+
@call_parse
def legionella_parser(
    legionella_sbt_file: Path = None,  # Path "*.sbt.tsv from legionella_sbt program"
    lag_1_blast_output: Path = None,  # Path to output from lag1_blast. Generated with blastn -query lag-1.fasta -subject assembly.fasta -outfmt "6 qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore"
    output_file: Path = None,  # Path to output tsv
    sample_name: str = None,
    config_file: str = None,  # config file to set env vars from
) -> None:
    """ """
    # config = core.get_config(config_file) # Set env vars and get config variables
    summary = legionella_summary(
        legionella_sbt_results_tsv=legionella_sbt_file,
        lag1_blast_tsv=lag_1_blast_output,
    )
    # Write the combined dict as a one-row tsv (with sample_name when given).
    core.print_results_dict_to_tsv(
        results_dict=summary,
        output_file=output_file,
        sample_name=sample_name,
    )
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.0.1"
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Autogenerated by nbdev
|
2
|
+
|
3
|
+
# Module index mapping each exported symbol to its documentation anchor and
# source file; consumed by nbdev tooling via the `nbdev` entry point in setup.py.
d = { 'settings': { 'branch': 'main',
                    'doc_baseurl': '/ssi_analysis_result_parsers',
                    'doc_host': 'https://$GIT_USER_NAME.github.io',
                    'git_url': 'https://github.com/$GIT_USER_NAME/ssi_analysis_result_parsers',
                    'lib_path': 'ssi_analysis_result_parsers'},
      'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
                                                                       'ssi_analysis_result_parsers/Legionella_parser.py'),
                                                                   'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
                                                                       'ssi_analysis_result_parsers/Legionella_parser.py'),
                                                                   'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
                                                                       'ssi_analysis_result_parsers/Legionella_parser.py')},
                'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
                                                                  'ssi_analysis_result_parsers/blast_parser.py'),
                                                              'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
                                                                  'ssi_analysis_result_parsers/blast_parser.py'),
                                                              'ssi_analysis_result_parsers.blast_parser.extract_presence_absence': ( 'blast_parser.html#extract_presence_absence',
                                                                  'ssi_analysis_result_parsers/blast_parser.py'),
                                                              'ssi_analysis_result_parsers.blast_parser.presence_absence': ( 'blast_parser.html#presence_absence',
                                                                  'ssi_analysis_result_parsers/blast_parser.py')},
                'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.get_config': ( 'core.html#get_config',
                                                          'ssi_analysis_result_parsers/core.py'),
                                                      'ssi_analysis_result_parsers.core.get_samplesheet': ( 'core.html#get_samplesheet',
                                                          'ssi_analysis_result_parsers/core.py'),
                                                      'ssi_analysis_result_parsers.core.print_results_dict_to_tsv': ( 'core.html#print_results_dict_to_tsv',
                                                          'ssi_analysis_result_parsers/core.py'),
                                                      'ssi_analysis_result_parsers.core.set_env_variables': ( 'core.html#set_env_variables',
                                                          'ssi_analysis_result_parsers/core.py'),
                                                      'ssi_analysis_result_parsers.core.show_project_env_vars': ( 'core.html#show_project_env_vars',
                                                          'ssi_analysis_result_parsers/core.py'),
                                                      'ssi_analysis_result_parsers.core.update_results_dict': ( 'core.html#update_results_dict',
                                                          'ssi_analysis_result_parsers/core.py')},
                'ssi_analysis_result_parsers.hello_world': { 'ssi_analysis_result_parsers.hello_world.cli': ( 'hello_world.html#cli',
                                                                 'ssi_analysis_result_parsers/hello_world.py'),
                                                             'ssi_analysis_result_parsers.hello_world.hello_world': ( 'hello_world.html#hello_world',
                                                                 'ssi_analysis_result_parsers/hello_world.py')},
                'ssi_analysis_result_parsers.some_string': {}}}
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/11_blast_parser.ipynb.
|
2
|
+
|
3
|
+
# %% auto 0
|
4
|
+
__all__ = ['extract_presence_absence', 'extract_allele_matches', 'presence_absence', 'allele_matches']
|
5
|
+
|
6
|
+
# %% ../nbs/11_blast_parser.ipynb 3
|
7
|
+
# standard libs
|
8
|
+
import os
|
9
|
+
import re
|
10
|
+
|
11
|
+
# Common to template
|
12
|
+
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
|
13
|
+
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
|
14
|
+
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
|
15
|
+
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
|
16
|
+
from fastcore import (
|
17
|
+
test,
|
18
|
+
)
|
19
|
+
from fastcore.script import (
|
20
|
+
call_parse,
|
21
|
+
) # for @call_parse, https://fastcore.fast.ai/script
|
22
|
+
import json # for nicely printing json and yaml
|
23
|
+
|
24
|
+
# import functions from core module (optional, but most likely needed).
|
25
|
+
from . import core
|
26
|
+
|
27
|
+
# Project specific libraries
|
28
|
+
from pathlib import Path
|
29
|
+
import pandas
|
30
|
+
import sys
|
31
|
+
|
32
|
+
# %% ../nbs/11_blast_parser.ipynb 6
|
33
|
+
def extract_presence_absence(
    blast_output_tsv: Path,
    tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",
    hits_as_string: bool = True,
    include_match_stats=False,
    pident_threshold: float = 90,
    plen_threshold: float = 60,
    gene_names: list = None,
) -> dict:
    """
    Parse a headerless tabular blast output file and report gene presence/absence.

    Only the best hit (highest bitscore) per query sequence is kept, and hits
    must exceed both pident_threshold and plen_threshold (percent of the query
    covered) to count as present.

    returns:
        if hits_as_string:
            {"genes_found": "<gene_1>, <gene_2>, ..."}; each entry becomes
            "<gene>__<pident>__<plen>" when include_match_stats is set
        else:
            one key per gene in gene_names (or per found gene when gene_names
            is None) mapped to "1"/"0", or to "<pident>__<plen>"/"0" when
            include_match_stats is set
        Returns None (after printing to stderr) when the file is missing.
    """
    if not os.path.exists(blast_output_tsv):
        print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)
        return

    hits = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
    hits.columns = tsv_header.split(" ")
    # Percent of the query sequence covered by the alignment.
    hits["plen"] = hits["length"] / hits["qlen"] * 100
    best_per_query = (
        hits.sort_values(by=["bitscore"], ascending=False).groupby("qseqid").first()
    )
    passing = best_per_query.query(
        "plen > @plen_threshold and pident > @pident_threshold"
    )

    if hits_as_string:
        if include_match_stats:
            parts = [
                f"{gene}__{row['pident']}__{row['plen']}"
                for gene, row in passing.to_dict(orient="index").items()
            ]
            return {"genes_found": ", ".join(parts)}
        return {"genes_found": ", ".join(list(passing.index.values))}

    found = dict(passing.to_dict(orient="index").items())
    if gene_names is None:
        gene_names = found.keys()
    presence = {}
    for gene in gene_names:
        if gene not in found:
            presence[gene] = "0"
        elif include_match_stats:
            presence[gene] = f"{found[gene]['pident']}__{found[gene]['plen']}"
        else:
            presence[gene] = "1"
    return presence
|
96
|
+
|
97
|
+
|
98
|
+
def extract_allele_matches(
    blast_output_tsv: Path, tsv_header: str, include_match_stats=False
) -> dict:
    """
    Parse blast output tsv for best matching alleles.

    Query sequence ids are expected to be "<gene>_<allele>". The allele is
    taken from the text after the LAST underscore, so gene names may
    themselves contain underscores. Only the hit with the highest bitscore
    is kept per gene.

    returns:
        if include_match_stats:
            { <gene_name>: "<allele_number>__<pident>__<plen>", ...}
        else:
            { <gene_name>: <allele_number>, ...}
        An empty dict is returned (after printing to stderr) when the blast
        output file does not exist.
    """
    allele_dict = {}
    detailed_dict = {}
    if os.path.exists(blast_output_tsv):
        blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
        blast_df.columns = tsv_header.split(" ")
        blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
        # Bug fix: a plain split("_", expand=True) produced more than two
        # columns (and raised) whenever a gene name contained an underscore;
        # rsplit on the last underscore always yields exactly gene + allele.
        blast_df[["gene", "allele"]] = blast_df["qseqid"].str.rsplit(
            "_", n=1, expand=True
        )
        blast_df_unique = (
            blast_df.sort_values(by=["bitscore"], ascending=False)
            .groupby("gene")
            .first()
        )
        for gene, d in blast_df_unique.to_dict(orient="index").items():
            allele_dict[gene] = d["allele"]
            detailed_dict[gene] = f"{d['allele']}__{d['pident']}__{d['plen']}"
    else:
        print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)

    if include_match_stats:
        return detailed_dict
    else:
        return allele_dict
|
133
|
+
|
134
|
+
# %% ../nbs/11_blast_parser.ipynb 9
|
135
|
+
from fastcore.script import call_parse
|
136
|
+
|
137
|
+
|
138
|
+
@call_parse
def presence_absence(
    blast_output: Path = None,  # Path to blast output file. Generated with --outfmt 6 option
    blast_tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",  # headers in blast output
    hits_as_string: bool = True,  # True to print a comma separated list of found genes on a single line. False to return a key: value pair for each gene
    include_match_stats: bool = False,  # True to include percent identity and percent length in output, false to only include present/absent
    percent_identityt: float = 90,  # percent identity threshold for considering a gene present (misspelled name kept for CLI backward compatibility)
    percent_length: float = 60,  # percent length threshold for considering a gene present
    gene_names: list = None,  # name of genes to look for when hits_as_string = False
    output_file: Path = None,  # Path to output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """ """
    # config = core.get_config(config_file) # Set env vars and get config variables
    gene_presence_dict = extract_presence_absence(
        blast_output_tsv=blast_output,
        tsv_header=blast_tsv_header,
        hits_as_string=hits_as_string,
        include_match_stats=include_match_stats,
        pident_threshold=percent_identityt,
        plen_threshold=percent_length,
        gene_names=gene_names,
    )
    # Bug fix: the parsed results were previously computed and then silently
    # discarded (output_file was ignored). Write them out the same way the
    # legionella_parser entry point does.
    core.print_results_dict_to_tsv(
        results_dict=gene_presence_dict,
        output_file=output_file,
        sample_name=None,
    )
|
161
|
+
|
162
|
+
|
163
|
+
@call_parse
def allele_matches(
    blast_output: Path = None,  # Path to blast output file. Generated with --outfmt 6 option
    blast_tsv_header: str = "qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore",  # headers in blast output
    include_match_stats: bool = False,  # True to include percent identity and percent length in output, false to only include allele number
    output_file: Path = None,  # Path to output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """ """
    # config = core.get_config(config_file) # Set env vars and get config variables
    # Bug fix: extract_allele_matches() takes no output_file parameter, so the
    # previous call always raised TypeError; output is written separately below.
    allele_dict = extract_allele_matches(
        blast_output_tsv=blast_output,
        tsv_header=blast_tsv_header,
        include_match_stats=include_match_stats,
    )
    core.print_results_dict_to_tsv(
        results_dict=allele_dict,
        output_file=output_file,
        sample_name=None,
    )
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# environmental (ENV) variables. These are written as SSI_ANALYSIS_RESULT_PARSERS_VARIABLENAME=VALUE to avoid conflicts with other ENV variables.
|
2
|
+
# Using the standard template these values can be overwritten by:
|
3
|
+
# - defining SSI_ANALYSIS_RESULT_PARSERS_CONFIG_FILE pointing to a similar file with a subset of values
|
4
|
+
# - setting the values as environmental variables.
|
5
|
+
# The priority goes env variables > config file > default file.
|
6
|
+
# The all configs other than config.default.env are in .gitignore
|
7
|
+
# All .env config files should have an associated .yaml config file with it which the program interacts with.
|
8
|
+
|
9
|
+
# NOTE: remember if referencing another ENV var as a variable it needs to be defined first
|
10
|
+
|
11
|
+
# If more structured variables are needed use config.default.yaml or another of your own creation
|
12
|
+
# This file path is stored as CORE_CONFIG_FILE when overriding
|
13
|
+
# It is commented out because of the default use case, but should be included for all non default cases.
|
14
|
+
# CORE_YAML_CONFIG_FILE=
|
15
|
+
CORE_PROJECT_VARIABLE_PREFIX=SSI_ANALYSIS_RESULT_PARSERS_
|
16
|
+
# For testing purposes
|
17
|
+
CORE_TEST_VAR="Test"
|
18
|
+
|
19
|
+
# Example variable please exchange with relevant variables
|
20
|
+
SSI_ANALYSIS_RESULT_PARSERS_INPUT_DIR=./input
|
21
|
+
SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_DIR=./output
|
22
|
+
SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_FILE=${SSI_ANALYSIS_RESULT_PARSERS_OUTPUT_DIR}/output.txt
|
23
|
+
SSI_ANALYSIS_RESULT_PARSERS_USER_INPUT_NAME=Kim
|
24
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# When accessing this in the code you'll work with it as a dict.
|
2
|
+
# ENV variables will be replaced with their values. This is done with the envyaml package that is in the code template `load_config`.
|
3
|
+
# By convention all variables for the project should have the SSI_ANALYSIS_RESULT_PARSERS_* prefix.
|
4
|
+
# e.g
|
5
|
+
# name: ${SSI_ANALYSIS_RESULT_PARSERS_NAME}
|
6
|
+
example:
|
7
|
+
input:
|
8
|
+
name: ${SSI_ANALYSIS_RESULT_PARSERS_USER_INPUT_NAME}
|
9
|
+
alternative_name: Lee
|