ssi-analysis-result-parsers 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssi_analysis_result_parsers/Legionella_parser.py +22 -9
- ssi_analysis_result_parsers/__init__.py +1 -1
- ssi_analysis_result_parsers/blast_parser.py +21 -11
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/METADATA +1 -1
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/RECORD +11 -10
- test_input/Legionella/batch_parser_file_paths.tsv +2 -0
- test_input/empty_file.txt +0 -0
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/WHEEL +0 -0
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/entry_points.txt +0 -0
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/licenses/LICENSE +0 -0
- {ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/top_level.txt +0 -0
@@ -25,8 +25,10 @@ import json # for nicely printing json and yaml
|
|
25
25
|
# import functions from core module (optional, but most likely needed).
|
26
26
|
from ssi_analysis_result_parsers import (
|
27
27
|
core,
|
28
|
+
blast_parser,
|
28
29
|
)
|
29
|
-
|
30
|
+
|
31
|
+
# from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
|
30
32
|
|
31
33
|
# Project specific libraries
|
32
34
|
from pathlib import Path
|
@@ -39,11 +41,18 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
|
|
39
41
|
Returns dictionary of results found in the Legionella SBT summary output
|
40
42
|
"""
|
41
43
|
if os.path.exists(legionella_sbt_results_tsv):
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
44
|
+
try:
|
45
|
+
df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
|
46
|
+
df.set_index("sample", inplace=True, drop=True)
|
47
|
+
d = df.to_dict(orient="index")
|
48
|
+
fname = next(iter(d))
|
49
|
+
return d[fname]
|
50
|
+
except pandas.errors.EmptyDataError:
|
51
|
+
print(
|
52
|
+
f"No Legionella SBT output empty at {legionella_sbt_results_tsv}",
|
53
|
+
file=sys.stderr,
|
54
|
+
)
|
55
|
+
return None
|
47
56
|
else:
|
48
57
|
print(
|
49
58
|
f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
|
@@ -56,7 +65,7 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
|
|
56
65
|
sbt_results_dict = extract_legionella_sbt(
|
57
66
|
legionella_sbt_results_tsv=legionella_sbt_results_tsv
|
58
67
|
)
|
59
|
-
lag1_blast_dict = extract_presence_absence(
|
68
|
+
lag1_blast_dict = blast_parser.extract_presence_absence(
|
60
69
|
blast_output_tsv=lag1_blast_tsv,
|
61
70
|
hits_as_string=False,
|
62
71
|
include_match_stats=False,
|
@@ -65,6 +74,8 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
|
|
65
74
|
results_dict = core.update_results_dict(
|
66
75
|
sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
|
67
76
|
)
|
77
|
+
if results_dict is None:
|
78
|
+
return {}
|
68
79
|
return results_dict
|
69
80
|
|
70
81
|
|
@@ -133,22 +144,24 @@ class LegionellaResults(core.PipelineResults):
|
|
133
144
|
results_dict[sample_name] = legionella_results
|
134
145
|
return cls(results_dict)"""
|
135
146
|
|
147
|
+
@staticmethod
|
136
148
|
def legionella_summary(
|
137
149
|
legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
|
138
150
|
) -> dict:
|
139
151
|
sbt_results_dict = extract_legionella_sbt(
|
140
152
|
legionella_sbt_results_tsv=legionella_sbt_results_tsv
|
141
153
|
)
|
142
|
-
lag1_blast_dict = extract_presence_absence(
|
154
|
+
lag1_blast_dict = blast_parser.extract_presence_absence(
|
143
155
|
blast_output_tsv=lag1_blast_tsv,
|
144
156
|
hits_as_string=False,
|
145
157
|
include_match_stats=False,
|
146
158
|
gene_names=["lag-1"],
|
147
159
|
)
|
148
|
-
print(lag1_blast_dict)
|
149
160
|
results_dict = core.update_results_dict(
|
150
161
|
sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
|
151
162
|
)
|
163
|
+
if results_dict is None:
|
164
|
+
return {}
|
152
165
|
return results_dict
|
153
166
|
|
154
167
|
def __repr__(self):
|
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.
|
1
|
+
__version__ = "0.0.5"
|
@@ -51,21 +51,31 @@ def extract_presence_absence(
|
|
51
51
|
if os.path.exists(blast_output_tsv):
|
52
52
|
try:
|
53
53
|
blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
|
54
|
+
header_list = tsv_header.split(" ")
|
55
|
+
if len(header_list) == len(blast_df.columns):
|
56
|
+
blast_df.columns = tsv_header.split(" ")
|
57
|
+
blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
|
58
|
+
blast_df_unique = (
|
59
|
+
blast_df.sort_values(by=["bitscore"], ascending=False)
|
60
|
+
.groupby("qseqid")
|
61
|
+
.first()
|
62
|
+
)
|
63
|
+
blast_df_filtered = blast_df_unique.query(
|
64
|
+
"plen > @plen_threshold and pident > @pident_threshold"
|
65
|
+
)
|
66
|
+
blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
|
67
|
+
else:
|
68
|
+
print(
|
69
|
+
f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
|
70
|
+
file=sys.stderr,
|
71
|
+
)
|
72
|
+
return None
|
54
73
|
|
55
|
-
blast_df.columns = tsv_header.split(" ")
|
56
|
-
blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
|
57
|
-
blast_df_unique = (
|
58
|
-
blast_df.sort_values(by=["bitscore"], ascending=False)
|
59
|
-
.groupby("qseqid")
|
60
|
-
.first()
|
61
|
-
)
|
62
|
-
blast_df_filtered = blast_df_unique.query(
|
63
|
-
"plen > @plen_threshold and pident > @pident_threshold"
|
64
|
-
)
|
65
|
-
blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
|
66
74
|
except pandas.errors.EmptyDataError:
|
67
75
|
blast_dict = {}
|
68
76
|
print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
|
77
|
+
except Exception as e:
|
78
|
+
print(f"Error parsing blast: e")
|
69
79
|
if hits_as_string:
|
70
80
|
|
71
81
|
results = []
|
{ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/RECORD
RENAMED
@@ -1,15 +1,16 @@
|
|
1
|
-
ssi_analysis_result_parsers/Legionella_parser.py,sha256=
|
2
|
-
ssi_analysis_result_parsers/__init__.py,sha256=
|
1
|
+
ssi_analysis_result_parsers/Legionella_parser.py,sha256=8n15TEiOY1OxAe4VePVzaXz7WFen6U1y3pM5Vsjsg7U,9292
|
2
|
+
ssi_analysis_result_parsers/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
|
3
3
|
ssi_analysis_result_parsers/_modidx.py,sha256=JY_GM0tMojzTtX9O4D8as4k5a-sXqkxkb7ZUEPzhuMk,12232
|
4
|
-
ssi_analysis_result_parsers/blast_parser.py,sha256=
|
4
|
+
ssi_analysis_result_parsers/blast_parser.py,sha256=EBqWlx8bDlaSzqAZomiUGnT2DGaaA-L7ukny7SEJbpk,7915
|
5
5
|
ssi_analysis_result_parsers/core.py,sha256=6TGURv8spPdBpwKv6LvqvbVzJChdeHwsG3WQ6QLUuvE,12124
|
6
6
|
ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
|
7
7
|
ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
|
8
8
|
ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
|
9
9
|
ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
|
10
|
-
ssi_analysis_result_parsers-0.0.
|
10
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
|
11
11
|
test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
|
12
|
-
test_input/
|
12
|
+
test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
|
13
14
|
test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
|
14
15
|
test_input/Legionella/lag-1_blast_2.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
16
|
test_input/Legionella/test.sbt.tsv,sha256=ibhaH3is2dxHaABPvR2QM2HAq9bKOs1AwOTmrwSrcd8,168
|
@@ -20,8 +21,8 @@ test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDh
|
|
20
21
|
test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
|
21
22
|
test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
22
23
|
test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
23
|
-
ssi_analysis_result_parsers-0.0.
|
24
|
-
ssi_analysis_result_parsers-0.0.
|
25
|
-
ssi_analysis_result_parsers-0.0.
|
26
|
-
ssi_analysis_result_parsers-0.0.
|
27
|
-
ssi_analysis_result_parsers-0.0.
|
24
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/METADATA,sha256=BF-cuY_EJow8haoGw99WeGWAf_zWWQdWQ4OFu42NtcM,2765
|
25
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
26
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
|
27
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
|
28
|
+
ssi_analysis_result_parsers-0.0.5.dist-info/RECORD,,
|
@@ -1,3 +1,5 @@
|
|
1
1
|
sample_name sbt_results lag1_blast_results
|
2
2
|
sample_1 test_input/Legionella/test.sbt.tsv test_input/Legionella/lag-1_blast.tsv
|
3
3
|
sample_2 test_input/Legionella/test2.sbt.tsv test_input/Legionella/lag-1_blast_2.tsv
|
4
|
+
sample_3 test_input/Legionella/test2.sbt.tsv test_input/empty_file.txt
|
5
|
+
sample_4 test_input/empty_file.txt test_input/Legionella/lag-1_blast_2.tsv
|
File without changes
|
{ssi_analysis_result_parsers-0.0.3.dist-info → ssi_analysis_result_parsers-0.0.5.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|