ssi-analysis-result-parsers 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,10 @@ import json # for nicely printing json and yaml
25
25
  # import functions from core module (optional, but most likely needed).
26
26
  from ssi_analysis_result_parsers import (
27
27
  core,
28
+ blast_parser,
28
29
  )
29
- from .blast_parser import extract_presence_absence
30
+
31
+ # from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
30
32
 
31
33
  # Project specific libraries
32
34
  from pathlib import Path
@@ -39,11 +41,18 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
39
41
  Returns dictionary of results found in the Legionella SBT summary output
40
42
  """
41
43
  if os.path.exists(legionella_sbt_results_tsv):
42
- df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
43
- df.set_index("sample", inplace=True, drop=True)
44
- d = df.to_dict(orient="index")
45
- fname = next(iter(d))
46
- return d[fname]
44
+ try:
45
+ df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
46
+ df.set_index("sample", inplace=True, drop=True)
47
+ d = df.to_dict(orient="index")
48
+ fname = next(iter(d))
49
+ return d[fname]
50
+ except pandas.errors.EmptyDataError:
51
+ print(
52
+ f"No Legionella SBT output empty at {legionella_sbt_results_tsv}",
53
+ file=sys.stderr,
54
+ )
55
+ return None
47
56
  else:
48
57
  print(
49
58
  f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
@@ -56,7 +65,7 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
56
65
  sbt_results_dict = extract_legionella_sbt(
57
66
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
58
67
  )
59
- lag1_blast_dict = extract_presence_absence(
68
+ lag1_blast_dict = blast_parser.extract_presence_absence(
60
69
  blast_output_tsv=lag1_blast_tsv,
61
70
  hits_as_string=False,
62
71
  include_match_stats=False,
@@ -135,13 +144,14 @@ class LegionellaResults(core.PipelineResults):
135
144
  results_dict[sample_name] = legionella_results
136
145
  return cls(results_dict)"""
137
146
 
147
+ @staticmethod
138
148
  def legionella_summary(
139
149
  legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
140
150
  ) -> dict:
141
151
  sbt_results_dict = extract_legionella_sbt(
142
152
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
143
153
  )
144
- lag1_blast_dict = extract_presence_absence(
154
+ lag1_blast_dict = blast_parser.extract_presence_absence(
145
155
  blast_output_tsv=lag1_blast_tsv,
146
156
  hits_as_string=False,
147
157
  include_match_stats=False,
@@ -150,6 +160,8 @@ class LegionellaResults(core.PipelineResults):
150
160
  results_dict = core.update_results_dict(
151
161
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
152
162
  )
163
+ if results_dict is None:
164
+ return {}
153
165
  return results_dict
154
166
 
155
167
  def __repr__(self):
@@ -1 +1 @@
1
- __version__ = "0.0.3"
1
+ __version__ = "0.0.5"
@@ -51,21 +51,31 @@ def extract_presence_absence(
51
51
  if os.path.exists(blast_output_tsv):
52
52
  try:
53
53
  blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
54
+ header_list = tsv_header.split(" ")
55
+ if len(header_list) == len(blast_df.columns):
56
+ blast_df.columns = tsv_header.split(" ")
57
+ blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
58
+ blast_df_unique = (
59
+ blast_df.sort_values(by=["bitscore"], ascending=False)
60
+ .groupby("qseqid")
61
+ .first()
62
+ )
63
+ blast_df_filtered = blast_df_unique.query(
64
+ "plen > @plen_threshold and pident > @pident_threshold"
65
+ )
66
+ blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
67
+ else:
68
+ print(
69
+ f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
70
+ file=sys.stderr,
71
+ )
72
+ return None
54
73
 
55
- blast_df.columns = tsv_header.split(" ")
56
- blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
57
- blast_df_unique = (
58
- blast_df.sort_values(by=["bitscore"], ascending=False)
59
- .groupby("qseqid")
60
- .first()
61
- )
62
- blast_df_filtered = blast_df_unique.query(
63
- "plen > @plen_threshold and pident > @pident_threshold"
64
- )
65
- blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
66
74
  except pandas.errors.EmptyDataError:
67
75
  blast_dict = {}
68
76
  print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
77
+ except Exception as e:
78
+ print(f"Error parsing blast: e")
69
79
  if hits_as_string:
70
80
 
71
81
  results = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -1,15 +1,16 @@
1
- ssi_analysis_result_parsers/Legionella_parser.py,sha256=CP5r1RriVd4zxeTBokLJYcu5iS6xbK3pBzI6xwITSm0,8894
2
- ssi_analysis_result_parsers/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
1
+ ssi_analysis_result_parsers/Legionella_parser.py,sha256=8n15TEiOY1OxAe4VePVzaXz7WFen6U1y3pM5Vsjsg7U,9292
2
+ ssi_analysis_result_parsers/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
3
3
  ssi_analysis_result_parsers/_modidx.py,sha256=JY_GM0tMojzTtX9O4D8as4k5a-sXqkxkb7ZUEPzhuMk,12232
4
- ssi_analysis_result_parsers/blast_parser.py,sha256=L7EdW2LUwSS2OQO7WZUAxP6whJXdvTILE2a3O59uv-s,7441
4
+ ssi_analysis_result_parsers/blast_parser.py,sha256=EBqWlx8bDlaSzqAZomiUGnT2DGaaA-L7ukny7SEJbpk,7915
5
5
  ssi_analysis_result_parsers/core.py,sha256=6TGURv8spPdBpwKv6LvqvbVzJChdeHwsG3WQ6QLUuvE,12124
6
6
  ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
7
7
  ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
8
8
  ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
9
9
  ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
10
- ssi_analysis_result_parsers-0.0.4.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
10
+ ssi_analysis_result_parsers-0.0.5.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
11
11
  test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
12
- test_input/Legionella/batch_parser_file_paths.tsv,sha256=zls11lmEA5U89d8RsX6PR8M1zXNVimeL4raqdZ3ijvQ,210
12
+ test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
13
14
  test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
14
15
  test_input/Legionella/lag-1_blast_2.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
16
  test_input/Legionella/test.sbt.tsv,sha256=ibhaH3is2dxHaABPvR2QM2HAq9bKOs1AwOTmrwSrcd8,168
@@ -20,8 +21,8 @@ test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDh
20
21
  test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
21
22
  test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
22
23
  test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
23
- ssi_analysis_result_parsers-0.0.4.dist-info/METADATA,sha256=r6IJQQ7JgRD_--UlM80lNMH5ZzlxYQCmvaVBIyPhF7k,2765
24
- ssi_analysis_result_parsers-0.0.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
25
- ssi_analysis_result_parsers-0.0.4.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
26
- ssi_analysis_result_parsers-0.0.4.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
27
- ssi_analysis_result_parsers-0.0.4.dist-info/RECORD,,
24
+ ssi_analysis_result_parsers-0.0.5.dist-info/METADATA,sha256=BF-cuY_EJow8haoGw99WeGWAf_zWWQdWQ4OFu42NtcM,2765
25
+ ssi_analysis_result_parsers-0.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
26
+ ssi_analysis_result_parsers-0.0.5.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
27
+ ssi_analysis_result_parsers-0.0.5.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
28
+ ssi_analysis_result_parsers-0.0.5.dist-info/RECORD,,
@@ -1,3 +1,5 @@
1
1
  sample_name sbt_results lag1_blast_results
2
2
  sample_1 test_input/Legionella/test.sbt.tsv test_input/Legionella/lag-1_blast.tsv
3
3
  sample_2 test_input/Legionella/test2.sbt.tsv test_input/Legionella/lag-1_blast_2.tsv
4
+ sample_3 test_input/Legionella/test2.sbt.tsv test_input/empty_file.txt
5
+ sample_4 test_input/empty_file.txt test_input/Legionella/lag-1_blast_2.tsv
File without changes