ssi-analysis-result-parsers 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,10 @@ import json # for nicely printing json and yaml
25
25
  # import functions from core module (optional, but most likely needed).
26
26
  from ssi_analysis_result_parsers import (
27
27
  core,
28
+ blast_parser,
28
29
  )
29
- from .blast_parser import extract_presence_absence
30
+
31
+ # from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
30
32
 
31
33
  # Project specific libraries
32
34
  from pathlib import Path
@@ -39,11 +41,18 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
39
41
  Returns dictionary of results found in the Legionella SBT summary output
40
42
  """
41
43
  if os.path.exists(legionella_sbt_results_tsv):
42
- df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
43
- df.set_index("sample", inplace=True, drop=True)
44
- d = df.to_dict(orient="index")
45
- fname = next(iter(d))
46
- return d[fname]
44
+ try:
45
+ df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
46
+ df.set_index("sample", inplace=True, drop=True)
47
+ d = df.to_dict(orient="index")
48
+ fname = next(iter(d))
49
+ return d[fname]
50
+ except pandas.errors.EmptyDataError:
51
+ print(
52
+ f"No Legionella SBT output empty at {legionella_sbt_results_tsv}",
53
+ file=sys.stderr,
54
+ )
55
+ return None
47
56
  else:
48
57
  print(
49
58
  f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
@@ -56,7 +65,7 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
56
65
  sbt_results_dict = extract_legionella_sbt(
57
66
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
58
67
  )
59
- lag1_blast_dict = extract_presence_absence(
68
+ lag1_blast_dict = blast_parser.extract_presence_absence(
60
69
  blast_output_tsv=lag1_blast_tsv,
61
70
  hits_as_string=False,
62
71
  include_match_stats=False,
@@ -65,6 +74,8 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
65
74
  results_dict = core.update_results_dict(
66
75
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
67
76
  )
77
+ if results_dict is None:
78
+ return {}
68
79
  return results_dict
69
80
 
70
81
 
@@ -133,22 +144,24 @@ class LegionellaResults(core.PipelineResults):
133
144
  results_dict[sample_name] = legionella_results
134
145
  return cls(results_dict)"""
135
146
 
147
+ @staticmethod
136
148
  def legionella_summary(
137
149
  legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
138
150
  ) -> dict:
139
151
  sbt_results_dict = extract_legionella_sbt(
140
152
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
141
153
  )
142
- lag1_blast_dict = extract_presence_absence(
154
+ lag1_blast_dict = blast_parser.extract_presence_absence(
143
155
  blast_output_tsv=lag1_blast_tsv,
144
156
  hits_as_string=False,
145
157
  include_match_stats=False,
146
158
  gene_names=["lag-1"],
147
159
  )
148
- print(lag1_blast_dict)
149
160
  results_dict = core.update_results_dict(
150
161
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
151
162
  )
163
+ if results_dict is None:
164
+ return {}
152
165
  return results_dict
153
166
 
154
167
  def __repr__(self):
@@ -1 +1 @@
1
- __version__ = "0.0.2"
1
+ __version__ = "0.0.5"
@@ -51,21 +51,31 @@ def extract_presence_absence(
51
51
  if os.path.exists(blast_output_tsv):
52
52
  try:
53
53
  blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
54
+ header_list = tsv_header.split(" ")
55
+ if len(header_list) == len(blast_df.columns):
56
+ blast_df.columns = tsv_header.split(" ")
57
+ blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
58
+ blast_df_unique = (
59
+ blast_df.sort_values(by=["bitscore"], ascending=False)
60
+ .groupby("qseqid")
61
+ .first()
62
+ )
63
+ blast_df_filtered = blast_df_unique.query(
64
+ "plen > @plen_threshold and pident > @pident_threshold"
65
+ )
66
+ blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
67
+ else:
68
+ print(
69
+ f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
70
+ file=sys.stderr,
71
+ )
72
+ return None
54
73
 
55
- blast_df.columns = tsv_header.split(" ")
56
- blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
57
- blast_df_unique = (
58
- blast_df.sort_values(by=["bitscore"], ascending=False)
59
- .groupby("qseqid")
60
- .first()
61
- )
62
- blast_df_filtered = blast_df_unique.query(
63
- "plen > @plen_threshold and pident > @pident_threshold"
64
- )
65
- blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
66
74
  except pandas.errors.EmptyDataError:
67
75
  blast_dict = {}
68
76
  print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
77
+ except Exception as e:
78
+ print(f"Error parsing blast: e")
69
79
  if hits_as_string:
70
80
 
71
81
  results = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -1,15 +1,16 @@
1
- ssi_analysis_result_parsers/Legionella_parser.py,sha256=zBsSYfuUj2uN_LRqgTpOwhFUQF2jTLNvVzKbh6OPNxE,8878
2
- ssi_analysis_result_parsers/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
1
+ ssi_analysis_result_parsers/Legionella_parser.py,sha256=8n15TEiOY1OxAe4VePVzaXz7WFen6U1y3pM5Vsjsg7U,9292
2
+ ssi_analysis_result_parsers/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
3
3
  ssi_analysis_result_parsers/_modidx.py,sha256=JY_GM0tMojzTtX9O4D8as4k5a-sXqkxkb7ZUEPzhuMk,12232
4
- ssi_analysis_result_parsers/blast_parser.py,sha256=L7EdW2LUwSS2OQO7WZUAxP6whJXdvTILE2a3O59uv-s,7441
4
+ ssi_analysis_result_parsers/blast_parser.py,sha256=EBqWlx8bDlaSzqAZomiUGnT2DGaaA-L7ukny7SEJbpk,7915
5
5
  ssi_analysis_result_parsers/core.py,sha256=6TGURv8spPdBpwKv6LvqvbVzJChdeHwsG3WQ6QLUuvE,12124
6
6
  ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
7
7
  ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
8
8
  ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
9
9
  ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
10
- ssi_analysis_result_parsers-0.0.3.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
10
+ ssi_analysis_result_parsers-0.0.5.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
11
11
  test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
12
- test_input/Legionella/batch_parser_file_paths.tsv,sha256=zls11lmEA5U89d8RsX6PR8M1zXNVimeL4raqdZ3ijvQ,210
12
+ test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
13
14
  test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
14
15
  test_input/Legionella/lag-1_blast_2.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
16
  test_input/Legionella/test.sbt.tsv,sha256=ibhaH3is2dxHaABPvR2QM2HAq9bKOs1AwOTmrwSrcd8,168
@@ -20,8 +21,8 @@ test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDh
20
21
  test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
21
22
  test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
22
23
  test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
23
- ssi_analysis_result_parsers-0.0.3.dist-info/METADATA,sha256=LPIINEBm-fhPx_sG1lS1xLf2ZIWtHXU97hFFW2M58fE,2765
24
- ssi_analysis_result_parsers-0.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
25
- ssi_analysis_result_parsers-0.0.3.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
26
- ssi_analysis_result_parsers-0.0.3.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
27
- ssi_analysis_result_parsers-0.0.3.dist-info/RECORD,,
24
+ ssi_analysis_result_parsers-0.0.5.dist-info/METADATA,sha256=BF-cuY_EJow8haoGw99WeGWAf_zWWQdWQ4OFu42NtcM,2765
25
+ ssi_analysis_result_parsers-0.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
26
+ ssi_analysis_result_parsers-0.0.5.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
27
+ ssi_analysis_result_parsers-0.0.5.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
28
+ ssi_analysis_result_parsers-0.0.5.dist-info/RECORD,,
@@ -1,3 +1,5 @@
1
1
  sample_name sbt_results lag1_blast_results
2
2
  sample_1 test_input/Legionella/test.sbt.tsv test_input/Legionella/lag-1_blast.tsv
3
3
  sample_2 test_input/Legionella/test2.sbt.tsv test_input/Legionella/lag-1_blast_2.tsv
4
+ sample_3 test_input/Legionella/test2.sbt.tsv test_input/empty_file.txt
5
+ sample_4 test_input/empty_file.txt test_input/Legionella/lag-1_blast_2.tsv
File without changes