ssi-analysis-result-parsers 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers.egg-info → ssi_analysis_result_parsers-0.0.5}/PKG-INFO +1 -1
  2. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/settings.ini +1 -1
  3. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/Legionella_parser.py +22 -9
  4. ssi_analysis_result_parsers-0.0.5/ssi_analysis_result_parsers/__init__.py +1 -0
  5. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/blast_parser.py +21 -11
  6. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5/ssi_analysis_result_parsers.egg-info}/PKG-INFO +1 -1
  7. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/SOURCES.txt +1 -0
  8. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/batch_parser_file_paths.tsv +2 -0
  9. ssi_analysis_result_parsers-0.0.5/test_input/empty_file.txt +0 -0
  10. ssi_analysis_result_parsers-0.0.3/ssi_analysis_result_parsers/__init__.py +0 -1
  11. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/LICENSE +0 -0
  12. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/MANIFEST.in +0 -0
  13. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/README.md +0 -0
  14. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/pyproject.toml +0 -0
  15. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/setup.cfg +0 -0
  16. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/setup.py +0 -0
  17. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/_modidx.py +0 -0
  18. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/config/config.default.env +0 -0
  19. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/config/config.default.yaml +0 -0
  20. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/core.py +0 -0
  21. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/hello_world.py +0 -0
  22. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/some_string.py +0 -0
  23. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/dependency_links.txt +0 -0
  24. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/entry_points.txt +0 -0
  25. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/not-zip-safe +0 -0
  26. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/requires.txt +0 -0
  27. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/top_level.txt +0 -0
  28. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/.DS_Store +0 -0
  29. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/lag-1_blast.tsv +0 -0
  30. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/lag-1_blast_2.tsv +0 -0
  31. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/test.sbt.tsv +0 -0
  32. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/test2.sbt.tsv +0 -0
  33. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/allele_matches_test.tsv +0 -0
  34. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/empty_gene_presence_absense_test.tsv +0 -0
  35. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/gene_presence_absence_test.tsv +0 -0
  36. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_output/output_with_sample_name.tsv +0 -0
  37. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_output/test.tsv +0 -0
  38. {ssi_analysis_result_parsers-0.0.3 → ssi_analysis_result_parsers-0.0.5}/test_output/test_batch_output.tsv +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -5,7 +5,7 @@
5
5
  ### Python library ###
6
6
  repo = ssi_analysis_result_parsers
7
7
  lib_name = %(repo)s
8
- version = 0.0.3
8
+ version = 0.0.5
9
9
  min_python = 3.9
10
10
  license = MIT
11
11
  black_formatting = True
@@ -25,8 +25,10 @@ import json # for nicely printing json and yaml
25
25
  # import functions from core module (optional, but most likely needed).
26
26
  from ssi_analysis_result_parsers import (
27
27
  core,
28
+ blast_parser,
28
29
  )
29
- from .blast_parser import extract_presence_absence
30
+
31
+ # from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
30
32
 
31
33
  # Project specific libraries
32
34
  from pathlib import Path
@@ -39,11 +41,18 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
39
41
  Returns dictionary of results found in the Legionella SBT summary output
40
42
  """
41
43
  if os.path.exists(legionella_sbt_results_tsv):
42
- df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
43
- df.set_index("sample", inplace=True, drop=True)
44
- d = df.to_dict(orient="index")
45
- fname = next(iter(d))
46
- return d[fname]
44
+ try:
45
+ df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
46
+ df.set_index("sample", inplace=True, drop=True)
47
+ d = df.to_dict(orient="index")
48
+ fname = next(iter(d))
49
+ return d[fname]
50
+ except pandas.errors.EmptyDataError:
51
+ print(
52
+ f"No Legionella SBT output empty at {legionella_sbt_results_tsv}",
53
+ file=sys.stderr,
54
+ )
55
+ return None
47
56
  else:
48
57
  print(
49
58
  f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
@@ -56,7 +65,7 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
56
65
  sbt_results_dict = extract_legionella_sbt(
57
66
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
58
67
  )
59
- lag1_blast_dict = extract_presence_absence(
68
+ lag1_blast_dict = blast_parser.extract_presence_absence(
60
69
  blast_output_tsv=lag1_blast_tsv,
61
70
  hits_as_string=False,
62
71
  include_match_stats=False,
@@ -65,6 +74,8 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
65
74
  results_dict = core.update_results_dict(
66
75
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
67
76
  )
77
+ if results_dict is None:
78
+ return {}
68
79
  return results_dict
69
80
 
70
81
 
@@ -133,22 +144,24 @@ class LegionellaResults(core.PipelineResults):
133
144
  results_dict[sample_name] = legionella_results
134
145
  return cls(results_dict)"""
135
146
 
147
+ @staticmethod
136
148
  def legionella_summary(
137
149
  legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
138
150
  ) -> dict:
139
151
  sbt_results_dict = extract_legionella_sbt(
140
152
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
141
153
  )
142
- lag1_blast_dict = extract_presence_absence(
154
+ lag1_blast_dict = blast_parser.extract_presence_absence(
143
155
  blast_output_tsv=lag1_blast_tsv,
144
156
  hits_as_string=False,
145
157
  include_match_stats=False,
146
158
  gene_names=["lag-1"],
147
159
  )
148
- print(lag1_blast_dict)
149
160
  results_dict = core.update_results_dict(
150
161
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
151
162
  )
163
+ if results_dict is None:
164
+ return {}
152
165
  return results_dict
153
166
 
154
167
  def __repr__(self):
@@ -0,0 +1 @@
1
+ __version__ = "0.0.5"
@@ -51,21 +51,31 @@ def extract_presence_absence(
51
51
  if os.path.exists(blast_output_tsv):
52
52
  try:
53
53
  blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
54
+ header_list = tsv_header.split(" ")
55
+ if len(header_list) == len(blast_df.columns):
56
+ blast_df.columns = tsv_header.split(" ")
57
+ blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
58
+ blast_df_unique = (
59
+ blast_df.sort_values(by=["bitscore"], ascending=False)
60
+ .groupby("qseqid")
61
+ .first()
62
+ )
63
+ blast_df_filtered = blast_df_unique.query(
64
+ "plen > @plen_threshold and pident > @pident_threshold"
65
+ )
66
+ blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
67
+ else:
68
+ print(
69
+ f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
70
+ file=sys.stderr,
71
+ )
72
+ return None
54
73
 
55
- blast_df.columns = tsv_header.split(" ")
56
- blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
57
- blast_df_unique = (
58
- blast_df.sort_values(by=["bitscore"], ascending=False)
59
- .groupby("qseqid")
60
- .first()
61
- )
62
- blast_df_filtered = blast_df_unique.query(
63
- "plen > @plen_threshold and pident > @pident_threshold"
64
- )
65
- blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
66
74
  except pandas.errors.EmptyDataError:
67
75
  blast_dict = {}
68
76
  print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
77
+ except Exception as e:
78
+ print(f"Error parsing blast: e")
69
79
  if hits_as_string:
70
80
 
71
81
  results = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -21,6 +21,7 @@ ssi_analysis_result_parsers.egg-info/top_level.txt
21
21
  ssi_analysis_result_parsers/config/config.default.env
22
22
  ssi_analysis_result_parsers/config/config.default.yaml
23
23
  test_input/.DS_Store
24
+ test_input/empty_file.txt
24
25
  test_input/Legionella/batch_parser_file_paths.tsv
25
26
  test_input/Legionella/lag-1_blast.tsv
26
27
  test_input/Legionella/lag-1_blast_2.tsv
@@ -1,3 +1,5 @@
1
1
  sample_name sbt_results lag1_blast_results
2
2
  sample_1 test_input/Legionella/test.sbt.tsv test_input/Legionella/lag-1_blast.tsv
3
3
  sample_2 test_input/Legionella/test2.sbt.tsv test_input/Legionella/lag-1_blast_2.tsv
4
+ sample_3 test_input/Legionella/test2.sbt.tsv test_input/empty_file.txt
5
+ sample_4 test_input/empty_file.txt test_input/Legionella/lag-1_blast_2.tsv
@@ -1 +0,0 @@
1
- __version__ = "0.0.2"