ssi-analysis-result-parsers 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {ssi_analysis_result_parsers-0.0.4/ssi_analysis_result_parsers.egg-info → ssi_analysis_result_parsers-0.0.5}/PKG-INFO +1 -1
  2. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/settings.ini +1 -1
  3. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/Legionella_parser.py +20 -8
  4. ssi_analysis_result_parsers-0.0.5/ssi_analysis_result_parsers/__init__.py +1 -0
  5. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/blast_parser.py +21 -11
  6. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5/ssi_analysis_result_parsers.egg-info}/PKG-INFO +1 -1
  7. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/SOURCES.txt +1 -0
  8. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/batch_parser_file_paths.tsv +2 -0
  9. ssi_analysis_result_parsers-0.0.5/test_input/empty_file.txt +0 -0
  10. ssi_analysis_result_parsers-0.0.4/ssi_analysis_result_parsers/__init__.py +0 -1
  11. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/LICENSE +0 -0
  12. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/MANIFEST.in +0 -0
  13. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/README.md +0 -0
  14. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/pyproject.toml +0 -0
  15. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/setup.cfg +0 -0
  16. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/setup.py +0 -0
  17. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/_modidx.py +0 -0
  18. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/config/config.default.env +0 -0
  19. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/config/config.default.yaml +0 -0
  20. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/core.py +0 -0
  21. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/hello_world.py +0 -0
  22. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers/some_string.py +0 -0
  23. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/dependency_links.txt +0 -0
  24. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/entry_points.txt +0 -0
  25. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/not-zip-safe +0 -0
  26. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/requires.txt +0 -0
  27. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/ssi_analysis_result_parsers.egg-info/top_level.txt +0 -0
  28. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/.DS_Store +0 -0
  29. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/lag-1_blast.tsv +0 -0
  30. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/lag-1_blast_2.tsv +0 -0
  31. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/test.sbt.tsv +0 -0
  32. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/Legionella/test2.sbt.tsv +0 -0
  33. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/allele_matches_test.tsv +0 -0
  34. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/empty_gene_presence_absense_test.tsv +0 -0
  35. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_input/blast_parser/gene_presence_absence_test.tsv +0 -0
  36. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_output/output_with_sample_name.tsv +0 -0
  37. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_output/test.tsv +0 -0
  38. {ssi_analysis_result_parsers-0.0.4 → ssi_analysis_result_parsers-0.0.5}/test_output/test_batch_output.tsv +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -5,7 +5,7 @@
5
5
  ### Python library ###
6
6
  repo = ssi_analysis_result_parsers
7
7
  lib_name = %(repo)s
8
- version = 0.0.4
8
+ version = 0.0.5
9
9
  min_python = 3.9
10
10
  license = MIT
11
11
  black_formatting = True
@@ -25,8 +25,10 @@ import json # for nicely printing json and yaml
25
25
  # import functions from core module (optional, but most likely needed).
26
26
  from ssi_analysis_result_parsers import (
27
27
  core,
28
+ blast_parser,
28
29
  )
29
- from .blast_parser import extract_presence_absence
30
+
31
+ # from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
30
32
 
31
33
  # Project specific libraries
32
34
  from pathlib import Path
@@ -39,11 +41,18 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
39
41
  Returns dictionary of results found in the Legionella SBT summary output
40
42
  """
41
43
  if os.path.exists(legionella_sbt_results_tsv):
42
- df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
43
- df.set_index("sample", inplace=True, drop=True)
44
- d = df.to_dict(orient="index")
45
- fname = next(iter(d))
46
- return d[fname]
44
+ try:
45
+ df = pandas.read_csv(legionella_sbt_results_tsv, sep="\t")
46
+ df.set_index("sample", inplace=True, drop=True)
47
+ d = df.to_dict(orient="index")
48
+ fname = next(iter(d))
49
+ return d[fname]
50
+ except pandas.errors.EmptyDataError:
51
+ print(
52
+ f"No Legionella SBT output empty at {legionella_sbt_results_tsv}",
53
+ file=sys.stderr,
54
+ )
55
+ return None
47
56
  else:
48
57
  print(
49
58
  f"No Legionella SBT output found at {legionella_sbt_results_tsv}",
@@ -56,7 +65,7 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
56
65
  sbt_results_dict = extract_legionella_sbt(
57
66
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
58
67
  )
59
- lag1_blast_dict = extract_presence_absence(
68
+ lag1_blast_dict = blast_parser.extract_presence_absence(
60
69
  blast_output_tsv=lag1_blast_tsv,
61
70
  hits_as_string=False,
62
71
  include_match_stats=False,
@@ -135,13 +144,14 @@ class LegionellaResults(core.PipelineResults):
135
144
  results_dict[sample_name] = legionella_results
136
145
  return cls(results_dict)"""
137
146
 
147
+ @staticmethod
138
148
  def legionella_summary(
139
149
  legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
140
150
  ) -> dict:
141
151
  sbt_results_dict = extract_legionella_sbt(
142
152
  legionella_sbt_results_tsv=legionella_sbt_results_tsv
143
153
  )
144
- lag1_blast_dict = extract_presence_absence(
154
+ lag1_blast_dict = blast_parser.extract_presence_absence(
145
155
  blast_output_tsv=lag1_blast_tsv,
146
156
  hits_as_string=False,
147
157
  include_match_stats=False,
@@ -150,6 +160,8 @@ class LegionellaResults(core.PipelineResults):
150
160
  results_dict = core.update_results_dict(
151
161
  sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
152
162
  )
163
+ if results_dict is None:
164
+ return {}
153
165
  return results_dict
154
166
 
155
167
  def __repr__(self):
@@ -0,0 +1 @@
1
+ __version__ = "0.0.5"
@@ -51,21 +51,31 @@ def extract_presence_absence(
51
51
  if os.path.exists(blast_output_tsv):
52
52
  try:
53
53
  blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
54
+ header_list = tsv_header.split(" ")
55
+ if len(header_list) == len(blast_df.columns):
56
+ blast_df.columns = tsv_header.split(" ")
57
+ blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
58
+ blast_df_unique = (
59
+ blast_df.sort_values(by=["bitscore"], ascending=False)
60
+ .groupby("qseqid")
61
+ .first()
62
+ )
63
+ blast_df_filtered = blast_df_unique.query(
64
+ "plen > @plen_threshold and pident > @pident_threshold"
65
+ )
66
+ blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
67
+ else:
68
+ print(
69
+ f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
70
+ file=sys.stderr,
71
+ )
72
+ return None
54
73
 
55
- blast_df.columns = tsv_header.split(" ")
56
- blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
57
- blast_df_unique = (
58
- blast_df.sort_values(by=["bitscore"], ascending=False)
59
- .groupby("qseqid")
60
- .first()
61
- )
62
- blast_df_filtered = blast_df_unique.query(
63
- "plen > @plen_threshold and pident > @pident_threshold"
64
- )
65
- blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
66
74
  except pandas.errors.EmptyDataError:
67
75
  blast_dict = {}
68
76
  print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
77
+ except Exception as e:
78
+ print(f"Error parsing blast: e")
69
79
  if hits_as_string:
70
80
 
71
81
  results = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -21,6 +21,7 @@ ssi_analysis_result_parsers.egg-info/top_level.txt
21
21
  ssi_analysis_result_parsers/config/config.default.env
22
22
  ssi_analysis_result_parsers/config/config.default.yaml
23
23
  test_input/.DS_Store
24
+ test_input/empty_file.txt
24
25
  test_input/Legionella/batch_parser_file_paths.tsv
25
26
  test_input/Legionella/lag-1_blast.tsv
26
27
  test_input/Legionella/lag-1_blast_2.tsv
@@ -1,3 +1,5 @@
1
1
  sample_name sbt_results lag1_blast_results
2
2
  sample_1 test_input/Legionella/test.sbt.tsv test_input/Legionella/lag-1_blast.tsv
3
3
  sample_2 test_input/Legionella/test2.sbt.tsv test_input/Legionella/lag-1_blast_2.tsv
4
+ sample_3 test_input/Legionella/test2.sbt.tsv test_input/empty_file.txt
5
+ sample_4 test_input/empty_file.txt test_input/Legionella/lag-1_blast_2.tsv
@@ -1 +0,0 @@
1
- __version__ = "0.0.3"