ssi-analysis-result-parsers 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,7 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
2
2
 
3
3
  # %% auto 0
4
- __all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_batch_from_sheet', 'LegionellaResults',
5
- 'legionella_batch_from_dict', 'legionella_parser', 'legionella_batch_parser']
4
+ __all__ = ['extract_legionella_sbt', 'LegionellaResults', 'legionella_parser', 'legionella_batch_parser']
6
5
 
7
6
  # %% ../nbs/39_Legionella_parser.ipynb 3
8
7
  # standard libs
@@ -38,7 +37,8 @@ import sys
38
37
  # %% ../nbs/39_Legionella_parser.ipynb 6
39
38
  def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
40
39
  """
41
- Returns dictionary of results found in the Legionella SBT summary output
40
+ Extract results from a Legionella SBT results file.
41
+ Returns a dictionary with the ST, the allele variant for each gene, and notes from the output.
42
42
  """
43
43
  if os.path.exists(legionella_sbt_results_tsv):
44
44
  try:
@@ -61,46 +61,16 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
61
61
  return None
62
62
 
63
63
 
64
- def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
65
- sbt_results_dict = extract_legionella_sbt(
66
- legionella_sbt_results_tsv=legionella_sbt_results_tsv
67
- )
68
- lag1_blast_dict = blast_parser.extract_presence_absence(
69
- blast_output_tsv=lag1_blast_tsv,
70
- hits_as_string=False,
71
- include_match_stats=False,
72
- gene_names=["lag-1"],
73
- )
74
- results_dict = core.update_results_dict(
75
- sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
76
- )
77
- if results_dict is None:
78
- return {}
79
- return results_dict
80
-
81
-
82
- def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
83
- results_dict = {}
84
- for sample_name, path_dict in file_paths.items():
85
- legionella_results = legionella_summary(
86
- legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
87
- lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
88
- )
89
- results_dict[sample_name] = legionella_results
90
- if output_file is not None:
91
- df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
92
- names="sample_name"
93
- )
94
- df.to_csv(output_file, sep="\t", index=False)
95
- return results_dict
96
-
97
-
98
64
  class LegionellaResults(core.PipelineResults):
99
65
 
100
66
  @classmethod
101
67
  def from_tool_paths(
102
68
  cls, legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path, sample_name=None
103
69
  ):
70
+ """
71
+ Alternative constructor for initializing results for a single sample.
72
+ Initializes a LegionellaResults instance from the provided paths to outputs from tools (legionella sbt and lag1 presence blast)
73
+ """
104
74
  legionella_results = cls.legionella_summary(
105
75
  legionella_sbt_results_tsv=legionella_sbt_results_tsv,
106
76
  lag1_blast_tsv=lag1_blast_tsv,
@@ -109,6 +79,10 @@ class LegionellaResults(core.PipelineResults):
109
79
 
110
80
  @classmethod
111
81
  def from_tool_paths_dict(cls, file_paths: dict):
82
+ """
83
+ Alternative constructor for initializing results for multiple samples.
84
+ Initializes LegionellaResults instance by providing a dictionary of paths to outputs from tools (legionella sbt and lag1 presence blast)
85
+ """
112
86
  results_dict = {}
113
87
  for sample_name, path_dict in file_paths.items():
114
88
  legionella_results = cls.legionella_summary(
@@ -120,6 +94,10 @@ class LegionellaResults(core.PipelineResults):
120
94
 
121
95
  @classmethod
122
96
  def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
97
+ """
98
+ Alternative constructor for initializing results for multiple samples.
99
+ Initializes LegionellaResults instance by providing a DataFrame of paths to outputs from tools (legionella sbt and lag1 presence blast)
100
+ """
123
101
  file_paths = file_paths_df.to_dict(orient="index")
124
102
  results_dict = {}
125
103
  for sample_name, path_dict in file_paths.items():
@@ -133,16 +111,13 @@ class LegionellaResults(core.PipelineResults):
133
111
 
134
112
  @classmethod
135
113
  def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
114
+ """
115
+ Alternative constructor for initializing results for multiple samples.
116
+ Initializes LegionellaResults instance by providing a tsv-file with paths to outputs from tools (legionella sbt and lag1 presence blast)
117
+ """
136
118
  file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
137
119
  file_paths_df.set_index("sample_name", inplace=True, drop=True)
138
- # return_cls =
139
- # results_dict = file_paths_df.to_dict(orient="index")
140
120
  return cls.from_tool_paths_dataframe(file_paths_df)
141
- """for sample_name, path_dict in file_paths.items():
142
- legionella_results = cls.legionella_summary(legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
143
- lag1_blast_tsv=Path(path_dict["lag1_blast_results"]))
144
- results_dict[sample_name] = legionella_results
145
- return cls(results_dict)"""
146
121
 
147
122
  @staticmethod
148
123
  def legionella_summary(
@@ -167,38 +142,6 @@ class LegionellaResults(core.PipelineResults):
167
142
  def __repr__(self):
168
143
  return f"< Legionella analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
169
144
 
170
-
171
- def legionella_batch_from_dict(file_paths: dict, output_file: Path = None):
172
- results_dict = {}
173
- for sample_name, path_dict in file_paths.items():
174
- legionella_results = legionella_summary(
175
- legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
176
- lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
177
- )
178
- results_dict[sample_name] = legionella_results
179
- if output_file is not None:
180
- df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
181
- names="sample_name"
182
- )
183
- df.to_csv(output_file, sep="\t", index=False)
184
- return results_dict
185
-
186
-
187
- def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
188
- results_dict = {}
189
- for sample_name, path_dict in file_paths.items():
190
- legionella_results = legionella_summary(
191
- legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
192
- lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
193
- )
194
- results_dict[sample_name] = legionella_results
195
- if output_file is not None:
196
- df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
197
- names="sample_name"
198
- )
199
- df.to_csv(output_file, sep="\t", index=False)
200
- return results_dict
201
-
202
145
  # %% ../nbs/39_Legionella_parser.ipynb 9
203
146
  @call_parse
204
147
  def legionella_parser(
@@ -21,16 +21,10 @@ d = { 'settings': { 'branch': 'main',
21
21
  'ssi_analysis_result_parsers/Legionella_parser.py'),
22
22
  'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
23
23
  'ssi_analysis_result_parsers/Legionella_parser.py'),
24
- 'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_dict': ( 'legionella_parser.html#legionella_batch_from_dict',
25
- 'ssi_analysis_result_parsers/Legionella_parser.py'),
26
- 'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_sheet': ( 'legionella_parser.html#legionella_batch_from_sheet',
27
- 'ssi_analysis_result_parsers/Legionella_parser.py'),
28
24
  'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_parser': ( 'legionella_parser.html#legionella_batch_parser',
29
25
  'ssi_analysis_result_parsers/Legionella_parser.py'),
30
26
  'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
31
- 'ssi_analysis_result_parsers/Legionella_parser.py'),
32
- 'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
33
- 'ssi_analysis_result_parsers/Legionella_parser.py')},
27
+ 'ssi_analysis_result_parsers/Legionella_parser.py')},
34
28
  'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
35
29
  'ssi_analysis_result_parsers/blast_parser.py'),
36
30
  'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
@@ -196,7 +196,6 @@ def get_samplesheet(sample_sheet_config: dict) -> pd.DataFrame:
196
196
  class PipelineResults:
197
197
 
198
198
  def __init__(self, results_dict):
199
- print(results_dict)
200
199
  self.results_dict = results_dict
201
200
  self.results_df = pandas.DataFrame.from_dict(results_dict, orient="index")
202
201
 
@@ -207,12 +206,17 @@ class PipelineResults:
207
206
 
208
207
  @classmethod
209
208
  def from_results_dataframe(cls, results_df: pandas.DataFrame):
210
- # results_df = results_df.set_index("sample_name")
209
+ """
210
+ Alternative constructor for initializing from DataFrame instead of dictionary
211
+ """
211
212
  results_dict = results_df.to_dict(orient="index")
212
213
  return cls(results_dict)
213
214
 
214
215
  @classmethod
215
216
  def from_results_tsv(cls, results_tsv: Path):
217
+ """
218
+ Alternative constructor for initializing from a tsv-file instead of dictionary
219
+ """
216
220
  results_df = pandas.read_csv(results_tsv, sep="\t")
217
221
  results_df.set_index("sample_name", inplace=True, drop=True)
218
222
  results_dict = results_df.to_dict(orient="index")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -1,13 +1,13 @@
1
- ssi_analysis_result_parsers/Legionella_parser.py,sha256=8n15TEiOY1OxAe4VePVzaXz7WFen6U1y3pM5Vsjsg7U,9292
1
+ ssi_analysis_result_parsers/Legionella_parser.py,sha256=an9Rm9r4N3wQXy0qhUpvacy4Wb6HxUXFdDA7D6YsQyY,7237
2
2
  ssi_analysis_result_parsers/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
3
- ssi_analysis_result_parsers/_modidx.py,sha256=JY_GM0tMojzTtX9O4D8as4k5a-sXqkxkb7ZUEPzhuMk,12232
3
+ ssi_analysis_result_parsers/_modidx.py,sha256=ysvICOsqtGaXuCYPu-UuRGVRhZDJ-O9X3o9lE7rzzGI,11089
4
4
  ssi_analysis_result_parsers/blast_parser.py,sha256=EBqWlx8bDlaSzqAZomiUGnT2DGaaA-L7ukny7SEJbpk,7915
5
- ssi_analysis_result_parsers/core.py,sha256=6TGURv8spPdBpwKv6LvqvbVzJChdeHwsG3WQ6QLUuvE,12124
5
+ ssi_analysis_result_parsers/core.py,sha256=8CzFMDrGJ24D9aoIebLsG8tx-OxvYJod1cxBITqNfaY,12258
6
6
  ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
7
7
  ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
8
8
  ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
9
9
  ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
10
- ssi_analysis_result_parsers-0.0.5.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
10
+ ssi_analysis_result_parsers-0.0.6.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
11
11
  test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
12
12
  test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
@@ -21,8 +21,8 @@ test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDh
21
21
  test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
22
22
  test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
23
23
  test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
24
- ssi_analysis_result_parsers-0.0.5.dist-info/METADATA,sha256=BF-cuY_EJow8haoGw99WeGWAf_zWWQdWQ4OFu42NtcM,2765
25
- ssi_analysis_result_parsers-0.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
26
- ssi_analysis_result_parsers-0.0.5.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
27
- ssi_analysis_result_parsers-0.0.5.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
28
- ssi_analysis_result_parsers-0.0.5.dist-info/RECORD,,
24
+ ssi_analysis_result_parsers-0.0.6.dist-info/METADATA,sha256=WUMU9Lfanw3DLtDNZUzKIZaBU071v00068cENqOkpq8,2765
25
+ ssi_analysis_result_parsers-0.0.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
26
+ ssi_analysis_result_parsers-0.0.6.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
27
+ ssi_analysis_result_parsers-0.0.6.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
28
+ ssi_analysis_result_parsers-0.0.6.dist-info/RECORD,,