ssi-analysis-result-parsers 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssi_analysis_result_parsers/Legionella_parser.py +19 -76
- ssi_analysis_result_parsers/_modidx.py +1 -7
- ssi_analysis_result_parsers/core.py +6 -2
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/METADATA +1 -1
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/RECORD +9 -9
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/WHEEL +0 -0
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/entry_points.txt +0 -0
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/licenses/LICENSE +0 -0
- {ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,7 @@
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
|
2
2
|
|
3
3
|
# %% auto 0
|
4
|
-
__all__ = ['extract_legionella_sbt', '
|
5
|
-
'legionella_batch_from_dict', 'legionella_parser', 'legionella_batch_parser']
|
4
|
+
__all__ = ['extract_legionella_sbt', 'LegionellaResults', 'legionella_parser', 'legionella_batch_parser']
|
6
5
|
|
7
6
|
# %% ../nbs/39_Legionella_parser.ipynb 3
|
8
7
|
# standard libs
|
@@ -38,7 +37,8 @@ import sys
|
|
38
37
|
# %% ../nbs/39_Legionella_parser.ipynb 6
|
39
38
|
def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
|
40
39
|
"""
|
41
|
-
|
40
|
+
Extract results from Legionella SBT results file
|
41
|
+
Returns a dictionary with ST, allele variant for each gene, and notes from output
|
42
42
|
"""
|
43
43
|
if os.path.exists(legionella_sbt_results_tsv):
|
44
44
|
try:
|
@@ -61,46 +61,16 @@ def extract_legionella_sbt(legionella_sbt_results_tsv: Path) -> dict:
|
|
61
61
|
return None
|
62
62
|
|
63
63
|
|
64
|
-
def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -> dict:
|
65
|
-
sbt_results_dict = extract_legionella_sbt(
|
66
|
-
legionella_sbt_results_tsv=legionella_sbt_results_tsv
|
67
|
-
)
|
68
|
-
lag1_blast_dict = blast_parser.extract_presence_absence(
|
69
|
-
blast_output_tsv=lag1_blast_tsv,
|
70
|
-
hits_as_string=False,
|
71
|
-
include_match_stats=False,
|
72
|
-
gene_names=["lag-1"],
|
73
|
-
)
|
74
|
-
results_dict = core.update_results_dict(
|
75
|
-
sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
|
76
|
-
)
|
77
|
-
if results_dict is None:
|
78
|
-
return {}
|
79
|
-
return results_dict
|
80
|
-
|
81
|
-
|
82
|
-
def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
|
83
|
-
results_dict = {}
|
84
|
-
for sample_name, path_dict in file_paths.items():
|
85
|
-
legionella_results = legionella_summary(
|
86
|
-
legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
|
87
|
-
lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
|
88
|
-
)
|
89
|
-
results_dict[sample_name] = legionella_results
|
90
|
-
if output_file is not None:
|
91
|
-
df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
|
92
|
-
names="sample_name"
|
93
|
-
)
|
94
|
-
df.to_csv(output_file, sep="\t", index=False)
|
95
|
-
return results_dict
|
96
|
-
|
97
|
-
|
98
64
|
class LegionellaResults(core.PipelineResults):
|
99
65
|
|
100
66
|
@classmethod
|
101
67
|
def from_tool_paths(
|
102
68
|
cls, legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path, sample_name=None
|
103
69
|
):
|
70
|
+
"""
|
71
|
+
Alternative constructor for initializing results for single sample,
|
72
|
+
Initializes LegionellaResults instance provided paths to outputs from tools (legionella sbt and lag1 presence blast)
|
73
|
+
"""
|
104
74
|
legionella_results = cls.legionella_summary(
|
105
75
|
legionella_sbt_results_tsv=legionella_sbt_results_tsv,
|
106
76
|
lag1_blast_tsv=lag1_blast_tsv,
|
@@ -109,6 +79,10 @@ class LegionellaResults(core.PipelineResults):
|
|
109
79
|
|
110
80
|
@classmethod
|
111
81
|
def from_tool_paths_dict(cls, file_paths: dict):
|
82
|
+
"""
|
83
|
+
Alternative constructor for initializing results for multiple samples,
|
84
|
+
Initializes LegionellaResults instance by providing a dictionary of paths to outputs from tools (legionella sbt and lag1 presence blast)
|
85
|
+
"""
|
112
86
|
results_dict = {}
|
113
87
|
for sample_name, path_dict in file_paths.items():
|
114
88
|
legionella_results = cls.legionella_summary(
|
@@ -120,6 +94,10 @@ class LegionellaResults(core.PipelineResults):
|
|
120
94
|
|
121
95
|
@classmethod
|
122
96
|
def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
|
97
|
+
"""
|
98
|
+
Alternative constructor for initializing results for multiple samples,
|
99
|
+
Initializes LegionellaResults instance by providing a DataFrame of paths to outputs from tools (legionella sbt and lag1 presence blast)
|
100
|
+
"""
|
123
101
|
file_paths = file_paths_df.to_dict(orient="index")
|
124
102
|
results_dict = {}
|
125
103
|
for sample_name, path_dict in file_paths.items():
|
@@ -133,16 +111,13 @@ class LegionellaResults(core.PipelineResults):
|
|
133
111
|
|
134
112
|
@classmethod
|
135
113
|
def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
|
114
|
+
"""
|
115
|
+
Alternative constructor for initializing results for multiple samples,
|
116
|
+
Initializes LegionellaResults instance by providing a tsv-file with paths to outputs from tools (legionella sbt and lag1 presence blast)
|
117
|
+
"""
|
136
118
|
file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
|
137
119
|
file_paths_df.set_index("sample_name", inplace=True, drop=True)
|
138
|
-
# return_cls =
|
139
|
-
# results_dict = file_paths_df.to_dict(orient="index")
|
140
120
|
return cls.from_tool_paths_dataframe(file_paths_df)
|
141
|
-
"""for sample_name, path_dict in file_paths.items():
|
142
|
-
legionella_results = cls.legionella_summary(legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
|
143
|
-
lag1_blast_tsv=Path(path_dict["lag1_blast_results"]))
|
144
|
-
results_dict[sample_name] = legionella_results
|
145
|
-
return cls(results_dict)"""
|
146
121
|
|
147
122
|
@staticmethod
|
148
123
|
def legionella_summary(
|
@@ -167,38 +142,6 @@ class LegionellaResults(core.PipelineResults):
|
|
167
142
|
def __repr__(self):
|
168
143
|
return f"< Legionella analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
|
169
144
|
|
170
|
-
|
171
|
-
def legionella_batch_from_dict(file_paths: dict, output_file: Path = None):
|
172
|
-
results_dict = {}
|
173
|
-
for sample_name, path_dict in file_paths.items():
|
174
|
-
legionella_results = legionella_summary(
|
175
|
-
legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
|
176
|
-
lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
|
177
|
-
)
|
178
|
-
results_dict[sample_name] = legionella_results
|
179
|
-
if output_file is not None:
|
180
|
-
df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
|
181
|
-
names="sample_name"
|
182
|
-
)
|
183
|
-
df.to_csv(output_file, sep="\t", index=False)
|
184
|
-
return results_dict
|
185
|
-
|
186
|
-
|
187
|
-
def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
|
188
|
-
results_dict = {}
|
189
|
-
for sample_name, path_dict in file_paths.items():
|
190
|
-
legionella_results = legionella_summary(
|
191
|
-
legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
|
192
|
-
lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
|
193
|
-
)
|
194
|
-
results_dict[sample_name] = legionella_results
|
195
|
-
if output_file is not None:
|
196
|
-
df = pandas.DataFrame.from_dict(results_dict, orient="index").reset_index(
|
197
|
-
names="sample_name"
|
198
|
-
)
|
199
|
-
df.to_csv(output_file, sep="\t", index=False)
|
200
|
-
return results_dict
|
201
|
-
|
202
145
|
# %% ../nbs/39_Legionella_parser.ipynb 9
|
203
146
|
@call_parse
|
204
147
|
def legionella_parser(
|
@@ -21,16 +21,10 @@ d = { 'settings': { 'branch': 'main',
|
|
21
21
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
22
22
|
'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
|
23
23
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
24
|
-
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_dict': ( 'legionella_parser.html#legionella_batch_from_dict',
|
25
|
-
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
26
|
-
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_sheet': ( 'legionella_parser.html#legionella_batch_from_sheet',
|
27
|
-
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
28
24
|
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_parser': ( 'legionella_parser.html#legionella_batch_parser',
|
29
25
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
30
26
|
'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
|
31
|
-
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
32
|
-
'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
|
33
|
-
'ssi_analysis_result_parsers/Legionella_parser.py')},
|
27
|
+
'ssi_analysis_result_parsers/Legionella_parser.py')},
|
34
28
|
'ssi_analysis_result_parsers.blast_parser': { 'ssi_analysis_result_parsers.blast_parser.allele_matches': ( 'blast_parser.html#allele_matches',
|
35
29
|
'ssi_analysis_result_parsers/blast_parser.py'),
|
36
30
|
'ssi_analysis_result_parsers.blast_parser.extract_allele_matches': ( 'blast_parser.html#extract_allele_matches',
|
@@ -196,7 +196,6 @@ def get_samplesheet(sample_sheet_config: dict) -> pd.DataFrame:
|
|
196
196
|
class PipelineResults:
|
197
197
|
|
198
198
|
def __init__(self, results_dict):
|
199
|
-
print(results_dict)
|
200
199
|
self.results_dict = results_dict
|
201
200
|
self.results_df = pandas.DataFrame.from_dict(results_dict, orient="index")
|
202
201
|
|
@@ -207,12 +206,17 @@ class PipelineResults:
|
|
207
206
|
|
208
207
|
@classmethod
|
209
208
|
def from_results_dataframe(cls, results_df: pandas.DataFrame):
|
210
|
-
|
209
|
+
"""
|
210
|
+
Alternative constructor for initializing from DataFrame instead of dictionary
|
211
|
+
"""
|
211
212
|
results_dict = results_df.to_dict(orient="index")
|
212
213
|
return cls(results_dict)
|
213
214
|
|
214
215
|
@classmethod
|
215
216
|
def from_results_tsv(cls, results_tsv: Path):
|
217
|
+
"""
|
218
|
+
Alternative constructor for initializing from a tsv-file instead of dictionary
|
219
|
+
"""
|
216
220
|
results_df = pandas.read_csv(results_tsv, sep="\t")
|
217
221
|
results_df.set_index("sample_name", inplace=True, drop=True)
|
218
222
|
results_dict = results_df.to_dict(orient="index")
|
{ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/RECORD
RENAMED
@@ -1,13 +1,13 @@
|
|
1
|
-
ssi_analysis_result_parsers/Legionella_parser.py,sha256=
|
1
|
+
ssi_analysis_result_parsers/Legionella_parser.py,sha256=an9Rm9r4N3wQXy0qhUpvacy4Wb6HxUXFdDA7D6YsQyY,7237
|
2
2
|
ssi_analysis_result_parsers/__init__.py,sha256=S7u1lbuWmM3A3ajykBialmPoJUK6Jg-WmNqM-9OZFdk,22
|
3
|
-
ssi_analysis_result_parsers/_modidx.py,sha256=
|
3
|
+
ssi_analysis_result_parsers/_modidx.py,sha256=ysvICOsqtGaXuCYPu-UuRGVRhZDJ-O9X3o9lE7rzzGI,11089
|
4
4
|
ssi_analysis_result_parsers/blast_parser.py,sha256=EBqWlx8bDlaSzqAZomiUGnT2DGaaA-L7ukny7SEJbpk,7915
|
5
|
-
ssi_analysis_result_parsers/core.py,sha256=
|
5
|
+
ssi_analysis_result_parsers/core.py,sha256=8CzFMDrGJ24D9aoIebLsG8tx-OxvYJod1cxBITqNfaY,12258
|
6
6
|
ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
|
7
7
|
ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
|
8
8
|
ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
|
9
9
|
ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
|
10
|
-
ssi_analysis_result_parsers-0.0.
|
10
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
|
11
11
|
test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
|
12
12
|
test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
|
@@ -21,8 +21,8 @@ test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDh
|
|
21
21
|
test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
|
22
22
|
test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
23
23
|
test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
24
|
-
ssi_analysis_result_parsers-0.0.
|
25
|
-
ssi_analysis_result_parsers-0.0.
|
26
|
-
ssi_analysis_result_parsers-0.0.
|
27
|
-
ssi_analysis_result_parsers-0.0.
|
28
|
-
ssi_analysis_result_parsers-0.0.
|
24
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/METADATA,sha256=WUMU9Lfanw3DLtDNZUzKIZaBU071v00068cENqOkpq8,2765
|
25
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
26
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
|
27
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
|
28
|
+
ssi_analysis_result_parsers-0.0.6.dist-info/RECORD,,
|
{ssi_analysis_result_parsers-0.0.5.dist-info → ssi_analysis_result_parsers-0.0.6.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|