ssi-analysis-result-parsers 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssi_analysis_result_parsers/Legionella_parser.py +137 -6
- ssi_analysis_result_parsers/__init__.py +1 -1
- ssi_analysis_result_parsers/_modidx.py +44 -4
- ssi_analysis_result_parsers/blast_parser.py +26 -24
- ssi_analysis_result_parsers/core.py +83 -33
- {ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/METADATA +2 -2
- ssi_analysis_result_parsers-0.0.3.dist-info/RECORD +27 -0
- {ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/entry_points.txt +2 -1
- test_input/Legionella/batch_parser_file_paths.tsv +3 -0
- test_input/Legionella/lag-1_blast_2.tsv +0 -0
- test_input/Legionella/test2.sbt.tsv +2 -0
- test_input/blast_parser/empty_gene_presence_absense_test.tsv +0 -0
- test_output/test.tsv +3 -0
- test_output/test_batch_output.tsv +3 -0
- ssi_analysis_result_parsers-0.0.1.dist-info/RECORD +0 -21
- {ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/WHEEL +0 -0
- {ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/licenses/LICENSE +0 -0
- {ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/39_Legionella_parser.ipynb.
|
2
2
|
|
3
3
|
# %% auto 0
|
4
|
-
__all__ = ['extract_legionella_sbt', 'legionella_summary', '
|
4
|
+
__all__ = ['extract_legionella_sbt', 'legionella_summary', 'legionella_batch_from_sheet', 'LegionellaResults',
|
5
|
+
'legionella_batch_from_dict', 'legionella_parser', 'legionella_batch_parser']
|
5
6
|
|
6
7
|
# %% ../nbs/39_Legionella_parser.ipynb 3
|
7
8
|
# standard libs
|
@@ -66,6 +67,125 @@ def legionella_summary(legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path) -
|
|
66
67
|
)
|
67
68
|
return results_dict
|
68
69
|
|
70
|
+
|
71
|
+
def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
    """Summarise Legionella results for several samples in one call.

    `file_paths` maps each sample name to a dict holding the keys
    "sbt_results" and "lag1_blast_results"; both values are wrapped in `Path`
    and handed to `legionella_summary`. When `output_file` is given, the
    per-sample summaries are also written to it as a tab-separated table with
    a leading "sample_name" column.

    Returns the dict of per-sample summaries, keyed by sample name.
    """
    # NOTE(review): a function with this exact name and body is defined again
    # further down in this module; the later definition is the one that stays
    # bound after import.
    summaries = {
        sample: legionella_summary(
            legionella_sbt_results_tsv=Path(paths["sbt_results"]),
            lag1_blast_tsv=Path(paths["lag1_blast_results"]),
        )
        for sample, paths in file_paths.items()
    }
    if output_file is None:
        return summaries
    table = pandas.DataFrame.from_dict(summaries, orient="index")
    table = table.reset_index(names="sample_name")
    table.to_csv(output_file, sep="\t", index=False)
    return summaries
|
85
|
+
|
86
|
+
|
87
|
+
class LegionellaResults(core.PipelineResults):
    """Per-sample Legionella results (SBT typing merged with lag-1 BLAST hits).

    Extends `core.PipelineResults`; the alternate constructors below differ
    only in how the locations of the two tool output files are supplied.
    """

    @classmethod
    def from_tool_paths(
        cls, legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path, sample_name=None
    ):
        """Build a single-sample results object from the two tool output files.

        The summary is stored under `sample_name` (which may be None).
        """
        legionella_results = cls.legionella_summary(
            legionella_sbt_results_tsv=legionella_sbt_results_tsv,
            lag1_blast_tsv=lag1_blast_tsv,
        )
        return cls({sample_name: legionella_results})

    @classmethod
    def from_tool_paths_dict(cls, file_paths: dict):
        """Build a results object from `{sample_name: {"sbt_results": ..., "lag1_blast_results": ...}}`."""
        results_dict = {
            sample_name: cls.legionella_summary(
                legionella_sbt_results_tsv=Path(path_dict["sbt_results"]),
                lag1_blast_tsv=Path(path_dict["lag1_blast_results"]),
            )
            for sample_name, path_dict in file_paths.items()
        }
        return cls(results_dict)

    @classmethod
    def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
        """Build a results object from a DataFrame of file paths.

        The frame's index supplies the sample names and it must have the
        columns "sbt_results" and "lag1_blast_results".
        """
        # One row per sample -> the same mapping shape from_tool_paths_dict takes.
        return cls.from_tool_paths_dict(file_paths_df.to_dict(orient="index"))

    @classmethod
    def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
        """Build a results object from a tab-separated file of file paths.

        The file must contain a "sample_name" column, which becomes the index,
        plus the "sbt_results" and "lag1_blast_results" columns.
        """
        file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
        file_paths_df = file_paths_df.set_index("sample_name")
        return cls.from_tool_paths_dataframe(file_paths_df)

    @staticmethod
    def legionella_summary(
        legionella_sbt_results_tsv: Path, lag1_blast_tsv: Path
    ) -> dict:
        """Return one flat dict combining the SBT results and lag-1 presence.

        The SBT file is parsed with `extract_legionella_sbt`; the BLAST file
        is parsed with `extract_presence_absence`, restricted to the "lag-1"
        gene and without match statistics. The two dicts are merged with
        `core.update_results_dict`; a key present in both is kept from the
        SBT side under the prefix "SBT: ".
        """
        # Declared static so it can be called on the class and on instances
        # alike (it takes neither `self` nor `cls`).
        sbt_results_dict = extract_legionella_sbt(
            legionella_sbt_results_tsv=legionella_sbt_results_tsv
        )
        lag1_blast_dict = extract_presence_absence(
            blast_output_tsv=lag1_blast_tsv,
            hits_as_string=False,
            include_match_stats=False,
            gene_names=["lag-1"],
        )
        return core.update_results_dict(
            sbt_results_dict, lag1_blast_dict, old_duplicate_key_prefix="SBT: "
        )

    def __repr__(self):
        return f"< Legionella analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
|
156
|
+
|
157
|
+
|
158
|
+
def legionella_batch_from_dict(file_paths: dict, output_file: Path = None):
    """Run `legionella_summary` for every sample described in `file_paths`.

    `file_paths` maps sample name -> dict with the keys "sbt_results" and
    "lag1_blast_results" (paths to the two tool outputs). If `output_file` is
    not None, the collected summaries are additionally written there as a
    tab-separated table whose first column is "sample_name".

    Returns the collected summaries as a dict keyed by sample name.
    """
    per_sample = {}
    for name in file_paths:
        tool_outputs = file_paths[name]
        sbt_path = Path(tool_outputs["sbt_results"])
        lag1_path = Path(tool_outputs["lag1_blast_results"])
        per_sample[name] = legionella_summary(
            legionella_sbt_results_tsv=sbt_path,
            lag1_blast_tsv=lag1_path,
        )

    if output_file is not None:
        frame = pandas.DataFrame.from_dict(per_sample, orient="index")
        # Move the sample names out of the index into a regular column.
        frame.reset_index(names="sample_name").to_csv(
            output_file, sep="\t", index=False
        )
    return per_sample
|
172
|
+
|
173
|
+
|
174
|
+
def legionella_batch_from_sheet(file_paths: dict, output_file: Path = None):
    """Collect Legionella summaries for a batch of samples.

    Each value of `file_paths` is a dict providing "sbt_results" and
    "lag1_blast_results"; the summary for that sample comes from
    `legionella_summary`. With an `output_file`, the batch is also saved as a
    tab-separated file with "sample_name" as its first column.

    Returns a dict mapping sample name to that sample's summary dict.
    """
    # NOTE(review): an identically named function with the same body appears
    # earlier in this module; this later definition is the one in effect.
    collected = {
        sample_id: legionella_summary(
            legionella_sbt_results_tsv=Path(locations["sbt_results"]),
            lag1_blast_tsv=Path(locations["lag1_blast_results"]),
        )
        for sample_id, locations in file_paths.items()
    }
    write_requested = output_file is not None
    if write_requested:
        as_table = pandas.DataFrame.from_dict(collected, orient="index")
        as_table = as_table.reset_index(names="sample_name")
        as_table.to_csv(output_file, sep="\t", index=False)
    return collected
|
188
|
+
|
69
189
|
# %% ../nbs/39_Legionella_parser.ipynb 9
|
70
190
|
@call_parse
|
71
191
|
def legionella_parser(
|
@@ -77,12 +197,23 @@ def legionella_parser(
|
|
77
197
|
) -> None:
|
78
198
|
""" """
|
79
199
|
# config = core.get_config(config_file) # Set env vars and get config variables
|
80
|
-
|
200
|
+
legionella_results = LegionellaResults.from_tool_paths(
|
81
201
|
legionella_sbt_results_tsv=legionella_sbt_file,
|
82
202
|
lag1_blast_tsv=lag_1_blast_output,
|
83
|
-
)
|
84
|
-
core.print_results_dict_to_tsv(
|
85
|
-
results_dict=legionella_summary_dict,
|
86
|
-
output_file=output_file,
|
87
203
|
sample_name=sample_name,
|
88
204
|
)
|
205
|
+
legionella_results.write_tsv(output_file=output_file)
|
206
|
+
|
207
|
+
|
208
|
+
@call_parse
def legionella_batch_parser(
    file_path_tsv: Path = None,  # Path to tsv containing file paths to the outputs from tools to be parsed. Must contain headers "sample_name", "sbt_results", and "lag1_blast_results"
    output_file: Path = None,  # Path to output tsv
    config_file: str = None,  # config file to set env vars from
) -> None:
    """Parse Legionella results for every sample listed in a file-path tsv and write them to one output tsv."""
    # NOTE(review): config_file is accepted but currently unused.
    # config = core.get_config(config_file)  # Set env vars and get config variables
    if file_path_tsv is None:
        # Fail early with an actionable message instead of the opaque error
        # pandas.read_csv raises when handed None.
        raise ValueError(
            "file_path_tsv is required: provide a tsv with the headers "
            '"sample_name", "sbt_results", and "lag1_blast_results"'
        )
    legionella_results = LegionellaResults.from_tool_paths_tsv(
        tool_paths_tsv=file_path_tsv
    )
    legionella_results.write_tsv(output_file)
|
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.
|
1
|
+
# Keep in sync with the distribution version declared in the package METADATA.
__version__ = "0.0.3"
|
@@ -2,11 +2,31 @@
|
|
2
2
|
|
3
3
|
d = { 'settings': { 'branch': 'main',
|
4
4
|
'doc_baseurl': '/ssi_analysis_result_parsers',
|
5
|
-
'doc_host': 'https
|
6
|
-
'git_url': 'https://github.com
|
5
|
+
'doc_host': 'https://thej-ssi.github.io',
|
6
|
+
'git_url': 'https://github.com/thej-ssi/ssi_analysis_result_parsers',
|
7
7
|
'lib_path': 'ssi_analysis_result_parsers'},
|
8
|
-
'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.
|
8
|
+
'syms': { 'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults': ( 'legionella_parser.html#legionellaresults',
|
9
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
10
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.__repr__': ( 'legionella_parser.html#legionellaresults.__repr__',
|
11
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
12
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths': ( 'legionella_parser.html#legionellaresults.from_tool_paths',
|
13
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
14
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_dataframe': ( 'legionella_parser.html#legionellaresults.from_tool_paths_dataframe',
|
15
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
16
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_dict': ( 'legionella_parser.html#legionellaresults.from_tool_paths_dict',
|
17
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
18
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.from_tool_paths_tsv': ( 'legionella_parser.html#legionellaresults.from_tool_paths_tsv',
|
19
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
20
|
+
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.legionella_summary': ( 'legionella_parser.html#legionellaresults.legionella_summary',
|
21
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
22
|
+
'ssi_analysis_result_parsers.Legionella_parser.extract_legionella_sbt': ( 'legionella_parser.html#extract_legionella_sbt',
|
9
23
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
24
|
+
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_dict': ( 'legionella_parser.html#legionella_batch_from_dict',
|
25
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
26
|
+
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_from_sheet': ( 'legionella_parser.html#legionella_batch_from_sheet',
|
27
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
28
|
+
'ssi_analysis_result_parsers.Legionella_parser.legionella_batch_parser': ( 'legionella_parser.html#legionella_batch_parser',
|
29
|
+
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
10
30
|
'ssi_analysis_result_parsers.Legionella_parser.legionella_parser': ( 'legionella_parser.html#legionella_parser',
|
11
31
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
12
32
|
'ssi_analysis_result_parsers.Legionella_parser.legionella_summary': ( 'legionella_parser.html#legionella_summary',
|
@@ -19,7 +39,27 @@ d = { 'settings': { 'branch': 'main',
|
|
19
39
|
'ssi_analysis_result_parsers/blast_parser.py'),
|
20
40
|
'ssi_analysis_result_parsers.blast_parser.presence_absence': ( 'blast_parser.html#presence_absence',
|
21
41
|
'ssi_analysis_result_parsers/blast_parser.py')},
|
22
|
-
'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.
|
42
|
+
'ssi_analysis_result_parsers.core': { 'ssi_analysis_result_parsers.core.PipelineResults': ( 'core.html#pipelineresults',
|
43
|
+
'ssi_analysis_result_parsers/core.py'),
|
44
|
+
'ssi_analysis_result_parsers.core.PipelineResults.__init__': ( 'core.html#pipelineresults.__init__',
|
45
|
+
'ssi_analysis_result_parsers/core.py'),
|
46
|
+
'ssi_analysis_result_parsers.core.PipelineResults.__iter__': ( 'core.html#pipelineresults.__iter__',
|
47
|
+
'ssi_analysis_result_parsers/core.py'),
|
48
|
+
'ssi_analysis_result_parsers.core.PipelineResults.__len__': ( 'core.html#pipelineresults.__len__',
|
49
|
+
'ssi_analysis_result_parsers/core.py'),
|
50
|
+
'ssi_analysis_result_parsers.core.PipelineResults.__repr__': ( 'core.html#pipelineresults.__repr__',
|
51
|
+
'ssi_analysis_result_parsers/core.py'),
|
52
|
+
'ssi_analysis_result_parsers.core.PipelineResults.from_results_dataframe': ( 'core.html#pipelineresults.from_results_dataframe',
|
53
|
+
'ssi_analysis_result_parsers/core.py'),
|
54
|
+
'ssi_analysis_result_parsers.core.PipelineResults.from_results_tsv': ( 'core.html#pipelineresults.from_results_tsv',
|
55
|
+
'ssi_analysis_result_parsers/core.py'),
|
56
|
+
'ssi_analysis_result_parsers.core.PipelineResults.items': ( 'core.html#pipelineresults.items',
|
57
|
+
'ssi_analysis_result_parsers/core.py'),
|
58
|
+
'ssi_analysis_result_parsers.core.PipelineResults.results': ( 'core.html#pipelineresults.results',
|
59
|
+
'ssi_analysis_result_parsers/core.py'),
|
60
|
+
'ssi_analysis_result_parsers.core.PipelineResults.write_tsv': ( 'core.html#pipelineresults.write_tsv',
|
61
|
+
'ssi_analysis_result_parsers/core.py'),
|
62
|
+
'ssi_analysis_result_parsers.core.get_config': ( 'core.html#get_config',
|
23
63
|
'ssi_analysis_result_parsers/core.py'),
|
24
64
|
'ssi_analysis_result_parsers.core.get_samplesheet': ( 'core.html#get_samplesheet',
|
25
65
|
'ssi_analysis_result_parsers/core.py'),
|
@@ -49,36 +49,38 @@ def extract_presence_absence(
|
|
49
49
|
|
50
50
|
"""
|
51
51
|
if os.path.exists(blast_output_tsv):
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
blast_df
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
52
|
+
try:
|
53
|
+
blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
|
54
|
+
|
55
|
+
blast_df.columns = tsv_header.split(" ")
|
56
|
+
blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
|
57
|
+
blast_df_unique = (
|
58
|
+
blast_df.sort_values(by=["bitscore"], ascending=False)
|
59
|
+
.groupby("qseqid")
|
60
|
+
.first()
|
61
|
+
)
|
62
|
+
blast_df_filtered = blast_df_unique.query(
|
63
|
+
"plen > @plen_threshold and pident > @pident_threshold"
|
64
|
+
)
|
65
|
+
blast_dict = dict(blast_df_filtered.to_dict(orient="index"))
|
66
|
+
except pandas.errors.EmptyDataError:
|
67
|
+
blast_dict = {}
|
68
|
+
print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
|
63
69
|
if hits_as_string:
|
64
|
-
if include_match_stats:
|
65
|
-
results = []
|
66
|
-
for gene, d in blast_df_filtered.to_dict(orient="index").items():
|
67
|
-
results.append(f"{gene}__{d['pident']}__{d['plen']}")
|
68
|
-
result_dict = {"genes_found": ", ".join(results)}
|
69
|
-
return result_dict
|
70
70
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
results = []
|
72
|
+
for gene, d in blast_dict.items():
|
73
|
+
if include_match_stats:
|
74
|
+
results.append(f"{gene}__{d['pident']}__{d['plen']}")
|
75
|
+
else:
|
76
|
+
results.append(gene)
|
77
|
+
result_dict = {"genes_found": ", ".join(results)}
|
78
|
+
return result_dict
|
76
79
|
|
77
80
|
else:
|
78
81
|
result_dict = {}
|
79
|
-
blast_dict = dict(blast_df_filtered.to_dict(orient="index").items())
|
80
82
|
if gene_names is None:
|
81
|
-
gene_names = blast_dict.keys()
|
83
|
+
gene_names = list(blast_dict.keys())
|
82
84
|
for gene in gene_names:
|
83
85
|
if gene in blast_dict:
|
84
86
|
if include_match_stats:
|
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
# %% auto 0
|
4
4
|
__all__ = ['PACKAGE_NAME', 'DEV_MODE', 'PACKAGE_DIR', 'PROJECT_DIR', 'config', 'set_env_variables', 'get_config',
|
5
|
-
'show_project_env_vars', 'get_samplesheet', '
|
5
|
+
'show_project_env_vars', 'get_samplesheet', 'PipelineResults', 'update_results_dict',
|
6
|
+
'print_results_dict_to_tsv']
|
6
7
|
|
7
8
|
# %% ../nbs/00_core.ipynb 4
|
8
9
|
# Need the ssi_analysis_result_parsers for a few functions, this can be considered a static var
|
@@ -192,48 +193,97 @@ def get_samplesheet(sample_sheet_config: dict) -> pd.DataFrame:
|
|
192
193
|
return df
|
193
194
|
|
194
195
|
# %% ../nbs/00_core.ipynb 24
|
196
|
+
class PipelineResults:
    """Generic container for per-sample analysis results.

    Results are held twice: as the original nested dict
    (`results_dict`, sample name -> {variable: value}) and as a DataFrame
    (`results_df`) built from it with one row per sample.
    """

    def __init__(self, results_dict):
        """Store `results_dict` and derive the row-per-sample DataFrame from it."""
        self.results_dict = results_dict
        self.results_df = pandas.DataFrame.from_dict(results_dict, orient="index")

    def write_tsv(self, output_file: Path) -> None:
        """Write the results as a tab-separated file with a leading "sample_name" column."""
        # The sample names live in the index; turn them into a named column.
        print_df = self.results_df.reset_index(names="sample_name")
        print_df.to_csv(output_file, sep="\t", index=False)
        return None

    @classmethod
    def from_results_dataframe(cls, results_df: pandas.DataFrame):
        """Build an instance from a DataFrame whose index holds the sample names."""
        return cls(results_df.to_dict(orient="index"))

    @classmethod
    def from_results_tsv(cls, results_tsv: Path):
        """Build an instance from a tab-separated file containing a "sample_name" column."""
        results_df = pandas.read_csv(results_tsv, sep="\t")
        results_df = results_df.set_index("sample_name")
        return cls(results_df.to_dict(orient="index"))

    def __repr__(self):
        return f"< Generic analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "

    def __len__(self):
        """Return the number of samples."""
        return len(self.results_dict)

    def __iter__(self):
        """Iterate over sample names."""
        yield from self.results_dict

    def items(self):
        """Iterate over (sample_name, results) pairs."""
        # Must go through .items(); iterating the dict itself yields keys only.
        yield from self.results_dict.items()

    def results(self):
        """Iterate over the per-sample result dicts."""
        yield from self.results_dict.values()
|
238
|
+
|
239
|
+
|
195
240
|
def update_results_dict(
    old_results: dict,
    new_results: dict,
    old_duplicate_key_prefix: str = None,
    new_duplicate_key_prefix: str = None,
):
    """Merge two result dicts, renaming keys that occur in both.

    If either dict is None, the other one is returned as-is. When the dicts
    share no keys, the result is their plain union. When they do share keys,
    each shared key is renamed to `<prefix><key>` on the side(s) for which a
    prefix was given; a side without a prefix keeps its keys unchanged.

    The merged result is always a new dict; neither input is modified.

    Raises:
        ValueError: if the dicts share keys and no prefix was given, or if
            both prefixes are identical.
    """
    if old_results is None:
        return new_results
    if new_results is None:
        return old_results

    # Set of shared keys: gives O(1) membership tests in the loop below.
    duplicate_keys = old_results.keys() & new_results.keys()
    if not duplicate_keys:
        # Build a fresh dict rather than updating the caller's old_results.
        return {**old_results, **new_results}

    if old_duplicate_key_prefix is None and new_duplicate_key_prefix is None:
        raise ValueError(
            "Provided dictionaries contain duplicate keys. Old_duplicate_key_prefix and/or new_duplicate_key_prefix must be provided"
        )
    if old_duplicate_key_prefix == new_duplicate_key_prefix:
        raise ValueError(
            "old_duplicate_key_prefix and new_duplicate_key_prefix cannot be identical"
        )

    combined_dict = {}
    # Old results first, so an unprefixed new value wins over an equal old key.
    for results, prefix in (
        (old_results, old_duplicate_key_prefix),
        (new_results, new_duplicate_key_prefix),
    ):
        for key, value in results.items():
            if prefix is not None and key in duplicate_keys:
                key = f"{prefix}{key}"
            combined_dict[key] = value
    return combined_dict
|
237
287
|
|
238
288
|
|
239
289
|
def print_results_dict_to_tsv(
|
{ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/METADATA
RENAMED
@@ -1,9 +1,9 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: ssi_analysis_result_parsers
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: TODO
|
5
5
|
Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
|
6
|
-
Author:
|
6
|
+
Author: Thor Bech Johannesen
|
7
7
|
Author-email: thej@ssi.dk
|
8
8
|
License: MIT License
|
9
9
|
Keywords: nbdev jupyter notebook python
|
@@ -0,0 +1,27 @@
|
|
1
|
+
ssi_analysis_result_parsers/Legionella_parser.py,sha256=zBsSYfuUj2uN_LRqgTpOwhFUQF2jTLNvVzKbh6OPNxE,8878
|
2
|
+
ssi_analysis_result_parsers/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
|
3
|
+
ssi_analysis_result_parsers/_modidx.py,sha256=JY_GM0tMojzTtX9O4D8as4k5a-sXqkxkb7ZUEPzhuMk,12232
|
4
|
+
ssi_analysis_result_parsers/blast_parser.py,sha256=L7EdW2LUwSS2OQO7WZUAxP6whJXdvTILE2a3O59uv-s,7441
|
5
|
+
ssi_analysis_result_parsers/core.py,sha256=6TGURv8spPdBpwKv6LvqvbVzJChdeHwsG3WQ6QLUuvE,12124
|
6
|
+
ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
|
7
|
+
ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
|
8
|
+
ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
|
9
|
+
ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
|
10
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
|
11
|
+
test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
|
12
|
+
test_input/Legionella/batch_parser_file_paths.tsv,sha256=zls11lmEA5U89d8RsX6PR8M1zXNVimeL4raqdZ3ijvQ,210
|
13
|
+
test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
|
14
|
+
test_input/Legionella/lag-1_blast_2.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
test_input/Legionella/test.sbt.tsv,sha256=ibhaH3is2dxHaABPvR2QM2HAq9bKOs1AwOTmrwSrcd8,168
|
16
|
+
test_input/Legionella/test2.sbt.tsv,sha256=uJyVGHKXPmnvaXSt_84_buATOyl79H6vZjkWRitca9k,170
|
17
|
+
test_input/blast_parser/allele_matches_test.tsv,sha256=7vfQAOxz3fKc84HtxN9eoCyQoF9G8MFd-GKH3Krw_Cs,233035
|
18
|
+
test_input/blast_parser/empty_gene_presence_absense_test.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDhnGAuuBXAlIq4e_IW0rrNVn2yA,1447
|
20
|
+
test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
|
21
|
+
test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
22
|
+
test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
23
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/METADATA,sha256=LPIINEBm-fhPx_sG1lS1xLf2ZIWtHXU97hFFW2M58fE,2765
|
24
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
25
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/entry_points.txt,sha256=eG2NzlNDoG__0PPHl3eoKK5EXIz02BGhRX-L2aWgKCY,447
|
26
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
|
27
|
+
ssi_analysis_result_parsers-0.0.3.dist-info/RECORD,,
|
@@ -1,7 +1,8 @@
|
|
1
1
|
[console_scripts]
|
2
2
|
blast_parser_allele_matches = ssi_analysis_result_parsers.blast_parser:allele_matches
|
3
3
|
blast_parser_presence_absence = ssi_analysis_result_parsers.blast_parser:presence_absence
|
4
|
-
|
4
|
+
get_leg_results = ssi_analysis_result_parsers.Legionella_parser:legionella_parser
|
5
|
+
get_leg_results_batch = ssi_analysis_result_parsers.Legionella_parser:legionella_batch_parser
|
5
6
|
|
6
7
|
[nbdev]
|
7
8
|
ssi_analysis_result_parsers = ssi_analysis_result_parsers._modidx:d
|
File without changes
|
File without changes
|
test_output/test.tsv
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
sample_name ST flaA pilE asd mip mompS proA neuA notes lag-1
|
2
|
+
sample_1 23 2 3 9 10 2 1 6 Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call 1
|
3
|
+
sample_2 182 3 4 1 3 35 9 11 Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call 0
|
@@ -0,0 +1,3 @@
|
|
1
|
+
sample_name ST flaA pilE asd mip mompS proA neuA notes lag-1
|
2
|
+
sample_1 23 2 3 9 10 2 1 6 Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call 1
|
3
|
+
sample_2 182 3 4 1 3 35 9 11 Exact ST match, Heterozygous mompS alleles, High confidence mompS allele call 0
|
@@ -1,21 +0,0 @@
|
|
1
|
-
ssi_analysis_result_parsers/Legionella_parser.py,sha256=cDaBYC6UjuXLI1GGSihY6zlVFBcqYDhmhFZ5xfchSSQ,3279
|
2
|
-
ssi_analysis_result_parsers/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
|
3
|
-
ssi_analysis_result_parsers/_modidx.py,sha256=Kojwrr-9jPtuKhxTDhjhs5cn1FBm_A-U9J1AND_eujw,5159
|
4
|
-
ssi_analysis_result_parsers/blast_parser.py,sha256=Yam04LnF2-eECalVGyyq7DDq3ZYRqrQ6I8dX0Z0V8-w,7389
|
5
|
-
ssi_analysis_result_parsers/core.py,sha256=6j6mQQws-mcR2Ef7KY_9F8jenrT9a_DtGYVH-9D9rzw,10335
|
6
|
-
ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
|
7
|
-
ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
|
8
|
-
ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
|
9
|
-
ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
|
10
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
|
11
|
-
test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
|
12
|
-
test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
|
13
|
-
test_input/Legionella/test.sbt.tsv,sha256=ibhaH3is2dxHaABPvR2QM2HAq9bKOs1AwOTmrwSrcd8,168
|
14
|
-
test_input/blast_parser/allele_matches_test.tsv,sha256=7vfQAOxz3fKc84HtxN9eoCyQoF9G8MFd-GKH3Krw_Cs,233035
|
15
|
-
test_input/blast_parser/gene_presence_absence_test.tsv,sha256=qCvMkBC-1GuXx83RDhnGAuuBXAlIq4e_IW0rrNVn2yA,1447
|
16
|
-
test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9gw08OLdEdUo,41
|
17
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/METADATA,sha256=lLlQweqBO3vbnxiQdvfIXyOJtDZjJ5Xv4D4fA4lL8Pw,2753
|
18
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
19
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/entry_points.txt,sha256=nR2p-a6E4W1Xg7oR-f4OXXcnR7ohtT64ZdyaCPG7N7k,355
|
20
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/top_level.txt,sha256=3q56bBc2Wv2a6ZQ1l_9m66vot2-Qu6tM9tDr3QQ8auM,81
|
21
|
-
ssi_analysis_result_parsers-0.0.1.dist-info/RECORD,,
|
{ssi_analysis_result_parsers-0.0.1.dist-info → ssi_analysis_result_parsers-0.0.3.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|