ssi-analysis-result-parsers 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {ssi_analysis_result_parsers-0.0.7/ssi_analysis_result_parsers.egg-info → ssi_analysis_result_parsers-0.0.9}/PKG-INFO +1 -1
  2. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/settings.ini +1 -1
  3. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/Spyogenes_parser.py +111 -72
  4. ssi_analysis_result_parsers-0.0.9/ssi_analysis_result_parsers/__init__.py +1 -0
  5. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/blast_parser.py +33 -15
  6. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9/ssi_analysis_result_parsers.egg-info}/PKG-INFO +1 -1
  7. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/SOURCES.txt +2 -0
  8. ssi_analysis_result_parsers-0.0.9/test_input/Spyogenes/batch_parser_file_paths.tsv +10 -0
  9. ssi_analysis_result_parsers-0.0.9/test_input/Spyogenes/emm_typing/test6.emm.blast.tsv +45 -0
  10. ssi_analysis_result_parsers-0.0.9/test_input/Spyogenes/emm_typing/test7.emm.blast.tsv +40 -0
  11. ssi_analysis_result_parsers-0.0.7/ssi_analysis_result_parsers/__init__.py +0 -1
  12. ssi_analysis_result_parsers-0.0.7/test_input/Spyogenes/batch_parser_file_paths.tsv +0 -5
  13. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/LICENSE +0 -0
  14. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/MANIFEST.in +0 -0
  15. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/README.md +0 -0
  16. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/pyproject.toml +0 -0
  17. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/setup.cfg +0 -0
  18. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/setup.py +0 -0
  19. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/Legionella_parser.py +0 -0
  20. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/_modidx.py +0 -0
  21. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/config/config.default.env +0 -0
  22. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/config/config.default.yaml +0 -0
  23. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/core.py +0 -0
  24. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/hello_world.py +0 -0
  25. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers/some_string.py +0 -0
  26. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/dependency_links.txt +0 -0
  27. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/entry_points.txt +0 -0
  28. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/not-zip-safe +0 -0
  29. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/requires.txt +0 -0
  30. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/ssi_analysis_result_parsers.egg-info/top_level.txt +0 -0
  31. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/.DS_Store +0 -0
  32. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Legionella/batch_parser_file_paths.tsv +0 -0
  33. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Legionella/lag-1_blast.tsv +0 -0
  34. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Legionella/lag-1_blast_2.tsv +0 -0
  35. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Legionella/test.sbt.tsv +0 -0
  36. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Legionella/test2.sbt.tsv +0 -0
  37. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/Mga.fasta +0 -0
  38. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/emm_clusters.txt +0 -0
  39. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test1.emm.blast.tsv +0 -0
  40. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test1.mga.blast.tsv +0 -0
  41. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test2.emm.blast.tsv +0 -0
  42. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test2.mga.blast.tsv +0 -0
  43. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test3.emm.blast.tsv +0 -0
  44. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test4.emm.blast.tsv +0 -0
  45. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/Spyogenes/emm_typing/test5.emm.blast.tsv +0 -0
  46. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/blast_parser/allele_matches_test.tsv +0 -0
  47. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/blast_parser/empty_gene_presence_absense_test.tsv +0 -0
  48. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/blast_parser/gene_presence_absence_test.tsv +0 -0
  49. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_input/empty_file.txt +0 -0
  50. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_output/output_with_sample_name.tsv +0 -0
  51. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_output/test.tsv +0 -0
  52. {ssi_analysis_result_parsers-0.0.7 → ssi_analysis_result_parsers-0.0.9}/test_output/test_batch_output.tsv +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -5,7 +5,7 @@
5
5
  ### Python library ###
6
6
  repo = ssi_analysis_result_parsers
7
7
  lib_name = %(repo)s
8
- version = 0.0.7
8
+ version = 0.0.9
9
9
  min_python = 3.9
10
10
  license = MIT
11
11
  black_formatting = True
@@ -56,7 +56,14 @@ def extract_emm_type(emm_blast_tsv: Path):
56
56
  """
57
57
 
58
58
  emm_types_in_emm_plus_mrp_operons = [] ### to update
59
- mrp_types_in_emm_plus_mrp_operons = ["156"] ### to update
59
+ mrp_types_in_emm_plus_mrp_operons = [
60
+ "134",
61
+ "156",
62
+ "159",
63
+ "164",
64
+ "174",
65
+ "205",
66
+ ] ### to update
60
67
  emm_blast_tsv = Path(emm_blast_tsv)
61
68
  emm_typing_results = {"EMM_type": "-", "ENN_type": "-", "MRP_type": "-"}
62
69
  if not emm_blast_tsv.exists():
@@ -85,87 +92,119 @@ def extract_emm_type(emm_blast_tsv: Path):
85
92
  .groupby("extended_sstart")
86
93
  .first()
87
94
  )
88
- if blast_df_unique.shape[0] == 1:
89
- emm_typing_results["EMM_type"] = blast_df_unique.iloc[0]["qseqid"][3:]
90
- if (
91
- blast_df_unique.iloc[0]["length"] < 180
92
- or blast_df_unique.iloc[0]["pident"] < 100
93
- ):
94
- emm_typing_results["EMM_type"] += "*"
95
- notes.append(
96
- f"EMM{blast_df_unique.iloc[0]['qseqid'][3:]} with {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
97
- )
98
- else:
99
- if blast_df_unique.iloc[0]["sstart"] < blast_df_unique.iloc[0]["send"]:
100
- blast_df_unique = blast_df_unique.sort_values(by=["sstart"], ascending=True)
101
- else:
102
- blast_df_unique = blast_df_unique.sort_values(
103
- by=["sstart"], ascending=False
95
+
96
+ if blast_df_unique.shape[0] == 0:
97
+ notes.append("No blast hits found for EMM genes")
98
+ elif len(set(blast_df_unique["sseqid"])) == 1:
99
+ if blast_df_unique.shape[0] == 1:
100
+ emm_typing_results["EMM_type"] = (
101
+ "EMM" + blast_df_unique.iloc[0]["qseqid"][3:]
104
102
  )
105
- if blast_df_unique.shape[0] == 2:
106
- emm_typing_results["EMM_type"] = blast_df_unique.iloc[0]["qseqid"][3:]
107
103
  if (
108
- blast_df_unique.iloc[0]["length"] < 180
104
+ blast_df_unique.iloc[0]["length"] < blast_df_unique.iloc[0]["qlen"]
109
105
  or blast_df_unique.iloc[0]["pident"] < 100
110
106
  ):
111
107
  emm_typing_results["EMM_type"] += "*"
112
108
  notes.append(
113
- f"EMM{blast_df_unique.iloc[0]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
109
+ f"EMM{blast_df_unique.iloc[0]['qseqid'][3:]} with {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
110
+ )
111
+ else:
112
+ if blast_df_unique.iloc[0]["sstart"] < blast_df_unique.iloc[0]["send"]:
113
+ blast_df_unique = blast_df_unique.sort_values(
114
+ by=["sstart"], ascending=True
114
115
  )
116
+ else:
117
+ blast_df_unique = blast_df_unique.sort_values(
118
+ by=["sstart"], ascending=False
119
+ )
120
+ if blast_df_unique.shape[0] == 2:
121
+ emm_typing_results["EMM_type"] = (
122
+ "EMM" + blast_df_unique.iloc[0]["qseqid"][3:]
123
+ )
124
+ if (
125
+ blast_df_unique.iloc[0]["length"] < blast_df_unique.iloc[0]["qlen"]
126
+ or blast_df_unique.iloc[0]["pident"] < 100
127
+ ):
128
+ emm_typing_results["EMM_type"] += "*"
129
+ notes.append(
130
+ f"EMM{blast_df_unique.iloc[0]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
131
+ )
115
132
 
116
- emm_typing_results["ENN_type"] = blast_df_unique.iloc[1]["qseqid"][3:]
117
- if (
118
- blast_df_unique.iloc[1]["length"] < 180
119
- or blast_df_unique.iloc[1]["pident"] < 100
120
- ):
121
- emm_typing_results["ENN_type"] += "*"
122
- notes.append(
123
- f"ENN{blast_df_unique.iloc[1]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[1]['pident'],2)} and length {blast_df_unique.iloc[1]['length']}/{blast_df_unique.iloc[1]['qlen']}"
133
+ emm_typing_results["ENN_type"] = (
134
+ "ENN" + blast_df_unique.iloc[1]["qseqid"][3:]
124
135
  )
136
+ if (
137
+ blast_df_unique.iloc[1]["length"] < blast_df_unique.iloc[1]["qlen"]
138
+ or blast_df_unique.iloc[1]["pident"] < 100
139
+ ):
140
+ emm_typing_results["ENN_type"] += "*"
141
+ notes.append(
142
+ f"ENN{blast_df_unique.iloc[1]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[1]['pident'],2)} and length {blast_df_unique.iloc[1]['length']}/{blast_df_unique.iloc[1]['qlen']}"
143
+ )
144
+ emm_maintype = blast_df_unique.iloc[0]["qseqid"][3:].split(".")[0]
145
+ mrp_maintype = blast_df_unique.iloc[1]["qseqid"][3:].split(".")[0]
146
+ if (
147
+ mrp_maintype in emm_types_in_emm_plus_mrp_operons
148
+ or emm_maintype in mrp_types_in_emm_plus_mrp_operons
149
+ ):
150
+ emm_typing_results["MRP_type"] = (
151
+ "MRP" + emm_typing_results["EMM_type"][3:]
152
+ )
153
+ emm_typing_results["EMM_type"] = (
154
+ "EMM" + emm_typing_results["ENN_type"][3:]
155
+ )
156
+ emm_typing_results["ENN_type"] = "-"
157
+ notes.append(f"EMM redesignated due to known MRP+EMM operon")
125
158
 
126
- emm_maintype = blast_df_unique.iloc[0]["qseqid"][3:].split(".")[0]
127
- mrp_maintype = blast_df_unique.iloc[1]["qseqid"][3:].split(".")[0]
128
- if (
129
- mrp_maintype in emm_types_in_emm_plus_mrp_operons
130
- or emm_maintype in mrp_types_in_emm_plus_mrp_operons
131
- ):
132
- emm_typing_results["MRP_type"] = emm_typing_results["EMM_type"]
133
- emm_typing_results["EMM_type"] = emm_typing_results["ENN_type"]
134
- emm_typing_results["ENN_type"] = "-"
135
- notes.append(f"EMM redesignated due to known MRP+EMM operon")
136
-
137
- elif blast_df_unique.shape[0] == 3:
138
- emm_typing_results["MRP_type"] = blast_df_unique.iloc[0]["qseqid"][3:]
139
- if (
140
- blast_df_unique.iloc[0]["length"] < 180
141
- or blast_df_unique.iloc[0]["pident"] < 100
142
- ):
143
- emm_typing_results["MRP_type"] += "*"
144
- notes.append(
145
- f"MRP{blast_df_unique.iloc[0]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
159
+ elif blast_df_unique.shape[0] == 3:
160
+ emm_typing_results["MRP_type"] = (
161
+ "MRP" + blast_df_unique.iloc[0]["qseqid"][3:]
146
162
  )
163
+ if (
164
+ blast_df_unique.iloc[0]["length"] < blast_df_unique.iloc[0]["qlen"]
165
+ or blast_df_unique.iloc[0]["pident"] < 100
166
+ ):
167
+ emm_typing_results["MRP_type"] += "*"
168
+ notes.append(
169
+ f"MRP{blast_df_unique.iloc[0]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[0]['pident'],2)} and length {blast_df_unique.iloc[0]['length']}/{blast_df_unique.iloc[0]['qlen']}"
170
+ )
147
171
 
148
- emm_typing_results["EMM_type"] = blast_df_unique.iloc[1]["qseqid"][3:]
149
- if (
150
- blast_df_unique.iloc[1]["length"] < 180
151
- or blast_df_unique.iloc[1]["pident"] < 100
152
- ):
153
- emm_typing_results["EMM_type"] += "*"
154
- notes.append(
155
- f"EMM{blast_df_unique.iloc[1]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[1]['pident'],2)} and length {blast_df_unique.iloc[1]['length']}/{blast_df_unique.iloc[1]['qlen']}"
172
+ emm_typing_results["EMM_type"] = (
173
+ "EMM" + blast_df_unique.iloc[1]["qseqid"][3:]
156
174
  )
175
+ if (
176
+ blast_df_unique.iloc[1]["length"] < blast_df_unique.iloc[1]["qlen"]
177
+ or blast_df_unique.iloc[1]["pident"] < 100
178
+ ):
179
+ emm_typing_results["EMM_type"] += "*"
180
+ notes.append(
181
+ f"EMM{blast_df_unique.iloc[1]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[1]['pident'],2)} and length {blast_df_unique.iloc[1]['length']}/{blast_df_unique.iloc[1]['qlen']}"
182
+ )
157
183
 
158
- emm_typing_results["ENN_type"] = blast_df_unique.iloc[2]["qseqid"][3:]
159
- if (
160
- blast_df_unique.iloc[2]["length"] < 180
161
- or blast_df_unique.iloc[2]["pident"] < 100
162
- ):
163
- emm_typing_results["ENN_type"] += "*"
164
- notes.append(
165
- f"ENN{blast_df_unique.iloc[2]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[2]['pident'],2)} and length {blast_df_unique.iloc[2]['length']}/{blast_df_unique.iloc[2]['qlen']}"
184
+ emm_typing_results["ENN_type"] = (
185
+ "ENN" + blast_df_unique.iloc[2]["qseqid"][3:]
166
186
  )
167
- elif blast_df_unique.shape[0] == 0:
168
- notes.append("No blast hits found for EMM genes")
187
+ if (
188
+ blast_df_unique.iloc[2]["length"] < blast_df_unique.iloc[2]["qlen"]
189
+ or blast_df_unique.iloc[2]["pident"] < 100
190
+ ):
191
+ emm_typing_results["ENN_type"] += "*"
192
+ notes.append(
193
+ f"ENN{blast_df_unique.iloc[2]['qseqid'][3:]} with pident {round(blast_df_unique.iloc[2]['pident'],2)} and length {blast_df_unique.iloc[2]['length']}/{blast_df_unique.iloc[2]['qlen']}"
194
+ )
195
+ else:
196
+ note_to_add = "EMM and EMM-like genes found on multiple contigs"
197
+ emm_genes = []
198
+ for index, row in blast_df_unique.iterrows():
199
+ if row["length"] < row["qlen"] or row["pident"] < 100:
200
+ emm_genes.append(row["qseqid"][3:] + "*")
201
+ else:
202
+ emm_genes.append(row["qseqid"][3:])
203
+ notes.append(
204
+ "EMM and EMM-like genes found on multiple contigs. Alleles found: "
205
+ + "/".join(emm_genes)
206
+ )
207
+
169
208
  emm_typing_results["emm_typing_notes"] = ", ".join(notes)
170
209
  return emm_typing_results
171
210
 
@@ -176,7 +215,7 @@ class SpyogenesResults(core.PipelineResults):
176
215
  def from_tool_paths(cls, emm_blast_tsv: Path, sample_name=None):
177
216
  """
178
217
  Alternative constructor for initializing results for single sample,
179
- Initializes LegionellaResults instance provided paths to outputs from tools (legionella sbt and lag1 presence blast)
218
+ Initializes SpyogenesResults instance provided paths to outputs from tools (legionella sbt and lag1 presence blast)
180
219
  """
181
220
  gas_results = cls.summary(emm_blast_tsv=emm_blast_tsv)
182
221
  return cls({sample_name: gas_results})
@@ -185,7 +224,7 @@ class SpyogenesResults(core.PipelineResults):
185
224
  def from_tool_paths_dict(cls, file_paths: dict):
186
225
  """
187
226
  Alternative constructor for initializing results for multiple samples,
188
- Initializes LegionellaResults instance by providing a dictionary of paths to outputs from tools (legionella sbt and lag1 presence blast)
227
+ Initializes SpyogenesResults instance by providing a dictionary of paths to outputs from tools (legionella sbt and lag1 presence blast)
189
228
  """
190
229
  results_dict = {}
191
230
  for sample_name, path_dict in file_paths.items():
@@ -197,7 +236,7 @@ class SpyogenesResults(core.PipelineResults):
197
236
  def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
198
237
  """
199
238
  Alternative constructor for initializing results for multiple samples,
200
- Initializes LegionellaResults instance by providing a DataFrame of paths to outputs from tools (legionella sbt and lag1 presence blast)
239
+ Initializes SpyogenesResults instance by providing a DataFrame of paths to outputs from tools (legionella sbt and lag1 presence blast)
201
240
  """
202
241
  file_paths = file_paths_df.to_dict(orient="index")
203
242
  results_dict = {}
@@ -210,7 +249,7 @@ class SpyogenesResults(core.PipelineResults):
210
249
  def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
211
250
  """
212
251
  Alternative constructor for initializing results for multiple samples,
213
- Initializes LegionellaResults instance by providing a tsv-file with paths to outputs from tools (legionella sbt and lag1 presence blast)
252
+ Initializes SpyogenesResults instance by providing a tsv-file with paths to outputs from tools (legionella sbt and lag1 presence blast)
214
253
  """
215
254
  file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
216
255
  file_paths_df.set_index("sample_name", inplace=True, drop=True)
@@ -226,7 +265,7 @@ class SpyogenesResults(core.PipelineResults):
226
265
  return results_dict
227
266
 
228
267
  def __repr__(self):
229
- return f"< Legionella analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
268
+ return f"< Spyogenes analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables > "
230
269
 
231
270
  # %% ../nbs/31_Spyogenes_parser.ipynb 9
232
271
  @call_parse
@@ -0,0 +1 @@
1
+ __version__ = "0.0.9"
@@ -74,8 +74,8 @@ def extract_presence_absence(
74
74
  except pandas.errors.EmptyDataError:
75
75
  blast_dict = {}
76
76
  print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
77
- except Exception as e:
78
- print(f"Error parsing blast: e")
77
+ # except Exception as e:
78
+ # print(f"Error parsing blast: e")
79
79
  if hits_as_string:
80
80
 
81
81
  results = []
@@ -105,6 +105,7 @@ def extract_presence_absence(
105
105
 
106
106
  else:
107
107
  print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)
108
+ return None
108
109
 
109
110
 
110
111
  def extract_allele_matches(
@@ -122,21 +123,38 @@ def extract_allele_matches(
122
123
  allele_dict = {}
123
124
  detailed_dict = {}
124
125
  if os.path.exists(blast_output_tsv):
125
- blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
126
- blast_df.columns = tsv_header.split(" ")
127
- blast_df.set_index("qseqid", drop=False)
128
- blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
129
- blast_df[["gene", "allele"]] = blast_df["qseqid"].str.split("_", expand=True)
130
- blast_df_unique = (
131
- blast_df.sort_values(by=["bitscore"], ascending=False)
132
- .groupby("gene")
133
- .first()
134
- )
135
- for gene, d in blast_df_unique.to_dict(orient="index").items():
136
- allele_dict[gene] = d["allele"]
137
- detailed_dict[gene] = f"{d['allele']}__{d['pident']}__{d['plen']}"
126
+ try:
127
+ blast_df = pandas.read_csv(blast_output_tsv, sep="\t", header=None)
128
+ header_list = tsv_header.split(" ")
129
+ if len(header_list) == len(blast_df.columns):
130
+ blast_df.columns = tsv_header.split(" ")
131
+ blast_df.set_index("qseqid", drop=False)
132
+ blast_df["plen"] = blast_df["length"] / blast_df["qlen"] * 100
133
+ blast_df[["gene", "allele"]] = blast_df["qseqid"].str.split(
134
+ "_", expand=True
135
+ )
136
+ blast_df_unique = (
137
+ blast_df.sort_values(by=["bitscore"], ascending=False)
138
+ .groupby("gene")
139
+ .first()
140
+ )
141
+ for gene, d in blast_df_unique.to_dict(orient="index").items():
142
+ allele_dict[gene] = d["allele"]
143
+ detailed_dict[gene] = f"{d['allele']}__{d['pident']}__{d['plen']}"
144
+ else:
145
+ print(
146
+ f"Failed to parse {blast_output_tsv}. Number of columns do not match length of provided header string",
147
+ file=sys.stderr,
148
+ )
149
+ return None
150
+
151
+ except pandas.errors.EmptyDataError:
152
+ detailed_dict = {}
153
+ allele_dict = {}
154
+ print(f"Blast output file {blast_output_tsv} empty. Assuming 0 blast hits.")
138
155
  else:
139
156
  print(f"No blast output found at {blast_output_tsv}", file=sys.stderr)
157
+ return None
140
158
 
141
159
  if include_match_stats:
142
160
  return detailed_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ssi_analysis_result_parsers
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: TODO
5
5
  Home-page: https://github.com/thej-ssi/ssi_analysis_result_parsers
6
6
  Author: Thor Bech Johannesen
@@ -38,6 +38,8 @@ test_input/Spyogenes/emm_typing/test2.mga.blast.tsv
38
38
  test_input/Spyogenes/emm_typing/test3.emm.blast.tsv
39
39
  test_input/Spyogenes/emm_typing/test4.emm.blast.tsv
40
40
  test_input/Spyogenes/emm_typing/test5.emm.blast.tsv
41
+ test_input/Spyogenes/emm_typing/test6.emm.blast.tsv
42
+ test_input/Spyogenes/emm_typing/test7.emm.blast.tsv
41
43
  test_input/blast_parser/allele_matches_test.tsv
42
44
  test_input/blast_parser/empty_gene_presence_absense_test.tsv
43
45
  test_input/blast_parser/gene_presence_absence_test.tsv
@@ -0,0 +1,10 @@
1
+ sample_name emm_results
2
+ sample_1 test_input/Spyogenes/emm_typing/test1.emm.blast.tsv
3
+ sample_2 test_input/Spyogenes/emm_typing/test2.emm.blast.tsv
4
+ sample_3 test_input/Spyogenes/emm_typing/test3.emm.blast.tsv
5
+ sample_4 test_input/Spyogenes/emm_typing/test4.emm.blast.tsv
6
+ sample_5 test_input/Spyogenes/emm_typing/test5.emm.blast.tsv
7
+ sample_6 test_input/Spyogenes/emm_typing/test6.emm.blast.tsv
8
+ sample_7 test_input/Spyogenes/emm_typing/test7.emm.blast.tsv
9
+ sample_empty test_input/empty_file.txt
10
+ sample_nonexist test_input/files_that_does_not_exist.tsv
@@ -0,0 +1,45 @@
1
+ EMM156.0 GAS-2025-0367_2_61.3538 85.556 180 180 1 180 200941 201108 GGATTAGCAAACACAACTGATGTAAAGGCTGAGA--C-T-G--AGCATG---TTGATGTTGTGCTT-T-C-AGCAAAAGAAGCTAACAAAGTATTCGAAGAGCGCAAAGCCTTGGAAAAACAAGCGCGTGATTTGGGTGACACTATTAATCACATGTCACAAACCATTAGTGAGCAAAGC 5.79e-46 178
2
+ EMM156.1 GAS-2025-0367_2_61.3538 85.000 180 180 1 180 200941 201108 GGATTAGCAAACACAACTGATGTAAAGGCTGAGA--C-T-G--AGCA--TG-TTGAT-GT-TG-TGCTTTCAGCAAAAGAAGCTAACAAAGTATTCGAAGAGCGCAAAGCCTTGGAAAAACAAGCGCGTGATTTGGGTGACACTATTAATCACATGTCACAAACCATTAGTGAGCAAAGC 2.69e-44 172
3
+ EMM156.2 GAS-2025-0367_2_61.3538 86.111 180 180 1 180 200941 201108 GGATTAGCAAACACAACTGATGTAAAGGCTGAGA--C-T-G--AGCA--TG-TTGAT-GT-TG-TGCTTTCAGCAAAAGAAGCTAACAAAGTATTCGAAGAGCGCAAAGCCTTGGAAAAACAAGCGCGTGATTTGGGTGACACTATTAATCACATGTCACAAACCATTAGTGAGCAAAGC 1.24e-47 183
4
+ EMM156.3 GAS-2025-0367_2_61.3538 85.556 180 180 1 180 200941 201108 GGATTAGCAAACACAACTGATGTAAAGGCTGAGA--C-T-G--AGCA--TG-TTGAT-GT-TG-TGCTTTCAGCAAAAGAAGCTAACAAAGTATTCGAAGAGCGCAAAGCCTTGGAAAAACAAGCGCGTGATTTGGGTGACACTATTAATCACATGTCACAAACCATTAGTGAGCAAAGC 5.79e-46 178
5
+ EMM156.4 GAS-2025-0367_2_61.3538 85.000 180 180 1 180 200941 201108 GGATTAGCAAACACAACTGATGTAAAGGCTGAGA--C-T-G--AGCA--TG-TTGAT-GT-TG-TGCTTTCAGCAAAAGAAGCTAACAAAGTATTCGAAGAGCGCAAAGCCTTGGAAAAACAAGCGCGTGATTTGGGTGACACTATTAATCACATGTCACAAACCATTAGTGAGCAAAGC 2.69e-44 172
6
+ EMM203.3 GAS-2025-0367_2_61.3538 96.111 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 5.51e-81 294
7
+ EMM203.4 GAS-2025-0367_2_61.3538 96.667 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.18e-82 300
8
+ EMM203.5 GAS-2025-0367_2_61.3538 93.333 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.20e-72 267
9
+ EMM236.0 GAS-2025-0367_2_61.3538 93.333 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.20e-72 267
10
+ EMM236.1 GAS-2025-0367_2_61.3538 93.889 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 2.58e-74 272
11
+ EMM236.3 GAS-2025-0367_2_61.3538 93.889 180 180 1 180 203502 203681 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 2.58e-74 272
12
+ EMM28.0 GAS-2025-0367_2_61.3538 100.000 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 1.17e-92 333
13
+ EMM28.1 GAS-2025-0367_2_61.3538 98.788 165 180 1 165 202215 202379 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAA 5.51e-81 294
14
+ EMM28.10 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
15
+ EMM28.11 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
16
+ EMM28.12 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202391 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTA---AAGAAGAAGAACCTAGGTATAAA 1.52e-86 313
17
+ EMM28.13 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
18
+ EMM28.14 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
19
+ EMM28.15 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 1.18e-87 316
20
+ EMM28.16 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
21
+ EMM28.17 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
22
+ EMM28.18 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
23
+ EMM28.19 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
24
+ EMM28.2 GAS-2025-0367_2_61.3538 99.390 164 180 1 164 202215 202378 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGA 4.26e-82 298
25
+ EMM28.20 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
26
+ EMM28.21 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
27
+ EMM28.22 GAS-2025-0367_2_61.3538 98.765 162 180 1 162 202215 202376 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAA 2.56e-79 289
28
+ EMM28.23 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
29
+ EMM28.24 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
30
+ EMM28.25 GAS-2025-0367_2_61.3538 100.000 175 180 1 175 202215 202389 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATA 7.02e-90 324
31
+ EMM28.26 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
32
+ EMM28.27 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
33
+ EMM28.28 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
34
+ EMM28.29 GAS-2025-0367_2_61.3538 98.361 183 180 1 180 202215 202397 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCATTG 3.27e-88 318
35
+ EMM28.3 GAS-2025-0367_2_61.3538 99.394 165 180 1 165 202215 202379 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAA 1.18e-82 300
36
+ EMM28.30 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
37
+ EMM28.31 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
38
+ EMM28.32 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
39
+ EMM28.33 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
40
+ EMM28.4 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
41
+ EMM28.5 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
42
+ EMM28.6 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
43
+ EMM28.7 GAS-2025-0367_2_61.3538 99.383 162 180 1 162 202215 202376 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAA 5.51e-81 294
44
+ EMM28.8 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
45
+ EMM28.9 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202391 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCT---AAAGAAGAAGAACCTAGGTATAAA 1.52e-86 313
@@ -0,0 +1,40 @@
1
+ EMM203.3 GAS-2025-0367_5_42.2241 96.111 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 5.51e-81 294
2
+ EMM203.4 GAS-2025-0367_5_42.2241 96.667 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.18e-82 300
3
+ EMM203.5 GAS-2025-0367_5_42.2241 93.333 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.20e-72 267
4
+ EMM236.0 GAS-2025-0367_5_42.2241 93.333 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 1.20e-72 267
5
+ EMM236.1 GAS-2025-0367_5_42.2241 93.889 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 2.58e-74 272
6
+ EMM236.3 GAS-2025-0367_5_42.2241 93.889 180 180 1 180 55602 55781 GGTTTTGCAAACCAAACGGAAGTAAGAGCTGAAGGGGTAAACCCGACTACGAACTTGCCAGAGAAGGCTAAATATGCCGCAGTGAAAGATGAGAATACTGGTTTACGTGGTGATCAGAAAAAATTAGTAAAAAAACTTGAAGAAGAACAAGAGAAGAGCAAAAATCTAGAAAAGCAAAAA 2.58e-74 272
7
+ EMM28.0 GAS-2025-0367_2_61.3538 100.000 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 1.17e-92 333
8
+ EMM28.1 GAS-2025-0367_2_61.3538 98.788 165 180 1 165 202215 202379 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAA 5.51e-81 294
9
+ EMM28.10 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
10
+ EMM28.11 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
11
+ EMM28.12 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202391 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTA---AAGAAGAAGAACCTAGGTATAAA 1.52e-86 313
12
+ EMM28.13 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
13
+ EMM28.14 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
14
+ EMM28.15 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 1.18e-87 316
15
+ EMM28.16 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
16
+ EMM28.17 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
17
+ EMM28.18 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
18
+ EMM28.19 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
19
+ EMM28.2 GAS-2025-0367_2_61.3538 99.390 164 180 1 164 202215 202378 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGA 4.26e-82 298
20
+ EMM28.20 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
21
+ EMM28.21 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
22
+ EMM28.22 GAS-2025-0367_2_61.3538 98.765 162 180 1 162 202215 202376 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAA 2.56e-79 289
23
+ EMM28.23 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
24
+ EMM28.24 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
25
+ EMM28.25 GAS-2025-0367_2_61.3538 100.000 175 180 1 175 202215 202389 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATA 7.02e-90 324
26
+ EMM28.26 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
27
+ EMM28.27 GAS-2025-0367_2_61.3538 98.889 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 2.53e-89 322
28
+ EMM28.28 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
29
+ EMM28.29 GAS-2025-0367_2_61.3538 98.361 183 180 1 180 202215 202397 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCATTG 3.27e-88 318
30
+ EMM28.3 GAS-2025-0367_2_61.3538 99.394 165 180 1 165 202215 202379 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAA 1.18e-82 300
31
+ EMM28.30 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
32
+ EMM28.31 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
33
+ EMM28.32 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
34
+ EMM28.33 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
35
+ EMM28.4 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
36
+ EMM28.5 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
37
+ EMM28.6 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
38
+ EMM28.7 GAS-2025-0367_2_61.3538 99.383 162 180 1 162 202215 202376 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAA 5.51e-81 294
39
+ EMM28.8 GAS-2025-0367_2_61.3538 99.444 180 180 1 180 202215 202394 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCTAAAGAAGAAGAACCTAGGTATAAAGCA 5.43e-91 327
40
+ EMM28.9 GAS-2025-0367_2_61.3538 98.333 180 180 1 180 202215 202391 GGCTTTGCAAACCAAACAGAAGTTAAGGCTGCGGAGTCTCCAAAAAGTACTGAGACTTCTGCTAATGGAGCTGATAAATTAGCTGATGCATACAACACATTGCTTACTGAACATGAGAAACTCAGAGATGAGTATTATACATTAATTGATGCT---AAAGAAGAAGAACCTAGGTATAAA 1.52e-86 313
@@ -1 +0,0 @@
1
- __version__ = "0.0.7"
@@ -1,5 +0,0 @@
1
- sample_name emm_results
2
- sample_1 test_input/Spyogenes/emm_typing/test1.emm.blast.tsv
3
- sample_2 test_input/Spyogenes/emm_typing/test2.emm.blast.tsv
4
- sample_3 test_input/files_that_does_not_exist.tsv
5
- sample_4 test_input/empty_file.txt