ssi-analysis-result-parsers 0.0.14__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssi_analysis_result_parsers/Ecoli_parser.py +1 -168
- ssi_analysis_result_parsers/Hinfluenzae_parser.py +302 -0
- ssi_analysis_result_parsers/__init__.py +1 -1
- ssi_analysis_result_parsers/_modidx.py +24 -0
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/METADATA +1 -1
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/RECORD +17 -9
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/entry_points.txt +2 -0
- test_input/Hinfluenzae/.DS_Store +0 -0
- test_input/Hinfluenzae/ariba_ftsI/ftsI_types_table.txt +12 -0
- test_input/Hinfluenzae/ariba_ftsI/sample1.ftsI.ariba.tsv +5 -0
- test_input/Hinfluenzae/ariba_ftsI/sample2.ftsI.ariba.tsv +4 -0
- test_input/Hinfluenzae/batch_parser_file_paths.tsv +5 -0
- test_input/Hinfluenzae/biotype/sample1.biotype.blast.tsv +2 -0
- test_input/Hinfluenzae/hicap/sample1.hicap.tsv +2 -0
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/WHEEL +0 -0
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/licenses/LICENSE +0 -0
- {ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/top_level.txt +0 -0
@@ -5,8 +5,7 @@ __all__ = ['thresholds', 'samplesheet_path', 'output_dir', 'output_path', 'sampl
|
|
5
5
|
'output_cols', 'output_initial_cols', 'output_specific_cols', 'ERR3528110_res_path', 'ERR3528110_input_df',
|
6
6
|
'ERR3528110_row', 'gene_hits', 'parsed_hits', 'O_gene_alleles', 'H_gene_alleles', 'O_type', 'H_type',
|
7
7
|
'O_gene_keys', 'H_gene_keys', 'O_genes_no', 'H_genes_no', 'ERR14229029_row', 'ERR14229029_expected_values',
|
8
|
-
'ERR14229029_values', '
|
9
|
-
'ecoli_parser']
|
8
|
+
'ERR14229029_values', 'setup_logging', 'get_threshold', 'process_res_file', 'EcoliResults', 'ecoli_parser']
|
10
9
|
|
11
10
|
# %% ../nbs/49_Ecoli_parser.ipynb 3
|
12
11
|
import os
|
@@ -501,169 +500,3 @@ for col, actual, expected in zip(
|
|
501
500
|
assert pd.isna(actual), f"{col}: Expected NaN, got {actual}"
|
502
501
|
else:
|
503
502
|
assert actual == expected, f"{col}: Expected '{expected}', got '{actual}'"
|
504
|
-
|
505
|
-
# %% ../nbs/49_Ecoli_parser.ipynb 19
|
506
|
-
import os
|
507
|
-
from tempfile import TemporaryDirectory
|
508
|
-
from pathlib import Path
|
509
|
-
|
510
|
-
test_cases = [
|
511
|
-
# sample_name, res_content, expected_oh, expected_stx, expected_eae, expected_ehxA
|
512
|
-
(
|
513
|
-
"sample1",
|
514
|
-
"1__wzx__O103__X\t100\t100\t60\n2__wzy__O103__X\t100\t100\t65\n3__fliC__H2__X\t100\t100\t70",
|
515
|
-
"O103;H2",
|
516
|
-
"-",
|
517
|
-
"-",
|
518
|
-
"-",
|
519
|
-
),
|
520
|
-
(
|
521
|
-
"sample2",
|
522
|
-
"1__wzt__O8__X\t100\t100\t60\n2__wzm__O8__X\t100\t100\t65\n3__fliC__H10__X\t100\t100\t70\n4__stx2__stx2-a__X\t100\t100\t90\n5__eae__eae-5__X\t100\t100\t80",
|
523
|
-
"O8;H10",
|
524
|
-
"stx2-a",
|
525
|
-
"Positive",
|
526
|
-
"-",
|
527
|
-
),
|
528
|
-
("sample3", "1__fliC__H7__X\t100\t100\t70", "-;H7", "-", "-", "-"),
|
529
|
-
(
|
530
|
-
"sample4",
|
531
|
-
"bad_line\n2__wzy__O111__X\t100\t100\t70\n3__fliC__H11__X\t100\t100\t70",
|
532
|
-
"-;H11",
|
533
|
-
"-",
|
534
|
-
"-",
|
535
|
-
"-",
|
536
|
-
),
|
537
|
-
("sample5", "", "-;-", "-", "-", "-"),
|
538
|
-
(
|
539
|
-
"sample6",
|
540
|
-
"1__wzx__O157__X\t100\t100\t60\n2__wzy__O157__X\t100\t100\t65\n3__wzt__O8__X\t100\t100\t60\n4__wzm__O8__X\t100\t100\t65\n5__fli__H2__X\t100\t100\t70",
|
541
|
-
"-;H2",
|
542
|
-
"-",
|
543
|
-
"-",
|
544
|
-
"-",
|
545
|
-
),
|
546
|
-
(
|
547
|
-
"sample7",
|
548
|
-
"1__wzx__O157__X\t100\t100\t60\n2__wzy__O111__X\t100\t100\t65\n3__fliC__H9__X\t100\t100\t70",
|
549
|
-
"-;H9",
|
550
|
-
"-",
|
551
|
-
"-",
|
552
|
-
"-",
|
553
|
-
),
|
554
|
-
(
|
555
|
-
"sample8",
|
556
|
-
"1__fli__H1__X\t100\t100\t70\n2__fliC__H12__X\t100\t100\t70",
|
557
|
-
"-;H1",
|
558
|
-
"-",
|
559
|
-
"-",
|
560
|
-
"-",
|
561
|
-
),
|
562
|
-
(
|
563
|
-
"sample9",
|
564
|
-
"1__wzx__O157__X\t100\t100\t60\n2__wzy__O157__X\t100\t100\t65\n3__wzt__O8__X\t100\t100\t60\n4__wzm__O8__X\t100\t100\t65\n5__fliC__H10__X\t100\t100\t70\n6__fli__H2__X\t100\t100\t70\n7__stx1__stx1-a__X\t100\t100\t90\n8__stx2__stx2-d__X\t100\t100\t90\n9__stx2__stx2-a__X\t100\t100\t90\n10__eae__eae-42-5__X\t100\t100\t80\n11__ehxA__ehxA-7__X\t100\t100\t80",
|
565
|
-
"-;H2",
|
566
|
-
"stx1-a;stx2-a;stx2-d",
|
567
|
-
"Positive",
|
568
|
-
"Positive",
|
569
|
-
),
|
570
|
-
(
|
571
|
-
"sample10",
|
572
|
-
"1__adk__adk__X\t100\t100\t70\n2__fliC__H4__X\t100\t100\t70",
|
573
|
-
"-;H4",
|
574
|
-
"-",
|
575
|
-
"-",
|
576
|
-
"-",
|
577
|
-
),
|
578
|
-
(
|
579
|
-
"sample11",
|
580
|
-
"1__eae__eae-1__X\t100\t94\t70\n2__fliC__H6__X\t100\t100\t70",
|
581
|
-
"-;H6",
|
582
|
-
"-",
|
583
|
-
"-",
|
584
|
-
"-",
|
585
|
-
),
|
586
|
-
(
|
587
|
-
"sample12",
|
588
|
-
"1__stx1__stx1a__X\t100\t100\t80\n2__stx2__stx2c__X\t100\t100\t85\n3__fli__H21__X\t100\t100\t70",
|
589
|
-
"-;H21",
|
590
|
-
"stx1a;stx2c",
|
591
|
-
"-",
|
592
|
-
"-",
|
593
|
-
),
|
594
|
-
]
|
595
|
-
|
596
|
-
for (
|
597
|
-
sample_name,
|
598
|
-
res_content,
|
599
|
-
expected_oh,
|
600
|
-
expected_stx,
|
601
|
-
expected_eae,
|
602
|
-
expected_ehxA,
|
603
|
-
) in test_cases:
|
604
|
-
with TemporaryDirectory() as tmpdir:
|
605
|
-
tmpdir = Path(tmpdir)
|
606
|
-
os.chdir(tmpdir)
|
607
|
-
|
608
|
-
res_dir = tmpdir / f"examples/Results/{sample_name}/kma"
|
609
|
-
res_dir.mkdir(parents=True)
|
610
|
-
res_file = res_dir / f"{sample_name}.res"
|
611
|
-
res_file.write_text(
|
612
|
-
"#Template\tTemplate_Coverage\tQuery_Identity\tDepth\n" + res_content
|
613
|
-
)
|
614
|
-
|
615
|
-
sheet = tmpdir / "samplesheet.tsv"
|
616
|
-
sheet.write_text(
|
617
|
-
"sample_name\tIllumina_read_files\tNanopore_read_file\tassembly_file\torganism\tvariant\tnotes\n"
|
618
|
-
f"{sample_name}\tread1.fastq,read2.fastq\t-\t-\tEcoli\t-\t-\n"
|
619
|
-
)
|
620
|
-
|
621
|
-
results = EcoliResults.from_samplesheet(sheet)
|
622
|
-
df = results.results_df
|
623
|
-
row = df.iloc[0]
|
624
|
-
|
625
|
-
# general output and functionality test
|
626
|
-
assert row["sample_name"] == sample_name
|
627
|
-
|
628
|
-
if row["OH"] != expected_oh:
|
629
|
-
raise AssertionError(
|
630
|
-
f"\nSample: {sample_name}\nExpected OH: {expected_oh}\nActual OH: {row['OH']}"
|
631
|
-
)
|
632
|
-
assert row["OH"] == expected_oh
|
633
|
-
|
634
|
-
if row["stx"] != expected_stx:
|
635
|
-
raise AssertionError(
|
636
|
-
f"\nSample: {sample_name}\nExpected stx: {expected_stx}\nActual stx: {row['stx']}"
|
637
|
-
)
|
638
|
-
assert row["stx"] == expected_stx
|
639
|
-
|
640
|
-
if row["eae"] != expected_eae:
|
641
|
-
raise AssertionError(
|
642
|
-
f"\nSample: {sample_name}\nExpected eae: {expected_eae}\nActual eae: {row['eae']}"
|
643
|
-
)
|
644
|
-
assert row["eae"] == expected_eae
|
645
|
-
|
646
|
-
if row["ehxA"] != expected_ehxA:
|
647
|
-
raise AssertionError(
|
648
|
-
f"\nSample: {sample_name}\nExpected ehxA: {expected_ehxA}\nActual ehxA: {row['ehxA']}"
|
649
|
-
)
|
650
|
-
assert row["ehxA"] == expected_ehxA
|
651
|
-
|
652
|
-
# sample specific information tests
|
653
|
-
|
654
|
-
# without confliciting O and H typing, the OH column should be filled and the remaining four genes empty
|
655
|
-
if sample_name == "sample1":
|
656
|
-
assert row["wzx"] == "-"
|
657
|
-
assert row["wzy"] == "-"
|
658
|
-
assert row["wzt"] == "-"
|
659
|
-
assert row["wzm"] == "-"
|
660
|
-
# with conflicts the OH should remain empty and the four 'conflicting' gene information remain filled
|
661
|
-
elif sample_name == "sample6":
|
662
|
-
assert row["wzx"] == "O157"
|
663
|
-
assert row["wzy"] == "O157"
|
664
|
-
assert row["wzt"] == "O8"
|
665
|
-
assert row["wzm"] == "O8"
|
666
|
-
elif sample_name == "sample10":
|
667
|
-
assert row["Other"] == "adk"
|
668
|
-
|
669
|
-
print("All 12 syntehtic E. coli sample inline tests passed.")
|
@@ -0,0 +1,302 @@
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/38_Hinfluenzae_parser.ipynb.
|
2
|
+
|
3
|
+
# %% auto 0
|
4
|
+
__all__ = ['get_biotype_from_gene_presence', 'extract_hicap_results', 'extract_ariba_ftsI_snps', 'HinfluenzaeResults',
|
5
|
+
'Hinfluenzae_parser', 'Hinfluenzae_batch_parser']
|
6
|
+
|
7
|
+
# %% ../nbs/38_Hinfluenzae_parser.ipynb 3
|
8
|
+
# standard libs
|
9
|
+
import os
|
10
|
+
import re
|
11
|
+
|
12
|
+
# Common to template
|
13
|
+
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
|
14
|
+
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
|
15
|
+
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
|
16
|
+
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
|
17
|
+
from fastcore import (
|
18
|
+
test,
|
19
|
+
)
|
20
|
+
from fastcore.script import (
|
21
|
+
call_parse,
|
22
|
+
) # for @call_parse, https://fastcore.fast.ai/script
|
23
|
+
import json # for nicely printing json and yaml
|
24
|
+
|
25
|
+
# import functions from core module (optional, but most likely needed).
|
26
|
+
from ssi_analysis_result_parsers import (
|
27
|
+
core,
|
28
|
+
blast_parser,
|
29
|
+
)
|
30
|
+
|
31
|
+
# from ssi_analysis_result_parsers.blast_parser import extract_presence_absence
|
32
|
+
|
33
|
+
# Project specific libraries
|
34
|
+
from pathlib import Path
|
35
|
+
import pandas
|
36
|
+
import numpy
|
37
|
+
import sys
|
38
|
+
|
39
|
+
# %% ../nbs/38_Hinfluenzae_parser.ipynb 6
|
40
|
+
def get_biotype_from_gene_presence(biotype_gene_presence_dict: dict) -> "dict | None":
    """
    Convert a biotype gene presence dict to a biotype.

    The input must contain the keys "indole", "urease" and "ODC". A gene is
    counted as absent when its value equals the string "0" and as present for
    any other value.

    Returns ``{"biotype": <roman numeral I-VIII>}``, or ``None`` (after
    printing a message) when the input is ``None`` or empty.

    Raises ``KeyError`` if one of the three gene keys is missing.
    """
    if not biotype_gene_presence_dict:
        print(
            "Nonetype input provided for biotype gene presence. Cannot determine biotype."
        )
        return None

    # (indole present, urease present, ODC present) -> biotype
    biotype_by_profile = {
        (True, True, True): "I",
        (True, True, False): "II",
        (True, False, True): "V",
        (True, False, False): "VII",
        (False, True, True): "IV",
        (False, True, False): "III",
        (False, False, True): "VI",
        (False, False, False): "VIII",
    }
    profile = tuple(
        biotype_gene_presence_dict[gene] != "0"
        for gene in ("indole", "urease", "ODC")
    )
    return {"biotype": biotype_by_profile[profile]}
|
73
|
+
|
74
|
+
|
75
|
+
def extract_hicap_results(hicap_tsv: Path):
    """
    Extract the serotype prediction from a hicap tsv output file.

    Only the first data row is used; its "predicted_serotype", "attributes"
    and "genes_identified" columns are returned under the keys "serotype",
    "serotype_attributes" and "serotype_genes".

    A path that does not exist is reported as "no_capsule_genes_found" with
    "-" placeholders. ``None`` is returned (with a message on stderr) when no
    path is given or when the file exists but holds no data rows.
    """
    if hicap_tsv is None:
        # The CLI defaults every path to None; fail softly instead of
        # raising AttributeError on None.exists().
        print("No hicap output path provided", file=sys.stderr)
        return None

    hicap_tsv = Path(hicap_tsv)  # also accept plain string paths
    if not hicap_tsv.exists():
        return {
            "serotype": "-",
            "serotype_attributes": "no_capsule_genes_found",
            "serotype_genes": "-",
        }

    try:
        df = pandas.read_csv(hicap_tsv, sep="\t")
    except pandas.errors.EmptyDataError:
        # Raised by pandas for a completely empty file (no header line).
        print(f"Hicap output file empty at {hicap_tsv}", file=sys.stderr)
        return None

    if df.shape[0] == 0:
        # Header present but no result rows.
        print(f"Hicap output file empty at {hicap_tsv}", file=sys.stderr)
        return None

    first_row = df.iloc[0]
    return {
        "serotype": first_row["predicted_serotype"],
        "serotype_attributes": first_row["attributes"],
        "serotype_genes": first_row["genes_identified"],
    }
|
99
|
+
|
100
|
+
|
101
|
+
def extract_ariba_ftsI_snps(ariba_output_tsv: Path, ftsI_types_tsv: Path):
    """
    Determine the ftsI type of a sample from an ariba report.

    ``ftsI_types_tsv`` is a tab-separated table whose first column labels the
    row: "pos" (positions), "Ref" (reference residues), "Diverse" (accepted
    substitutions per position, alternatives separated by "/"), and any other
    label is an ftsI type whose non-empty cells are its defining
    substitutions. "pos" and "Ref" must precede the rows that use them.

    ``ariba_output_tsv`` is an ariba report; the substitution is read from
    column index 18 of every line except the "#ariba_ref_name" header.

    Returns a dict with
      * "ftsI_type": the type with the most defining substitutions among
        those whose substitutions were all observed ("-" if none; the first
        type in the table wins ties),
      * "key_ftsI_snps": observed substitutions listed in the "Diverse" row,
      * "all_ftsI_snps": every observed substitution,
    or ``None`` (after printing a message) when either file is missing.
    """
    if ftsI_types_tsv is None or not Path(ftsI_types_tsv).exists():
        print(f"Failed to load ftsI types table at {ftsI_types_tsv}")
        return None
    if ariba_output_tsv is None or not Path(ariba_output_tsv).exists():
        print(f"No ariba report found at {ariba_output_tsv}")
        return None

    change_column = 18  # index of the substitution column in the ariba report

    positions = []
    refs = []
    ftsI_types = {}
    table_snps = set()
    with open(ftsI_types_tsv) as f:
        for raw_line in f:
            label, *cells = raw_line.rstrip("\n").split("\t")
            if not label and not cells:
                continue  # blank line
            if label == "pos":
                positions = cells
            elif label == "Ref":
                refs = cells
            elif label == "Diverse":
                # zip() ignores cells that have no matching header column
                # instead of raising IndexError.
                for ref, pos, alternatives in zip(refs, positions, cells):
                    for alt in alternatives.split("/"):
                        table_snps.add(ref + pos + alt)
            else:
                ftsI_types[label] = [
                    ref + pos + alt
                    for ref, pos, alt in zip(refs, positions, cells)
                    if alt != "" and alt != " "
                ]

    key_snps = []
    all_snps = []
    with open(ariba_output_tsv) as f:
        for raw_line in f:
            fields = raw_line.rstrip("\n").split("\t")
            if fields[0] == "#ariba_ref_name":
                continue
            if len(fields) <= change_column:
                continue  # blank or truncated line: no substitution to read
            change = fields[change_column]
            all_snps.append(change)
            if change in table_snps:
                key_snps.append(change)

    observed = set(key_snps)
    best_type = "-"
    best_match = 0
    for type_name, variants in ftsI_types.items():
        # A type qualifies only if every defining substitution was observed;
        # among qualifying types the one with the most substitutions wins.
        if len(variants) > best_match and all(v in observed for v in variants):
            best_match = len(variants)
            best_type = type_name

    return {
        "ftsI_type": best_type,
        "key_ftsI_snps": ",".join(key_snps),
        "all_ftsI_snps": ",".join(all_snps),
    }
|
165
|
+
|
166
|
+
|
167
|
+
class HinfluenzaeResults(core.PipelineResults):
    """
    Results container for H. influenzae analyses.

    Each alternative constructor builds a ``{sample_name: results_dict}``
    mapping via ``summary`` and passes it to the ``core.PipelineResults``
    constructor.
    """

    @classmethod
    def from_tool_paths(
        cls,
        ftsI_ariba_tsv: Path,
        hicap_tsv: Path,
        biotype_blast_tsv: Path,
        ftsI_types_tsv: Path,
        sample_name=None,
    ):
        """
        Alternative constructor for initializing results for single sample,
        Initializes HinfluenzaeResults instance provided paths to outputs from tools (ftsI ariba report, hicap and biotype gene blast) and the ftsI types table
        """
        hinfluenzae_results = cls.summary(
            ftsI_ariba_tsv=ftsI_ariba_tsv,
            hicap_tsv=hicap_tsv,
            biotype_blast_tsv=biotype_blast_tsv,
            ftsI_types_tsv=ftsI_types_tsv,
        )
        return cls({sample_name: hinfluenzae_results})

    @classmethod
    def from_tool_paths_dict(cls, file_paths: dict):
        """
        Alternative constructor for initializing results for multiple samples,
        Initializes HinfluenzaeResults instance by providing a dictionary of paths to outputs from tools (ftsI ariba report, hicap and biotype gene blast)

        ``file_paths`` maps each sample name to a dict with the keys
        "ftsI_ariba_results", "hicap_results", "biotype_results" and
        "ftsI_types_tsv".
        """
        results_dict = {}
        for sample_name, path_dict in file_paths.items():
            # NOTE(review): Path(None) raises TypeError, so a sample with a
            # missing path entry aborts the whole batch - confirm intended.
            results_dict[sample_name] = cls.summary(
                ftsI_ariba_tsv=Path(path_dict["ftsI_ariba_results"]),
                hicap_tsv=Path(path_dict["hicap_results"]),
                biotype_blast_tsv=Path(path_dict["biotype_results"]),
                ftsI_types_tsv=Path(path_dict["ftsI_types_tsv"]),
            )
        return cls(results_dict)

    @classmethod
    def from_tool_paths_dataframe(cls, file_paths_df: pandas.DataFrame):
        """
        Alternative constructor for initializing results for multiple samples,
        Initializes HinfluenzaeResults instance by providing a DataFrame of paths to outputs from tools (ftsI ariba report, hicap and biotype gene blast)

        The DataFrame must be indexed by sample name and hold the same
        columns that ``from_tool_paths_dict`` expects as keys. The caller's
        DataFrame is left unmodified.
        """
        # Non-inplace replace: work on a copy rather than altering the
        # DataFrame the caller handed in.
        file_paths = file_paths_df.replace(numpy.nan, None).to_dict(orient="index")
        return cls.from_tool_paths_dict(file_paths)

    @classmethod
    def from_tool_paths_tsv(cls, tool_paths_tsv: Path):
        """
        Alternative constructor for initializing results for multiple samples,
        Initializes HinfluenzaeResults instance by providing a tsv-file with paths to outputs from tools (ftsI ariba report, hicap and biotype gene blast)

        The tsv must have a "sample_name" column, which becomes the index.
        """
        file_paths_df = pandas.read_csv(tool_paths_tsv, sep="\t")
        file_paths_df.set_index("sample_name", inplace=True, drop=True)
        return cls.from_tool_paths_dataframe(file_paths_df)

    @staticmethod
    def summary(
        ftsI_ariba_tsv: Path,
        hicap_tsv: Path,
        biotype_blast_tsv: Path,
        ftsI_types_tsv: Path,
    ) -> dict:
        """
        Collect the ftsI, hicap and biotype results of one sample in one dict.

        Returns an empty dict when the merged result is ``None``.
        """
        ftsI_results = extract_ariba_ftsI_snps(
            ariba_output_tsv=ftsI_ariba_tsv, ftsI_types_tsv=ftsI_types_tsv
        )
        hicap_results = extract_hicap_results(hicap_tsv=hicap_tsv)
        biotype_gene_dict = blast_parser.extract_presence_absence(
            blast_output_tsv=biotype_blast_tsv,
            hits_as_string=False,
            include_match_stats=True,
            gene_names=["indole", "urease", "ODC"],
        )
        biotype_results = get_biotype_from_gene_presence(
            biotype_gene_presence_dict=biotype_gene_dict
        )
        # NOTE(review): core.update_results_dict is defined outside this
        # file; the prefix arguments presumably rename clashing keys -
        # confirm against core.
        results_dict = core.update_results_dict(
            ftsI_results, hicap_results, new_duplicate_key_prefix="hicap:"
        )
        results_dict = core.update_results_dict(
            results_dict, biotype_results, old_duplicate_key_prefix="_"
        )
        results_dict = core.update_results_dict(
            results_dict, biotype_gene_dict, old_duplicate_key_prefix="biotype:"
        )
        if results_dict is None:
            return {}
        return results_dict

    def __repr__(self):
        return f"< Hinfluenzae analysis results object. {len(self.results_df)} samples with {len(self.results_df.columns)} result variables >"
|
271
|
+
|
272
|
+
# %% ../nbs/38_Hinfluenzae_parser.ipynb 9
|
273
|
+
@call_parse
def Hinfluenzae_parser(
    ftsI_ariba_tsv: Path = None,  # Path to report.tsv from ftsI ariba output
    hicap_tsv: Path = None,  # Path to hicap tsv output
    biotype_blast_tsv: Path = None,  # Path to output from biotype gene blast. Generated with blastn -query biotype_genes.fasta -subject assembly.fasta -outfmt "6 qseqid sseqid pident length qlen qstart qend sstart send sseq evalue bitscore"
    ftsI_types_tsv: Path = None,  # Path to table to convert ftsI snps to ftsI types
    output_file: Path = None,  # Path to output tsv
    sample_name: str = None,  # Name under which the results of this sample are stored
) -> None:
    """Parse the H. influenzae tool outputs of a single sample and write the results to output_file."""
    # Build a single-sample results object, then delegate writing to it.
    hinfluenze_results = HinfluenzaeResults.from_tool_paths(
        ftsI_ariba_tsv=ftsI_ariba_tsv,
        hicap_tsv=hicap_tsv,
        biotype_blast_tsv=biotype_blast_tsv,
        ftsI_types_tsv=ftsI_types_tsv,
        sample_name=sample_name,
    )
    hinfluenze_results.write_tsv(output_file=output_file)
|
291
|
+
|
292
|
+
|
293
|
+
@call_parse
def Hinfluenzae_batch_parser(
    file_path_tsv: Path = None,  # Path to tsv containing file paths to the outputs from tools to be parsed. Must contain headers "sample_name", "ftsI_ariba_results", "hicap_results", "biotype_results", and "ftsI_types_tsv"
    output_file: Path = None,  # Path to output tsv
) -> None:
    """Parse the H. influenzae tool outputs of all samples listed in file_path_tsv and write the results to output_file."""
    # One results entry is built per row of the tsv, keyed by its sample_name.
    hinfluenze_results = HinfluenzaeResults.from_tool_paths_tsv(
        tool_paths_tsv=file_path_tsv
    )
    hinfluenze_results.write_tsv(output_file)
|
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.
|
1
|
+
__version__ = "0.0.15"
|
@@ -25,6 +25,30 @@ d = { 'settings': { 'branch': 'main',
|
|
25
25
|
'ssi_analysis_result_parsers/Ecoli_parser.py'),
|
26
26
|
'ssi_analysis_result_parsers.Ecoli_parser.setup_logging': ( 'ecoli_parser.html#setup_logging',
|
27
27
|
'ssi_analysis_result_parsers/Ecoli_parser.py')},
|
28
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser': { 'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults': ( 'hinfluenzae_parser.html#hinfluenzaeresults',
|
29
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
30
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.__repr__': ( 'hinfluenzae_parser.html#hinfluenzaeresults.__repr__',
|
31
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
32
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.from_tool_paths': ( 'hinfluenzae_parser.html#hinfluenzaeresults.from_tool_paths',
|
33
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
34
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.from_tool_paths_dataframe': ( 'hinfluenzae_parser.html#hinfluenzaeresults.from_tool_paths_dataframe',
|
35
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
36
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.from_tool_paths_dict': ( 'hinfluenzae_parser.html#hinfluenzaeresults.from_tool_paths_dict',
|
37
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
38
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.from_tool_paths_tsv': ( 'hinfluenzae_parser.html#hinfluenzaeresults.from_tool_paths_tsv',
|
39
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
40
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.HinfluenzaeResults.summary': ( 'hinfluenzae_parser.html#hinfluenzaeresults.summary',
|
41
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
42
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.Hinfluenzae_batch_parser': ( 'hinfluenzae_parser.html#hinfluenzae_batch_parser',
|
43
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
44
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.Hinfluenzae_parser': ( 'hinfluenzae_parser.html#hinfluenzae_parser',
|
45
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
46
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.extract_ariba_ftsI_snps': ( 'hinfluenzae_parser.html#extract_ariba_ftsi_snps',
|
47
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
48
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.extract_hicap_results': ( 'hinfluenzae_parser.html#extract_hicap_results',
|
49
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py'),
|
50
|
+
'ssi_analysis_result_parsers.Hinfluenzae_parser.get_biotype_from_gene_presence': ( 'hinfluenzae_parser.html#get_biotype_from_gene_presence',
|
51
|
+
'ssi_analysis_result_parsers/Hinfluenzae_parser.py')},
|
28
52
|
'ssi_analysis_result_parsers.Legionella_parser': { 'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults': ( 'legionella_parser.html#legionellaresults',
|
29
53
|
'ssi_analysis_result_parsers/Legionella_parser.py'),
|
30
54
|
'ssi_analysis_result_parsers.Legionella_parser.LegionellaResults.__repr__': ( 'legionella_parser.html#legionellaresults.__repr__',
|
{ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/RECORD
RENAMED
@@ -1,21 +1,29 @@
|
|
1
|
-
ssi_analysis_result_parsers/Ecoli_parser.py,sha256=
|
1
|
+
ssi_analysis_result_parsers/Ecoli_parser.py,sha256=IQDbTPxvaQBf3el92_76QQ3prA2HG930yFgqYibFmEQ,17437
|
2
|
+
ssi_analysis_result_parsers/Hinfluenzae_parser.py,sha256=ch2pI1NEZPteewOXfeN8w4HW9Rhpr5ngdUIPwPqoA4Y,11876
|
2
3
|
ssi_analysis_result_parsers/Legionella_parser.py,sha256=Yp14u8xqItrIb-uVk786K6jlIi-HNfn0wnQMo_R_X1c,6950
|
3
4
|
ssi_analysis_result_parsers/Nmeningitidis_parser.py,sha256=Jl1ubU2T6XbwffpglMBsvp3tOb5DAG72MbDnN6RB0qc,7310
|
4
5
|
ssi_analysis_result_parsers/Spyogenes_parser.py,sha256=Cjibp7iKGofjSp-igm-jmjBVkQ6-zxYQWVSZT-Vx3Fo,12731
|
5
|
-
ssi_analysis_result_parsers/__init__.py,sha256=
|
6
|
-
ssi_analysis_result_parsers/_modidx.py,sha256=
|
6
|
+
ssi_analysis_result_parsers/__init__.py,sha256=go20U3RCVaJ2N55RnX4tO5rinfUCRV0puFyrHCto8yw,23
|
7
|
+
ssi_analysis_result_parsers/_modidx.py,sha256=odzq7igGyMkQ34NKL08Rr6qT7attmv2HhQPP6bwi51M,27684
|
7
8
|
ssi_analysis_result_parsers/blast_parser.py,sha256=pIzMGk5-VyTy8uzFncTiIsy80wQxl9NbNiGI_K7XMaM,8658
|
8
9
|
ssi_analysis_result_parsers/core.py,sha256=8CzFMDrGJ24D9aoIebLsG8tx-OxvYJod1cxBITqNfaY,12258
|
9
10
|
ssi_analysis_result_parsers/hello_world.py,sha256=jpN94sqYuNHqUbUZMCJ35qGY5iLPB_emucgnDGDUk_U,1895
|
10
11
|
ssi_analysis_result_parsers/some_string.py,sha256=JwmAXKbX_JgY8UGh4FAu5-7ZjezcAEhq4Q2B73pWp2M,923
|
11
12
|
ssi_analysis_result_parsers/config/config.default.env,sha256=Zt6bfPbVV3rYCksoebX1ruAdFgeD9wqAnKDtswhtJJM,1390
|
12
13
|
ssi_analysis_result_parsers/config/config.default.yaml,sha256=3qgUrUtQpxrzYv7WQaHsvz9dQB0RALKNU0idxv7oRqM,460
|
13
|
-
ssi_analysis_result_parsers-0.0.
|
14
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/licenses/LICENSE,sha256=p6aTb6QIfqyZ2Uux2VjV4F2zthdUSHZOjB4mfwGc7fo,1094
|
14
15
|
test_input/.DS_Store,sha256=sdTEvl9DTKPHNPYYjMqDepX7q7ZETlonk21tGEuWLao,6148
|
15
16
|
test_input/empty_file.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
17
|
test_input/Ecoli/ERR14229029.res,sha256=AmVZwbiUTjOQLe7SmSKWt9-URdcrsLSxt9hHUh-nFUY,129
|
17
18
|
test_input/Ecoli/ERR3528110.res,sha256=DmiDRfX9LPypAEzVeO1RHaPoqEpZwq8ZtQDJ1KOWwHc,461
|
18
19
|
test_input/Ecoli/samplesheet.tsv,sha256=sSPrVrloOWvfmnp2Lnn8H6mCkiWsZUFV0wrovk3jH-Q,416
|
20
|
+
test_input/Hinfluenzae/.DS_Store,sha256=esAD8NJsWOrKyrRnaO45PG4-nf0yXOlSxp2Is19cRVA,6148
|
21
|
+
test_input/Hinfluenzae/batch_parser_file_paths.tsv,sha256=K8KLz08wNpK4buW_Q1ifsyFJpua5C-I8YdQ2XUCVKgc,540
|
22
|
+
test_input/Hinfluenzae/ariba_ftsI/ftsI_types_table.txt,sha256=X8GSHK6snQ2O-4hlYRaPg_aw7wunQqhIy2IqYhTzmPA,725
|
23
|
+
test_input/Hinfluenzae/ariba_ftsI/sample1.ftsI.ariba.tsv,sha256=9aOfC082Vcim7-2xUvJ6fE9X3098oOnu-O0_okimiqQ,955
|
24
|
+
test_input/Hinfluenzae/ariba_ftsI/sample2.ftsI.ariba.tsv,sha256=Q_vOwfMzGoK5Cj6yTXz8WRGR8rlD_XQtBcksUu1PTf4,786
|
25
|
+
test_input/Hinfluenzae/biotype/sample1.biotype.blast.tsv,sha256=i6fmGIVzOX-LU6Eev2sr4u8921ULZPOyTLEdRoTORU4,4036
|
26
|
+
test_input/Hinfluenzae/hicap/sample1.hicap.tsv,sha256=wU935eMzMSkg6s9-9c6XJZfe-VfzkLtelMV31T8zVRI,346
|
19
27
|
test_input/Legionella/batch_parser_file_paths.tsv,sha256=AikBS_Ez1xO3UrEQ19AY3z6drBDdMAiSGK66NLeyYj4,356
|
20
28
|
test_input/Legionella/lag-1_blast.tsv,sha256=MN5QL_iBn9gQ8VTYEcTnT0JwKgpkD8G15-QFOrSWxkU,1133
|
21
29
|
test_input/Legionella/lag-1_blast_2.tsv,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -46,8 +54,8 @@ test_output/output_with_sample_name.tsv,sha256=NQG7WaxczuWCCsX2a9MUxCCYpbuAirz9g
|
|
46
54
|
test_output/test.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
47
55
|
test_output/test_batch_output.tsv,sha256=6DGzarXMkUP03Z58vZimc-gu1K2k84zxZLWWF2HROCg,277
|
48
56
|
test_output/Ecoli/KMA_cases_parser.tsv,sha256=Wf3JkSppRN5AK2zRJmFQlwVfCMyJfgyyBpTjb1sK6Uw,586
|
49
|
-
ssi_analysis_result_parsers-0.0.
|
50
|
-
ssi_analysis_result_parsers-0.0.
|
51
|
-
ssi_analysis_result_parsers-0.0.
|
52
|
-
ssi_analysis_result_parsers-0.0.
|
53
|
-
ssi_analysis_result_parsers-0.0.
|
57
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/METADATA,sha256=iVugrZ3dTgq1Ot87eUctd7bpHgNcC1-kPOC60Sl9TY0,2766
|
58
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
59
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/entry_points.txt,sha256=D-ZVsNLN5L6Hew4E1gyNc3sFsiBdswPPQsUpH3NAHnU,1109
|
60
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/top_level.txt,sha256=kVBbBavSQeQsoZCMoiur-3tQ_UT5CeO0L3PwjSdM2yQ,96
|
61
|
+
ssi_analysis_result_parsers-0.0.15.dist-info/RECORD,,
|
@@ -1,6 +1,8 @@
|
|
1
1
|
[console_scripts]
|
2
2
|
blast_parser_allele_matches = ssi_analysis_result_parsers.blast_parser:allele_matches
|
3
3
|
blast_parser_presence_absence = ssi_analysis_result_parsers.blast_parser:presence_absence
|
4
|
+
get_Hinfluenzae_results = ssi_analysis_result_parsers.Hinfluenzae_parser:Hinfluenzae_parser
|
5
|
+
get_Hinfluenzae_results_batch = ssi_analysis_result_parsers.Hinfluenzae_parser:Hinfluenzae_batch_parser
|
4
6
|
get_Nmeningitidis_results = ssi_analysis_result_parsers.Nmeningitidis_parser:Nmeningitidis_parser
|
5
7
|
get_Nmeningitidis_results_batch = ssi_analysis_result_parsers.Nmeningitidis_parser:Nmeningitidis_batch_parser
|
6
8
|
get_Spyogenes_results = ssi_analysis_result_parsers.Spyogenes_parser:Spyogenes_parser
|
Binary file
|
@@ -0,0 +1,12 @@
|
|
1
|
+
pos 311 337 350 352 357 368 377 385 389 437 443 449 490 501 502 511 517 526 528 530 532 547 562 569 586
|
2
|
+
Ref S A D S S A M S L A T I G R A V R N Y A T V V N A
|
3
|
+
I H
|
4
|
+
IIa K
|
5
|
+
IIb V K
|
6
|
+
IIc T K
|
7
|
+
IId V K
|
8
|
+
III T K
|
9
|
+
III+ T F K
|
10
|
+
III like T H
|
11
|
+
III like + T F H
|
12
|
+
Diverse P V/N N T/N/F N T I T F S A V E E/L/H T/V A H K H S S I L S S
|
@@ -0,0 +1,5 @@
|
|
1
|
+
#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
|
2
|
+
ftsI ftsI 1 0 539 1394 cluster 1833 1833 99.51 cluster.l15.c4.ctg.1 2675 74.5 0 . p . 0 A22T NONSYN 64 66 GCA 560 562 ACA 69;69;72 A;C;A,C 58;57;59,1 . .
|
3
|
+
ftsI ftsI 1 0 539 1394 cluster 1833 1833 99.51 cluster.l15.c4.ctg.1 2675 74.5 0 . p . 0 A239E NONSYN 715 717 GCA 1211 1213 GAA 124;124;129 G;A;A 99;99;103 . .
|
4
|
+
ftsI ftsI 1 0 539 1394 cluster 1833 1833 99.51 cluster.l15.c4.ctg.1 2675 74.5 0 . p . 0 I475L NONSYN 1423 1425 ATT 1919 1921 CTT 100;98;98 C;T;T 88;86;85 . .
|
5
|
+
ftsI ftsI 1 0 539 1394 cluster 1833 1833 99.51 cluster.l15.c4.ctg.1 2675 74.5 0 . p . 0 E603D NONSYN 1807 1809 GAA 2303 2305 GAT 84;84;84 G;A;T 68;68;68 . .
|
@@ -0,0 +1,4 @@
|
|
1
|
+
#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
|
2
|
+
ftsI ftsI 1 0 539 758 cluster 1833 1833 99.56 cluster.l15.c4.ctg.1 2736 40.8 0 . p . 0 P31S NONSYN 91 93 CCT 576 578 TCT 38;39;39 T;C;T 35;38;36 . .
|
3
|
+
ftsI ftsI 1 0 539 758 cluster 1833 1833 99.56 cluster.l15.c4.ctg.1 2736 40.8 0 . p . 0 K310E NONSYN 928 930 AAA 1413 1415 GAA 62;62;62 G;A;A 58;58;58 . .
|
4
|
+
ftsI ftsI 1 0 539 758 cluster 1833 1833 99.56 cluster.l15.c4.ctg.1 2736 40.8 0 . p . 0 V547I NONSYN 1639 1641 GTT 2124 2126 ATT 51;51;51 A;T;T 46;45;45 . .
|
@@ -0,0 +1,5 @@
|
|
1
|
+
sample_name biotype_results ftsI_ariba_results hicap_results ftsI_types_tsv
|
2
|
+
sample_1 test_input/Hinfluenzae/biotype/sample1.biotype.blast.tsv test_input/Hinfluenzae/ariba_ftsI/sample1.ftsI.ariba.tsv test_input/Hinfluenzae/hicap/sample1.hicap.tsv test_input/Hinfluenzae/ariba_ftsI/ftsI_types_table.txt
|
3
|
+
sample_2 test_input/empty_file.txt test_input/empty_file.txt test_input/empty_file.txt test_input/empty_file.txt
|
4
|
+
sample_3 test_input/nonexist_file.txt test_input/nonexist_file.txt test_input/nonexist_file.txt test_input/nonexist_file.txt
|
5
|
+
|
@@ -0,0 +1,2 @@
|
|
1
|
+
urease HINF-2024-0115_7_215.085 89.703 1719 1719 1 1719 61211 59493 ATGGCATTAACAATTTCAAGAGCGCAATATGTAGCAACTTATGGCCCAACCGTTGGCGATAAAGTCCGTTTAGGCGATACCAATTTATGGGCAACTATTGAGCAAGATCTATTGACCAAAGGCGACGAGTGTAAATTTGGTGGCGGTAAAAGTGTACGCGATGGCATGGCTCAAAGTGGTACGGCAACTCGCGATAATCCAAATGTATTGGATTTTGTGATCACTAACGTGATGATCATTGATGCAAAATTAGGCATTATCAAAGCGGATATTGGGATTCGAGATGGTCGCATTGTGGGCATTGGTCAAGCAGGTAACCCAGATACGATGGATAATGTCACGCCAAATATGATTATCGGTGCTAGCACAGAAGTACATAACGGTGCACATTTAATTGCAACCGCTGGCGGTATTGATACACACATTCACTTTATTTGCCCTCAACAAGCACAGCATGCGATTGAAAGTGGCGTTACTACGTTAATTGGTGGCGGAACAGGCCCGGCTGACGGCACACACGCGACCACTTGCACACCAGGTGCGTGGTATATGGAGCGTATGTTCCAAGCGGCAGAAGCATTGCCTGTAAACGTAGGATTTTTTGGTAAAGGCAACTGTTCAACTTTAGATCCGCTACGCGAACAAATTGAAGCTGGCGCATTAGGTTTAAAAATCCACGAAGACTGGGGCGCAACGCCTGCAGTAATTGATTCGGCATTAAAAGTGGCAGATGAAATGGATATTCAAGTGGCAATCCACACCGACACTCTGAACGAAAGTGGCTTCTTAGAAGATACCATGAAAGCGATTGATGGACGTGTTATTCATACCTTCCACACGGAAGGAGCTGGTGGCGGTCATGCGCCAGATATCATTAAAGCAGCGATGTATCCAAACGTATTACCGGCTTCAACTAACCCAACTCGTCCGTTTACGAAAAACACTATCGATGAACATTTGGATATGTTGATGGTTTGCCATCATTTAGATAAACGCGTGCCAGAAGATGTAGCTTTTGCCGATAGCCGTATCCGTCCTGAAACTATTGCAGCAGAAGATATTTTGCATGATATGGGTGTTTTCTCGATTATGAGTTCAGACTCTCAAGCGATGGGGCGTATTGGCGAAGTCGTTATTCGTACATGGCAAACCGCAGATAAAATGAAAATGCAACGTGGCGAATTAGGCAATGAAGGAAACGATAACTTCCGTATTAAACGCTACATTGCGAAATATACCATCAATCCAGCGATTGCACACGGCATCTCGGATCATATTGGTTCATTAGAAGTGGGCAAAATTGCAGATATCGTGTTATGGAAACCGATGTTCTTTGGCGTAAAACCAGAAGTGGTAATTAAAAAAGGTTTTATTAGCTACGCAAAAATGGGCGATCCAAATGCCTCTATTCCAACACCACAACCAGTGTTCTACCGTCCAATGTACGGTGCACAAGGCTTAGCAACGGCACAAACTGCAGTTTTCTTTGTTTCACAAGCCGCTGAAAAAGCAGATATTCGTGCTAAATTCGGTTTACACAAAGAAACTATTGCGGTGAAAGGCTGCCGTAGTGTGAGTAAAAAAGATCTTGTTCATAATGATGCAACACCTGAAATTACAGTGGATCCAGAACGCTATGAAGTGCGTGTAGATGGTGAACTAATTACCTGTGAACCTGTGGATACGGTGCCATTGGGGCAACGGTATTTTATGTTCTAA 0.0 2194
|
2
|
+
ODC HINF-2024-0115_85_200.274 93.389 2163 2163 1 2163 12163 10001 ATGCCAAACTTAAAAATTGCATATAATCCAAAAGTTGAACAATATTTCTCTACAAATAGAGAATTAGTTGAAATTACCAAAACAGACTTTACTGATGTTGCAGCTATCATGTTGACATCAGGGGATGTAGGTGAATACCTTGAACGTATTCAAGCAACAAATTTTGGTATCCCAGTATTTGTTGTTCAAACTGAAGAAGAACAAGTTGATCCGAAATTCTATGATGCTATTTATCATATTCAAGATTTAAATGGTTATGACATTAAGCTTTATAGCCGTCAAATCGAAACTGCTGCAAGACTTTATGAAGAAAAAATGTTACCTCCATTCTTCAAAATGTTAAGTGAATATGTAGAAATGGGTAATATTGCTTTTGACTGTCCGGGACATCAAGGTGGTCAATACTACCGTAAACACCCAGCAGGTCGTTTCCTTTATGACTTCTACGGTGAAAACATTTTCCGTTCAGATATTTGTAATGCAGACGTGAAATTAGGTGACTTGTTAATTCACGAAGGTGCAGCTTGTGATGCACAAAAATATGCTGCTCAAGTATTCAATGCAGATAAAACTTACTTCGTATTAAATGGTACATCTTCTTCAAACAAAGTTGCATTAAACGCAGTACTTGCTCTGGGCGATTTAGTATTATTTGACCGTAATAACCATAAATCAAACCACCATGGTGCATTAATTCAAGCGGGTGCAACCCCAATTTACTTAGAAACTGCACGTAATCCATTTGGTTTCATTGGTGGTATCGATAGTCACTGCTTCGAAGAAGATTATTTGAAATCATTAATTAAAGAAGTGGCGCCTGAAAAATTAAACCAAAAACGTCCATTCCGTTTAGCCGTTATTCAATTAGGTACTTATGACGGTACAATTTATAACGCACGTCAAGTGGTAGATAAAATTGGTCATCTTTGTGACTACATCTTATTCGACTCTGCATGGGTAGGTTATGAACAATTTATTCCAATGATGAAAGACTGCTCTCCATTATTGCTTGAATTAAATGAAAATGACCCTGGTATTTTAGTGACTCAATCTGTTCATAAACAACAAGCAGGTTTCTCACAAACTTCACAAATTCACAAAAAAGATAAACACATTAAAGGTCAAGATCGTTATGTAAACCACAAACGTTTCAATAATGCCTTTATGTTACATGCGTCAACCAGTCCGTTCTATCCATTATTTGCGGCGTTAGATGTGAATGCAAAAATTCAAGGTAGCGAAGCGGGTCGTCGCTTATGGCATGAATGTGTGAAAGTAGGTATCGAAGCGCGTAAATTAGTGTTAAATCACTGTGAATTAATTCGTCCATTTATTCCAACCACAATTAAAGGTAAAAAATGGCAAGATTATGACACAGAAGAAATCGCAACTAATCTCGAATTCTTCAAATTTCACCCAACAGATACATGGCATAAATTTGAAGGCTATGCTGATGAACAATACTTCGTTGACCCTTGTAAATTCTTGCTTACCACACCAGGTATTAGCTTAGAAACAGGCGAATATGAAAAATTTGGTGTACCAGCAACTATTCTTGCTAACTACTTACGTGAAAACGGTATTATTCCGGAAAAATGTGACTTGAACTCAATCTTATTCTTGCTCACTCCGGCAGAAACCCTCACCAAAATGCAAACTTTGGTCGCTCAAATCGCCTTGTTTGAAAAACACATTAAACAAGACTCTTTATTAAAAGATGTATTGCCAACTGTGTATAAAAACAATGAAGACCGTTACAAAGACTATACTATCCGTCAATTATGCCAAGAAATGCATGACCTTTATGTAAGCCGCAATGTAAAACAACTTCAAAAAGACCTATTCCGTAAAGCAACATTACCGGAATATGCGTTAAATCCACACGATGCGAATATTGAATTCGTTCGTAACAAAGTTGAACTCGTTCCATTGAC
TGATATTGTTGGTCGTGTGGCTGCGGAAGGTGCATTACCTTATCCTCCAGGTGTGTTATGTGTGGTGCCTGGAGAAAGATGGAGTCCAACTGCACAAAAATACTTCCTTGCATTAGAAGAAGGTATCAACACATTACCTGGTTTCGCACCTGAAATCCAAGGGGTTTACTTACAAAAAGACCCTGATGGACGTACTCGTGCATATGGCTATGTTTTAACTGACTATTAA 0.0 3203
|
@@ -0,0 +1,2 @@
|
|
1
|
+
#isolate predicted_serotype attributes genes_identified locus_location region_I_genes region_II_genes region_III_genes IS1016_hits
|
2
|
+
HINF-2024-0115 type_e full_gene_complement,fragmented_locus bexA,bexB,bexC,bexD,ecs1,ecs2,ecs3;ecs4,ecs5,ecs6,ecs7,ecs8,hcsA,hcsB HINF-2024-0115_61_149.787:10271-19835;HINF-2024-0115_2_192.991:20-8457 4/4 8/8 2/2 1
|
{ssi_analysis_result_parsers-0.0.14.dist-info → ssi_analysis_result_parsers-0.0.15.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|