varvamp-0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
varvamp/__init__.py ADDED
@@ -0,0 +1,3 @@
1
"""Tool to design amplicons for highly variable virus genomes"""
_program = "varvamp"
__version__ = "0.3"
varvamp/__main__.py ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env python3
# Package entry point: enables running the tool via ``python -m varvamp``,
# delegating to the CLI workflow in command.main().
from varvamp import command

if __name__ == '__main__':
    command.main()
varvamp/command.py ADDED
@@ -0,0 +1,263 @@
1
+ """
2
+ main workflow
3
+ """
4
+
5
+ # BUILT-INS
6
+ import sys
7
+ import os
8
+ import time
9
+ import argparse
10
+
11
+ # varVAMP
12
+ from . import _program
13
+ from varvamp import __version__
14
+ from varvamp.scripts import logging
15
+ from varvamp.scripts import alignment
16
+ from varvamp.scripts import config
17
+ from varvamp.scripts import consensus
18
+ from varvamp.scripts import conserved
19
+ from varvamp.scripts import primers
20
+ from varvamp.scripts import reporting
21
+ from varvamp.scripts import scheme
22
+
23
+
24
+ # DEFs
25
def get_args(sysargs):
    """
    Parse the varvamp command line arguments.

    Prints the help text and exits with status -1 when no
    arguments were given; otherwise returns the parsed namespace.
    """
    parser = argparse.ArgumentParser(
        prog=_program,
        description='varvamp: variable virus amplicon design',
        usage='''varvamp <alignment> <output dir> [options]''',
    )
    # two positionals: the alignment file and the output directory
    parser.add_argument(
        "input", nargs=2,
        help="alignment file and dir to write results",
    )
    parser.add_argument(
        "-ol", "--opt-length", type=int,
        default=config.AMPLICON_OPT_LENGTH,
        help="optimal length of the amplicons",
    )
    parser.add_argument(
        "-ml", "--max-length", type=int,
        default=config.AMPLICON_MAX_LENGTH,
        help="max length of the amplicons",
    )
    parser.add_argument(
        "-o", "--overlap", type=float,
        default=config.AMPLICON_MIN_OVERLAP,
        help="min overlap of the amplicons",
    )
    parser.add_argument(
        "-t", "--threshold", type=float,
        default=config.FREQUENCY_THRESHOLD,
        help="threshold for nucleotides in alignment to be considered conserved",
    )
    parser.add_argument(
        "-a", "--allowed-ambiguous", type=int,
        default=config.PRIMER_ALLOWED_N_AMB,
        help="number of ambiguous characters that are allowed within a primer",
    )
    parser.add_argument(
        "--console",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="show varvamp console output",
    )
    parser.add_argument(
        "-v", "--version",
        action='version',
        version=f"varvamp {__version__}",
    )

    # guard clause: no arguments at all -> show usage and bail out
    if not sysargs:
        parser.print_help()
        sys.exit(-1)
    return parser.parse_args(sysargs)
92
+
93
+
94
def main(sysargs=None):
    """
    main varvamp workflow

    Runs the full amplicon design pipeline: argument parsing, alignment
    preprocessing, consensus creation, conserved region search, primer
    search/filtering, amplicon scheme construction and report writing.

    :param sysargs: CLI argument list; defaults to ``sys.argv[1:]``
                    evaluated at call time (not at import time)
    """
    # resolve the default lazily — a ``sysargs=sys.argv[1:]`` default would
    # be frozen at import time
    if sysargs is None:
        sysargs = sys.argv[1:]
    # start varVAMP
    args = get_args(sysargs)
    if not args.console:
        # silence all console output
        sys.stdout = open(os.devnull, 'w')
    # NOTE(review): process_time ignores wall-clock waits — confirm this is
    # the intended runtime metric for the final progress report
    start_time = time.process_time()
    results_dir, data_dir, log_file = logging.create_dir_structure(args.input[1])
    logging.raise_arg_errors(args, log_file)
    logging.varvamp_progress(log_file)
    # config check
    logging.confirm_config(args, log_file)
    logging.varvamp_progress(
        log_file,
        progress=0.1,
        job="Checking config.",
        progress_text="config file passed"
    )
    # preprocess and clean alignment of gaps
    alignment_cleaned, gaps_to_mask = alignment.process_alignment(
        args.input[0],
        args.threshold
    )
    logging.varvamp_progress(
        log_file,
        progress=0.2,
        job="Preprocessing alignment and cleaning gaps.",
        progress_text=f"{len(gaps_to_mask)} gaps with {alignment.calculate_total_masked_gaps(gaps_to_mask)} nucleotides"
    )
    # create consensus sequences
    majority_consensus, ambiguous_consensus = consensus.create_consensus(
        alignment_cleaned,
        args.threshold
    )
    logging.varvamp_progress(
        log_file,
        progress=0.3,
        job="Creating consensus sequences.",
        progress_text=f"length of the consensus is {len(majority_consensus)} nt"
    )
    # generate conserved region list
    conserved_regions = conserved.find_regions(
        ambiguous_consensus,
        args.allowed_ambiguous
    )
    if not conserved_regions:
        logging.raise_error(
            "nothing conserved. Lower the threshold!",
            log_file,
            exit=True
        )
    logging.varvamp_progress(
        log_file,
        progress=0.4,
        job="Finding conserved regions.",
        progress_text=f"{conserved.mean(conserved_regions, majority_consensus)} % conserved"
    )
    # produce kmers for all conserved regions
    kmers = conserved.produce_kmers(
        conserved_regions,
        majority_consensus
    )
    logging.varvamp_progress(
        log_file,
        progress=0.5,
        job="Digesting into kmers.",
        progress_text=f"{len(kmers)} kmers"
    )
    # find potential primers
    left_primer_candidates, right_primer_candidates = primers.find_primers(
        kmers,
        ambiguous_consensus,
        alignment_cleaned
    )
    # "strand" instead of "type" — the original shadowed the builtin
    for strand, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]:
        if not primer_candidates:
            logging.raise_error(
                f"no {strand} primers found.\n",
                log_file,
                exit=True
            )
    logging.varvamp_progress(
        log_file,
        progress=0.6,
        job="Filtering for primers.",
        progress_text=f"{len(left_primer_candidates)} fw and {len(right_primer_candidates)} rw potential primers"
    )
    # find best primers and create primer dict
    all_primers = primers.find_best_primers(left_primer_candidates, right_primer_candidates)
    logging.varvamp_progress(
        log_file,
        progress=0.7,
        job="Considering only high scoring primers.",
        progress_text=f"{len(all_primers['+'])} fw and {len(all_primers['-'])} rw primers"
    )
    # find all possible amplicons
    amplicons = scheme.find_amplicons(
        all_primers,
        args.opt_length,
        args.max_length
    )
    if not amplicons:
        logging.raise_error(
            "no amplicons found. Increase the max "
            "amplicon length or lower threshold!\n",
            log_file,
            exit=True
        )
    amplicon_graph = scheme.create_amplicon_graph(amplicons, args.overlap)
    logging.varvamp_progress(
        log_file,
        progress=0.8,
        job="Finding potential amplicons.",
        progress_text=f"{len(amplicons)} potential amplicons"
    )
    # search for amplicon scheme
    coverage, amplicon_scheme = scheme.find_best_covering_scheme(
        amplicons,
        amplicon_graph,
        all_primers
    )
    dimers_not_solved = scheme.check_and_solve_heterodimers(
        amplicon_scheme,
        left_primer_candidates,
        right_primer_candidates,
        all_primers
    )
    if dimers_not_solved:
        logging.raise_error(
            f"varVAMP found {len(dimers_not_solved)} primer dimers without replacements. Check the dimer file and perform the PCR for incompatible amplicons in a separate reaction.",
            log_file
        )
        # BUGFIX: the original passed the *builtin* ``dir`` function here
        # instead of a directory path. results_dir is assumed to be the
        # intended target -- TODO confirm against reporting.write_dimers
        # (could also be data_dir).
        reporting.write_dimers(results_dir, dimers_not_solved)
    percent_coverage = round(coverage/len(ambiguous_consensus)*100, 2)
    logging.varvamp_progress(
        log_file,
        progress=0.9,
        job="Creating amplicon scheme.",
        progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme[0]) + len(amplicon_scheme[1])} amplicons"
    )
    if percent_coverage < 70:
        logging.raise_error(
            "coverage < 70 %. Possible solutions:\n"
            "\t - lower threshold\n"
            "\t - increase amplicons lengths\n"
            "\t - increase number of ambiguous nucleotides\n"
            "\t - relax primer settings (not recommended)\n",
            log_file
        )
    # write files
    reporting.write_alignment(data_dir, alignment_cleaned)
    reporting.write_fasta(data_dir, "majority_consensus", majority_consensus)
    reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus)
    reporting.write_conserved_to_bed(conserved_regions, data_dir)
    reporting.write_all_primers(data_dir, all_primers)
    reporting.write_scheme_to_files(
        results_dir,
        amplicon_scheme,
        ambiguous_consensus
    )
    reporting.varvamp_plot(
        results_dir,
        args.threshold,
        alignment_cleaned,
        conserved_regions,
        all_primers,
        amplicon_scheme,
    )
    logging.varvamp_progress(log_file, progress=1, start_time=start_time)
File without changes
@@ -0,0 +1,223 @@
1
+ """
2
+ alignment preprocessing
3
+ """
4
+
5
+ # BUILT-INS
6
+ import re
7
+
8
+ # LIBS
9
+ from Bio import AlignIO
10
+ from Bio.Seq import Seq
11
+
12
+
13
def read_alignment(alignment_path):
    """
    Parse a fasta alignment file into a list of
    [sequence id, sequence string] pairs.
    """
    records = AlignIO.read(alignment_path, "fasta")
    return [[record.id, str(record.seq)] for record in records]
24
+
25
+
26
def preprocess(alignment):
    """
    Normalize the alignment: lowercase every nucleotide and
    back-transcribe RNA (contains "u") to DNA.
    """
    normalized = []

    for name, raw_seq in alignment:
        seq = Seq(raw_seq).lower()
        if "u" in seq:
            seq = seq.back_transcribe()
        normalized.append([name, str(seq)])

    return normalized
41
+
42
+
43
def find_gaps_in_alignment(alignment):
    """
    Locate every gap run in each aligned sequence.

    Returns one list per sequence holding (start, end) tuples,
    end inclusive, for each stretch of "-" characters.
    """
    gaps_per_sequence = []

    for entry in alignment:
        matches = re.finditer("-{1,}", entry[1])
        gaps_per_sequence.append(
            [(match.start(0), match.end(0) - 1) for match in matches]
        )

    return gaps_per_sequence
57
+
58
+
59
def find_unique_gaps(all_gaps):
    """
    Collapse the per-sequence gap lists into a deduplicated
    list of gap tuples (order is arbitrary).
    """
    unique = set()
    for gap_list in all_gaps:
        unique.update(gap_list)
    return list(unique)
65
+
66
+
67
def find_internal_gaps(unique_gaps, gap):
    """
    Collect every unique gap that lies completely inside the given gap.

    A gap of length one (start == stop) only contains itself. Containment
    is tested over half-open position ranges: a candidate counts when its
    intersection with the gap's range spans the candidate exactly.
    """
    # length-1 gaps cannot contain anything but themselves
    if gap[1] - gap[0] == 0:
        return [gap]

    internal = []
    span = range(gap[0], gap[1])
    for candidate in unique_gaps:
        shared = set(range(candidate[0], candidate[1])).intersection(span)
        if not shared:
            continue
        # candidate is internal when the intersection covers it entirely
        if min(shared) == candidate[0] and max(shared) + 1 == candidate[1]:
            internal.append(candidate)

    return internal
92
+
93
+
94
def create_gap_dictionary(unique_gaps, all_gaps):
    """
    Count, over all sequences, how often each unique gap occurs —
    including occurrences as an internal gap of a larger gap.
    """
    gap_counts = {}

    for gap_list in all_gaps:
        for gap in gap_list:
            for internal_gap in find_internal_gaps(unique_gaps, gap):
                gap_counts[internal_gap] = gap_counts.get(internal_gap, 0) + 1

    return gap_counts
112
+
113
+
114
def find_gaps_to_mask(gap_dict, cutoff):
    """
    Filter gaps by their frequency cutoff and condense overlapping
    (or directly adjacent) gaps into single regions.

    Parameters
    ----------
    gap_dict : dict
        maps (start, stop) gap tuples (stop inclusive) to their counts
    cutoff : float
        minimal count (exclusive) for a gap to be masked

    Returns
    -------
    list of [start, stop] regions (stop inclusive), sorted by position

    BUGFIX: the original loop flushed the currently open region only when
    a later region started past it — the final open region (and a sole
    qualifying gap) was silently dropped. The merge below always flushes
    the last open region.
    """
    # keep only gaps supported by enough sequences, sorted by start/stop
    potential_gaps = sorted(gap for gap, count in gap_dict.items() if count > cutoff)

    gaps_to_mask = []
    opened_region = []
    for region in potential_gaps:
        region = list(region)
        if not opened_region:
            opened_region = region
        elif region[0] > opened_region[1]:
            # disjoint: flush the open region and start a new one
            gaps_to_mask.append(opened_region)
            opened_region = region
        else:
            # overlapping or touching: extend the open region
            opened_region[1] = max(opened_region[1], region[1])
    if opened_region:
        gaps_to_mask.append(opened_region)

    return gaps_to_mask
154
+
155
+
156
def clean_gaps(alignment, gaps_to_mask):
    """
    clean an alignment of large common deletions.

    Each region in gaps_to_mask is cut out of every sequence and replaced
    by a single "N" placed before the following sequence segment.
    Returns a new list of [id, masked sequence] pairs.

    NOTE(review): ``max(gaps_to_mask)`` raises ValueError when
    gaps_to_mask is empty — callers apparently guarantee a non-empty
    list; confirm.
    """
    cleaned_alignment = []

    for sequence in alignment:
        start = 0
        masked_seq = str()
        for region in gaps_to_mask:
            # copy the segment between the previous gap and this one
            stop = region[0]
            masked_seq_temp = sequence[1][start:stop]
            # check if the deletion is at the start
            if len(masked_seq_temp) != 0:
                # NOTE(review): an "N" is prepended even before the very
                # first segment (start == 0, no preceding gap) — looks
                # intentional only for gap positions; confirm.
                masked_seq = (masked_seq + "N" + masked_seq_temp)
            # resume copying after the masked region (stop inclusive)
            start = region[1]+1
        if max(gaps_to_mask)[1] < len(sequence[1])-1:
            # append the last gaps if it is not
            # the end of the sequence
            # NOTE(review): here start has no ``+1`` and stop is
            # ``len-1`` (excludes the final character) — inconsistent
            # with the loop above (``region[1]+1``); possible off-by-one,
            # confirm intended slicing.
            start = max(gaps_to_mask)[1]
            stop = len(sequence[1])-1
            masked_seq_temp = sequence[1][start:stop]
            masked_seq = (masked_seq + "N" + masked_seq_temp)
        else:
            # append the mask to the end of the seq
            masked_seq = masked_seq + "N"

        cleaned_alignment.append([sequence[0], masked_seq])

    return cleaned_alignment
186
+
187
+
188
def process_alignment(alignment_path, threshold):
    """
    Read, preprocess and gap-clean an alignment.

    Returns the cleaned alignment together with the list of
    masked gap regions (empty when no gaps were found).
    """
    raw_alignment = read_alignment(alignment_path)
    # a gap survives masking when more than this many sequences lack it
    gap_cutoff = len(raw_alignment) * (1 - threshold)

    preprocessed = preprocess(raw_alignment)
    all_gaps = find_gaps_in_alignment(preprocessed)
    unique_gaps = find_unique_gaps(all_gaps)

    if not unique_gaps:
        return preprocessed, []

    gap_counts = create_gap_dictionary(unique_gaps, all_gaps)
    gaps_to_mask = find_gaps_to_mask(gap_counts, gap_cutoff)
    cleaned = clean_gaps(preprocessed, gaps_to_mask)

    return cleaned, gaps_to_mask
210
+
211
+
212
def calculate_total_masked_gaps(gaps_to_mask):
    """
    Return the cumulative nucleotide length of all masked regions
    (stop positions are inclusive); 0 when nothing was masked.
    """
    if not gaps_to_mask:
        return 0
    return sum(region[1] - region[0] + 1 for region in gaps_to_mask)
@@ -0,0 +1,59 @@
1
"""
This contains all varVAMP parameters. Options that can be adjusted by arguments
are FREQUENCY_THRESHOLD, PRIMER_ALLOWED_N_AMB, AMPLICON_MIN_OVERLAP, AMPLICON_OPT_LENGTH,
AMPLICON_MAX_LENGTH.
"""

# CAN BE CHANGED

# alignment and consensus creation threshold
FREQUENCY_THRESHOLD = 0.9  # freq at which a nucleotide is considered conserved
PRIMER_ALLOWED_N_AMB = 4  # allowed number of ambiguous chars in primer

# basic primer parameters
PRIMER_TMP = (57, 63, 60)  # melting temperature in deg C (min, max, opt)
PRIMER_GC_RANGE = (40, 60, 50)  # gc content in % (min, max, opt)
PRIMER_SIZES = (17, 27, 20)  # size in nt (min, max, opt)
PRIMER_MAX_POLYX = 4  # max number of polyx repeats
PRIMER_MAX_DINUC_REPEATS = 4  # max number of dinucleotide repeats
PRIMER_HAIRPIN = 47  # max melting temp for secondary structures
PRIMER_MAX_GC_END = 3  # max GCs in the last 5 bases of the primer
PRIMER_GC_CLAMP = 1  # min number of GC nucleotides at the very 3' end
PRIMER_MIN_3_WITHOUT_AMB = 2  # min len of 3' without ambiguous characters
PRIMER_MAX_DIMER_TMP = 47  # max melting temp for dimers (homo- or heterodimers)

# PCR parameters
PCR_MV_CONC = 50  # monovalent cations mM
PCR_DV_CONC = 2  # divalent cations mM
PCR_DNTP_CONC = 0.8  # dntp concentration mM
PCR_DNA_CONC = 50  # primer concentration nM

# multipliers for primer base penalties
PRIMER_TM_PENALTY = 2  # temperature penalty
PRIMER_GC_PENALTY = 0.2  # gc penalty
PRIMER_SIZE_PENALTY = 0.5  # size penalty
PRIMER_MAX_BASE_PENALTY = 8  # max base penalty for a primer
PRIMER_3_PENALTY = (10, 10, 10)  # penalties for 3' mismatches
PRIMER_PERMUTATION_PENALTY = 0.1  # penalty for the number of permutations

# amplicon settings
AMPLICON_MIN_OVERLAP = 100  # min overlap between neighbouring amplicons in nt
AMPLICON_OPT_LENGTH = 1000  # optimal amplicon length in nt
AMPLICON_MAX_LENGTH = 2000  # maximal amplicon length in nt

# DO NOT CHANGE
# nucleotide definitions
# unambiguous DNA bases (lowercase, as used throughout the pipeline)
nucs = set("atcg")
# IUPAC ambiguity codes mapped to the bases they stand for
ambig_nucs = {
    "r": ["a", "g"],
    "y": ["c", "t"],
    "s": ["g", "c"],
    "w": ["a", "t"],
    "k": ["g", "t"],
    "m": ["a", "c"],
    "b": ["c", "g", "t"],
    "d": ["a", "g", "t"],
    "h": ["a", "c", "t"],
    "v": ["a", "c", "g"],
    "n": ["a", "c", "g", "t"]
}
@@ -0,0 +1,111 @@
1
+ """
2
+ consensus creation
3
+ """
4
+
5
+ # BUILT-INS
6
+ import collections
7
+
8
+ # varVAMP
9
+ from varvamp.scripts import config
10
+
11
+
12
def determine_nucleotide_counts(alignment, idx):
    """
    Count the nucleotides at one alignment column and return a dict
    sorted by decreasing frequency. Ambiguous characters contribute
    fractional counts spread over their permutations; gap characters
    ("-") are dropped from the result.
    """
    column = [sequence[1][idx] for sequence in alignment]
    counter = dict(collections.Counter(column))

    # fractional counts contributed by ambiguous characters
    adjusted = {}
    to_delete = []
    for nucleotide in counter:
        if nucleotide in config.ambig_nucs:
            to_delete.append(nucleotide)
            permutations = config.ambig_nucs[nucleotide]
            share = 1 / len(permutations)
            for permutation in permutations:
                adjusted[permutation] = adjusted.get(permutation, 0) + share
        if nucleotide == "-":
            to_delete.append(nucleotide)

    # drop ambiguous/gap entries and merge in the adjusted frequencies
    for key in to_delete:
        counter.pop(key)
    for nucleotide, extra in adjusted.items():
        counter[nucleotide] = counter.get(nucleotide, 0) + extra

    return dict(sorted(counter.items(), key=lambda item: item[1], reverse=True))
53
+
54
+
55
def get_consensus_nucleotides(nucleotide_counts, consensus_cutoff):
    """
    Take nucleotides (assumed ordered by decreasing frequency) until
    their cumulative count reaches the consensus cutoff.
    """
    consensus_nucleotides = []
    cumulative = 0

    for nucleotide, count in nucleotide_counts.items():
        cumulative += count
        consensus_nucleotides.append(nucleotide)
        if cumulative >= consensus_cutoff:
            break

    return consensus_nucleotides
69
+
70
+
71
def get_ambiguous_char(nucleotides):
    """
    Return the ambiguous character whose permutations match the given
    nucleotides exactly (None when no code matches).
    """
    wanted = set(nucleotides)
    return next(
        (char for char, permutations in config.ambig_nucs.items()
         if set(permutations) == wanted),
        None,
    )
79
+
80
def create_consensus(alignment, threshold):
    """
    Build two consensus sequences from an alignment:

    * majority consensus — always the most frequent nucleotide
    * ambiguous consensus — an ambiguous character wherever more than
      one nucleotide is needed to reach the frequency threshold

    Parameters
    ----------
    alignment: list of [id, sequence] pairs (equal-length sequences)
    threshold: frequency threshold for a column to count as conserved

    Returns
    -------
    (majority_consensus, ambiguous_consensus) as strings
    """
    # define consensus cut-off
    consensus_cutoff = len(alignment) * threshold
    # define length of the consensus from the first seq in alignment
    length_consensus = len(alignment[0][1])

    # collect per-column characters in lists and join once at the end —
    # avoids the quadratic cost of repeated string concatenation
    majority_chars = []
    ambiguous_chars = []

    for idx in range(length_consensus):
        nucleotide_counts = determine_nucleotide_counts(alignment, idx)
        consensus_nucleotides = get_consensus_nucleotides(
            nucleotide_counts,
            consensus_cutoff
        )
        if len(consensus_nucleotides) > 1:
            # multiple nucleotides needed -> use the IUPAC ambiguity code
            ambiguous_chars.append(get_ambiguous_char(consensus_nucleotides))
        else:
            ambiguous_chars.append(consensus_nucleotides[0])
        # majority consensus always takes the most frequent nucleotide
        majority_chars.append(consensus_nucleotides[0])

    return "".join(majority_chars), "".join(ambiguous_chars)