varvamp 0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,390 @@
+ """
+ amplicon search
+ """
+
+ # BUILT-INS
+ import heapq
+
+ # varVAMP
+ from varvamp.scripts import config, primers
+
+
+ class Graph(object):
+     """
+     a graph class
+     """
+
+     def __init__(self, nodes, init_graph):
+         self.nodes = nodes
+         self.graph = self.construct_graph(nodes, init_graph)
+
+     def construct_graph(self, nodes, init_graph):
+         """
+         This method makes sure that the graph is symmetrical, but sets the score
+         for edges in the reverse direction to infinity so that Dijkstra's algorithm
+         never moves to an amplicon that lies in the wrong direction.
+         """
+         graph = {}
+         for node in nodes:
+             graph[node] = {}
+
+         graph.update(init_graph)
+
+         for node, neighbors in graph.items():
+             for neighbor in neighbors.keys():
+                 if graph[neighbor].get(node, False) is False:
+                     graph[neighbor][node] = float("infinity")
+
+         return graph
+
+     def get_nodes(self):
+         """
+         Returns the nodes of the graph.
+         """
+         return self.nodes
+
+     def get_neighbors(self, node):
+         """
+         Returns the neighbors of a node.
+         """
+         neighbors = []
+         for out_node in self.nodes:
+             if self.graph[node].get(out_node, False) is not False:
+                 neighbors.append(out_node)
+         return neighbors
+
+     def value(self, node1, node2):
+         """
+         Returns the value of an edge between two nodes.
+         """
+         return self.graph[node1][node2]
+
+
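As a quick illustration of the reverse-edge handling in `construct_graph` (the node names and the score below are invented for this sketch, not taken from the package):

```python
# toy graph: a single directed edge A -> B with score 4.2
toy = Graph(nodes=["A", "B"], init_graph={"A": {"B": 4.2}})

print(toy.value("A", "B"))     # 4.2
print(toy.value("B", "A"))     # inf -> Dijkstra never walks against the genome direction
print(toy.get_neighbors("A"))  # ['B']
```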
+ def find_amplicons(all_primers, opt_len, max_len):
+     """
+     finds all possible amplicons, creates a dictionary
+     """
+     amplicon_number = 0
+     amplicon_dict = {}
+
+     for left_name in all_primers["+"]:
+         left_primer = all_primers["+"][left_name]
+         for right_name in all_primers["-"]:
+             right_primer = all_primers["-"][right_name]
+             amplicon_length = right_primer[2] - left_primer[1]
+             if not opt_len <= amplicon_length <= max_len:
+                 continue
+             if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
+                 continue
+             # calculate length-dependent amplicon costs as the cumulative primer
+             # score multiplied by the fold change of the amplicon length over the optimal length.
+             amplicon_costs = (right_primer[3] + left_primer[3])*(amplicon_length/opt_len)
+             amplicon_name = "amplicon_"+str(amplicon_number)
+             amplicon_dict[amplicon_name] = [
+                 left_primer[1],  # start
+                 right_primer[2],  # stop
+                 left_name,  # name left primer
+                 right_name,  # name right primer
+                 amplicon_length,  # amplicon length
+                 amplicon_costs  # costs
+             ]
+             amplicon_number += 1
+
+     return amplicon_dict
+
+
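A small worked example of the cost term above, with invented numbers:

```python
opt_len, amplicon_length = 1000, 1200  # hypothetical optimal and actual amplicon lengths
left_score, right_score = 2.5, 3.1     # hypothetical primer penalty scores
amplicon_costs = (right_score + left_score) * (amplicon_length / opt_len)
print(amplicon_costs)  # 6.72 -> longer-than-optimal amplicons and worse primers both raise the cost
```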
+ def create_amplicon_graph(amplicons, min_overlap):
+     """
+     creates the amplicon graph.
+     """
+     # init graph and vertices
+     amplicon_graph = {}
+     nodes = []
+
+     # add the maximum primer length to ensure that a possible amplicon starts
+     # before the min overlap
+     min_overlap = min_overlap + config.PRIMER_SIZES[2]
+
+     for current in amplicons:
+         # remember all vertices
+         nodes.append(current)
+         current_amplicon = amplicons[current]
+         start = current_amplicon[0] + current_amplicon[4]/2
+         stop = current_amplicon[1] - min_overlap
+         for next_amp in amplicons:
+             next_amplicon = amplicons[next_amp]
+             # check if the next amplicon lies within the start/stop range of
+             # the current amplicon and if its non-overlapping part is large
+             # enough to ensure space for a primer and the min overlap of the
+             # following amplicon.
+             if not all((start <= next_amplicon[0] <= stop, next_amplicon[1] > current_amplicon[1] + next_amplicon[4]/2)):
+                 continue
+             if current not in amplicon_graph:
+                 amplicon_graph[current] = {next_amp: next_amplicon[5]}
+             else:
+                 amplicon_graph[current][next_amp] = next_amplicon[5]
+
+     # return a graph object
+     return Graph(nodes, amplicon_graph)
+
+
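A sketch of which edges `create_amplicon_graph` actually creates, using three invented amplicon entries (the coordinates, costs, and `min_overlap` are made up; the maximum primer length added via `config.PRIMER_SIZES[2]` is assumed to be tens of bases, small relative to these positions):

```python
# [start, stop, left primer name, right primer name, length, cost]
amplicons = {
    "amplicon_0": [0, 1000, "L0", "R0", 1000, 5.0],
    "amplicon_1": [800, 1800, "L1", "R1", 1000, 6.0],  # starts after the midpoint of amplicon_0
    "amplicon_2": [100, 1100, "L2", "R2", 1000, 4.0],  # starts before the midpoint -> no edge from amplicon_0
}
graph = create_amplicon_graph(amplicons, min_overlap=100)
print(graph.value("amplicon_0", "amplicon_1"))  # 6.0 -> the edge weight is the cost of the next amplicon
```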
+ def dijkstra_algorithm(graph, start_node):
+     """
+     implementation of Dijkstra's algorithm
+     """
+
+     previous_nodes = {}
+     shortest_path = {node: float('infinity') for node in graph.get_nodes()}
+     shortest_path[start_node] = 0
+
+     nodes_to_test = [(0, start_node)]
+
+     while nodes_to_test:
+         current_distance, current_node = heapq.heappop(nodes_to_test)
+         if current_distance > shortest_path[current_node]:
+             continue
+         for neighbor in graph.get_neighbors(current_node):
+             distance = current_distance + graph.value(current_node, neighbor)
+             # Only consider this new path if it's a better path
+             if not distance < shortest_path[neighbor]:
+                 continue
+             shortest_path[neighbor] = distance
+             previous_nodes[neighbor] = current_node
+             heapq.heappush(nodes_to_test, (distance, neighbor))
+
+     return previous_nodes, shortest_path
+
+
+ def get_end_node(previous_nodes, shortest_path, amplicons):
+     """
+     get the target node with the lowest score out of all
+     nodes that have the same maximum end position
+     """
+     stop_nucleotide = 0
+
+     for node in previous_nodes.keys():
+         # if a node has a larger stop -> reset the dict and set the new
+         # best stop nucleotide
+         if amplicons[node][1] > stop_nucleotide:
+             possible_end_nodes = {}
+             possible_end_nodes[node] = shortest_path[node]
+             stop_nucleotide = amplicons[node][1]
+         # if nodes have the same stop nucleotide, add to dictionary
+         elif amplicons[node][1] == stop_nucleotide:
+             possible_end_nodes[node] = shortest_path[node]
+
+     # return the end node with the lowest score
+     return min(possible_end_nodes.items(), key=lambda x: x[1])
+
+
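The final `min()` call above picks the (node, score) pair with the smallest path score among the nodes that share the maximum stop position, e.g. with invented values:

```python
possible_end_nodes = {"amplicon_7": 12.4, "amplicon_8": 9.9}
print(min(possible_end_nodes.items(), key=lambda x: x[1]))  # ('amplicon_8', 9.9)
```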
+ def get_min_path(previous_nodes, shortest_path, start_node, target_node):
+     """
+     get the min path from the start to the stop node from the
+     previously calculated shortest paths
+     """
+     path = []
+     node = target_node
+
+     while node != start_node:
+         path.append(node)
+         node = previous_nodes[node]
+     # Add the start node manually
+     path.append(start_node)
+
+     # return the reversed list
+     return path[::-1]
+
+
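A minimal end-to-end sketch of `dijkstra_algorithm` and `get_min_path` on an invented three-amplicon graph (names, scores, and edges are hypothetical):

```python
toy_graph = Graph(
    nodes=["amplicon_0", "amplicon_1", "amplicon_2"],
    init_graph={
        "amplicon_0": {"amplicon_1": 6.0, "amplicon_2": 15.0},
        "amplicon_1": {"amplicon_2": 4.0},
    },
)
previous_nodes, shortest_path = dijkstra_algorithm(toy_graph, "amplicon_0")
print(shortest_path["amplicon_2"])  # 10.0 -> going via amplicon_1 beats the direct edge (15.0)
print(get_min_path(previous_nodes, shortest_path, "amplicon_0", "amplicon_2"))
# ['amplicon_0', 'amplicon_1', 'amplicon_2']
```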
+ def create_scheme_dic(amplicon_scheme, amplicons, all_primers):
+     """
+     creates the final scheme dictionary
+     """
+
+     scheme_dictionary = {
+         0: {},
+         1: {}
+     }
+
+     for pool in (0, 1):
+         for amp in amplicon_scheme[pool::2]:
+             scheme_dictionary[pool][amp] = {}
+             primer_names = [amplicons[amp][2], amplicons[amp][3]]
+             scheme_dictionary[pool][amp][primer_names[0]] = all_primers["+"][primer_names[0]]
+             scheme_dictionary[pool][amp][primer_names[1]] = all_primers["-"][primer_names[1]]
+
+     return scheme_dictionary
+
+
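The `amplicon_scheme[pool::2]` slicing is what alternates neighbouring, overlapping amplicons between the two PCR pools; with an invented path:

```python
amplicon_scheme = ["amplicon_0", "amplicon_3", "amplicon_7", "amplicon_9"]
print(amplicon_scheme[0::2])  # ['amplicon_0', 'amplicon_7'] -> pool 0
print(amplicon_scheme[1::2])  # ['amplicon_3', 'amplicon_9'] -> pool 1
```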
+ def find_best_covering_scheme(amplicons, amplicon_graph, all_primers):
+     """
+     this brute-forces the amplicon scheme search until the largest
+     coverage with the minimal costs is achieved.
+     """
+     # init
+     coverage = 0
+     best_coverage = 0
+     max_stop = max(amplicons.items(), key=lambda x: x[1][1])[1][1]
+     best_score = float('infinity')
+
+     for start_node in amplicons:
+         # if the currently best coverage + start nucleotide of the currently tested amplicon
+         # is smaller than the maximal stop nucleotide there might be a better amplicon
+         # scheme that covers more of the genome
+         if amplicons[start_node][0] + best_coverage <= max_stop:
+             previous_nodes, shortest_path = dijkstra_algorithm(amplicon_graph, start_node)
+             # only continue if there are previous_nodes
+             if previous_nodes:
+                 target_node, score = get_end_node(previous_nodes, shortest_path, amplicons)
+                 coverage = amplicons[target_node][1] - amplicons[start_node][0]
+                 # if the new coverage is larger, go for the larger coverage
+                 if coverage > best_coverage:
+                     best_start_node = start_node
+                     best_target_node = target_node
+                     best_previous_nodes = previous_nodes
+                     best_shortest_path = shortest_path
+                     best_score = score
+                     best_coverage = coverage
+                 # if the coverages are identical, go for the lowest costs
+                 elif coverage == best_coverage:
+                     if score < best_score:
+                         best_start_node = start_node
+                         best_target_node = target_node
+                         best_previous_nodes = previous_nodes
+                         best_shortest_path = shortest_path
+                         best_score = score
+                         best_coverage = coverage
+             else:
+                 # check if the single amplicon has the largest coverage so far
+                 coverage = amplicons[start_node][1] - amplicons[start_node][0]
+                 if coverage > best_coverage:
+                     best_start_node = start_node
+                     best_previous_nodes = previous_nodes
+                     best_shortest_path = shortest_path
+                     best_score = amplicons[start_node][5]
+                     best_coverage = coverage
+         # no need to check more, the best covering amplicon scheme was found and
+         # has the minimal score compared to the schemes with the same coverage
+         else:
+             break
+
+     if best_previous_nodes:
+         amplicon_scheme = get_min_path(best_previous_nodes, best_shortest_path, best_start_node, best_target_node)
+     else:
+         # if no previous nodes are found but a single amplicon gives the largest
+         # coverage - return it as the best scheme
+         amplicon_scheme = [best_start_node]
+
+     return best_coverage, create_scheme_dic(amplicon_scheme, amplicons, all_primers)
+
+
+ def test_scheme_for_dimers(amplicon_scheme):
+     """
+     test the best scoring scheme for primer dimers
+     """
+
+     primer_dimers = []
+
+     for pool in amplicon_scheme:
+         # test the primer dimers only within the respective pools
+         tested_primers = []
+         for amp in amplicon_scheme[pool]:
+             for primer in amplicon_scheme[pool][amp]:
+                 # remember where the current primer was in the scheme
+                 current_primer = (pool, amp, primer, amplicon_scheme[pool][amp][primer])
+                 current_seq = current_primer[3][0]
+                 for tested in tested_primers:
+                     tested_seq = tested[3][0]
+                     if primers.calc_dimer(current_seq, tested_seq).tm <= config.PRIMER_MAX_DIMER_TMP:
+                         continue
+                     primer_dimers.append((current_primer, tested))
+                 # and remember all tested primers
+                 tested_primers.append(current_primer)
+
+     return primer_dimers
+
+
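`primers.calc_dimer` lives in `varvamp/scripts/primers.py` and is not shown in this diff; assuming it is a thin wrapper around primer3-py's heterodimer calculation (an assumption based on the `primer3-py` dependency listed in the METADATA below), the per-pair check is roughly:

```python
import primer3

left_seq = "AGCGTAGCTAGCTAGCTAGCTA"   # invented primer sequences
right_seq = "TAGCTAGCTAGCTAGCTACGCT"
max_dimer_tmp = 21.0                  # invented cutoff, not the varVAMP default

result = primer3.calcHeterodimer(left_seq, right_seq)
if result.tm > max_dimer_tmp:
    print(f"potential primer dimer (Tm = {result.tm:.1f} degC)")
```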
+ def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates):
+     """
+     get overlapping primers for a primer dimer pair from the previously
+     excluded candidates. returns a list of lists with possible replacement
+     primers for each primer in the dimer.
+     """
+
+     overlapping_primers = []
+     # test each primer in the dimer
+     for primer in dimer:
+         overlapping_primers_temp = []
+         # check in which list to look for them
+         overlap_range = range(primer[3][1], primer[3][2]+1)
+         overlap_set = set(overlap_range)
+         if "RIGHT" in primer[2]:
+             primers_to_test = right_primer_candidates
+         else:
+             primers_to_test = left_primer_candidates
+         # and check this list for all primers that overlap
+         for potential_new in primers_to_test:
+             primer_positions = list(range(potential_new[1], potential_new[2]+1))
+             if not any(x in primer_positions for x in overlap_set):
+                 continue
+             overlapping_primers_temp.append((primer[0], primer[1], primer[2], potential_new))
+
+         overlapping_primers.append(overlapping_primers_temp)
+
+     return overlapping_primers
+
+
+ def test_overlaps_for_dimers(overlapping_primers):
+     """
+     test the overlapping primers for dimers. return new primers.
+     """
+     for first_overlap in overlapping_primers[0]:
+         for second_overlap in overlapping_primers[1]:
+             # return the first match. primers are sorted by score.
+             # first pair that makes it has the lowest score
+             if primers.calc_dimer(first_overlap[3][0], second_overlap[3][0]).tm <= config.PRIMER_MAX_DIMER_TMP:
+                 return [first_overlap, second_overlap]
+
+
+ def check_and_solve_heterodimers(amplicon_scheme, left_primer_candidates, right_primer_candidates, all_primers):
+     """
+     check the scheme for heterodimers and try to find
+     new primers that overlap and replace the existing ones.
+     this can lead to new primer dimers. therefore the
+     process is repeated until no primer dimers are found
+     in the updated scheme or all found primer dimers have
+     no replacements.
+     """
+     not_solvable = []
+
+     primer_dimers = test_scheme_for_dimers(amplicon_scheme)
+
+     while primer_dimers:
+         for dimer in primer_dimers:
+             # skip primer dimers that could not be solved previously
+             if dimer in not_solvable:
+                 continue
+             overlapping_primers = get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates)
+             # test all possible primers against each other for dimers
+             new_primers = test_overlaps_for_dimers(overlapping_primers)
+             # now change these primers in the scheme
+             if new_primers:
+                 for new in new_primers:
+                     # overwrite in the final scheme
+                     amplicon_scheme[new[0]][new[1]][new[2]] = new[3]
+                     # and in all primers
+                     if "LEFT" in new[2]:
+                         strand = "+"
+                     else:
+                         strand = "-"
+                     all_primers[strand][new[2]] = new[3]
+             # or remember the dimers for which varvamp did not find a replacement.
+             else:
+                 not_solvable.append(dimer)
+         # none of the primer dimers of this iteration could be solved
+         if all(x in not_solvable for x in primer_dimers):
+             break
+         # some could be solved. lets check the updated scheme again.
+         else:
+             primer_dimers = test_scheme_for_dimers(amplicon_scheme)
+
+     return not_solvable
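Tying the module together, the call order suggested by the functions above looks roughly like the sketch below. The primer dictionaries are tiny invented placeholders in the format inferred from this file ([sequence, start, stop, penalty]); in a real run they are produced by the other `varvamp.scripts` modules, and `check_and_solve_heterodimers` would additionally take the left/right candidate lists.

```python
from varvamp.scripts.scheme import (
    find_amplicons, create_amplicon_graph, find_best_covering_scheme
)

# hypothetical primers: [sequence, start, stop, penalty]
all_primers = {
    "+": {"LEFT_0": ["ACACACACACACACACACACACAC", 0, 24, 2.0]},
    "-": {"RIGHT_0": ["AGAGAGAGAGAGAGAGAGAGAGAG", 976, 1000, 3.0]},
}
amplicons = find_amplicons(all_primers, opt_len=800, max_len=1200)
amplicon_graph = create_amplicon_graph(amplicons, min_overlap=100)
coverage, scheme = find_best_covering_scheme(amplicons, amplicon_graph, all_primers)
print(coverage)  # 1000 for this single-amplicon toy input
```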
@@ -0,0 +1,53 @@
+ Metadata-Version: 2.1
+ Name: varvamp
+ Version: 0.3
+ Summary: varvamp
+ Home-page: https://github.com/jonas-fuchs/varVAMP
+ Author: Dr. Jonas Fuchs
+ Author-email: jonas.fuchs@uniklinik-freiburg.de
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: biopython (>=1.79)
+ Requires-Dist: matplotlib (>=3.5.1)
+ Requires-Dist: primer3-py (>=1.1.0)
+ Requires-Dist: pandas (>=1.4.4)
+ Requires-Dist: numpy (>=1.23.3)
+
+ **var**iable **V**irus**AMP**licons (varVAMP) is a tool to design primers for highly diverse viruses. The input is an alignment of your viral (full-genome) sequences.
+
+ # varVAMP
+
+ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
+
+ For a lot of virus genera it is difficult to design pan-specific primers. varVAMP solves this by introducing ambiguous characters into primers and minimizing mismatches at the 3' end. Primers might not work for some sequences of your input alignment but should recognize the large majority.
+
+ **varVAMP comes in three different flavors:**
+
+ <img src="./docs/varvamp.png" alt="varVAMP logo" />
+
+ **SANGER** *(coming soon)*: varVAMP searches for the very best primers and reports back an amplicon which can be used for PCR-based screening approaches.
+
+ **TILED**: varVAMP uses a graph-based approach to design overlapping amplicons that tile the entire viral genome. The resulting amplicons are suitable for Oxford Nanopore or Illumina based full-genome sequencing.
+
+ **QPCR** *(coming soon)*: varVAMP searches for small amplicons with an internal primer for the probe. It minimizes temperature differences between the primers.
+
+ This program is currently being developed and is in an alpha state. You are welcome to use this software. If you successfully design primers, drop me a mail. It might be possible to collaborate!
+
+ # Documentation
+
+ * [Installation](docs/installation.md)
+ * [Preparing the data](docs/preparing_the_data.md)
+ * [Usage](docs/usage.md)
+ * [Output](docs/output.md)
+ * [How it works](docs/how_varvamp_works.md)
+ * [FAQ](docs/FAQ.md)
+
+ ---
+
+ **Important disclaimer:**
+ *For the primer design, varVAMP uses [primer3](https://pypi.org/project/primer3-py/) to check if digested kmers of a sequence are potential primers. Some of the functions for this were adapted from [primalscheme](www.github.com/aresti/primalscheme) and I do not claim credit.*
+
+ *The remaining code is under the GPLv3 licence. The code is WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.*
@@ -0,0 +1,17 @@
+ varvamp/__init__.py,sha256=E30_W1z1l7QXGydNiTfAPOyYCPn5e8PCdg035eP5Q68,105
+ varvamp/__main__.py,sha256=9R3mbX2_Q5LByPx8WLoTbvZ-G2dbRSMpDlgze0Wm5Fc,98
+ varvamp/command.py,sha256=JrzxZoQGav42hIud-_EyEdXPHWfLrRQSv9WtnT0RLNg,8167
+ varvamp/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ varvamp/scripts/alignment.py,sha256=gJAyZ-J3prLiaPWNPFDBHpA8FdTQGjLecw_lrHCd2d8,6307
+ varvamp/scripts/config.py,sha256=koQSBfeEe-Nj3gI-O8z_5tIHBoB9OLIVx2MPimML2DA,2074
+ varvamp/scripts/consensus.py,sha256=I6q6GPQFif1xqStcbblFFdwo5l6MoyAOsNGKNvaxK4U,3374
+ varvamp/scripts/conserved.py,sha256=xDu8cR7wpmTt-1hjeYuJGfFy5O6WiDKUHwnmaQOh13w,4483
+ varvamp/scripts/logging.py,sha256=ImQHLiGUMQ8fOezkPOBIrfD5iJndNSkJ3pjBnjuZHTg,10303
+ varvamp/scripts/primers.py,sha256=WJnTOjS_a0NoN99Yvrqlcuj2I9a6B8CtjZ9pl4WVyvs,13241
+ varvamp/scripts/reporting.py,sha256=nXcMnv31C3AQmpbqZ5rgqvKNVP3dNSpflef75sRZVsA,12731
+ varvamp/scripts/scheme.py,sha256=FstJRz2TTgnlNAGKOP7xxQV672vHgqhjCOfd7DHSSYg,14569
+ varvamp-0.3.dist-info/METADATA,sha256=K_MVCTC8JsOnlUSGCSVyYHJBf-9-897AgljHXIeFKHw,2915
+ varvamp-0.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ varvamp-0.3.dist-info/entry_points.txt,sha256=puzW-basyBexZT4JVRUfUEqobvFmEyfqRQaqFjp7rB0,49
+ varvamp-0.3.dist-info/top_level.txt,sha256=11oVwE3SBUB9aTmvpvEDru95Tc5GZqQikzzFjw2eVGc,8
+ varvamp-0.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: bdist_wheel (0.40.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1,2 @@
+ [console_scripts]
+ varvamp = varvamp.command:main
@@ -0,0 +1 @@
+ varvamp