varvamp 0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,390 @@
+ """
+ amplicon search
+ """
+
+ # BUILT-INS
+ import heapq
+
+ # varVAMP
+ from varvamp.scripts import config, primers
+
+
+ class Graph(object):
+     """
+     a graph class
+     """
+
+     def __init__(self, nodes, init_graph):
+         self.nodes = nodes
+         self.graph = self.construct_graph(nodes, init_graph)
+
+     def construct_graph(self, nodes, init_graph):
+         """
+         This method makes sure that the graph is symmetrical, but sets the score
+         for edges in the reverse direction to infinity so that Dijkstra's algorithm
+         never moves to an amplicon that lies in the wrong direction.
+         """
+         graph = {}
+         for node in nodes:
+             graph[node] = {}
+
+         graph.update(init_graph)
+
+         for node, neighbors in graph.items():
+             for neighbor in neighbors.keys():
+                 if graph[neighbor].get(node, False) is False:
+                     graph[neighbor][node] = float("infinity")
+
+         return graph
+
+     def get_nodes(self):
+         """
+         Returns the nodes of the graph.
+         """
+         return self.nodes
+
+     def get_neighbors(self, node):
+         """
+         Returns the neighbors of a node.
+         """
+         neighbors = []
+         for out_node in self.nodes:
+             if self.graph[node].get(out_node, False) is not False:
+                 neighbors.append(out_node)
+         return neighbors
+
+     def value(self, node1, node2):
+         """
+         Returns the value of an edge between two nodes.
+         """
+         return self.graph[node1][node2]
+
+
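As a quick illustration of the reverse-edge handling in `construct_graph` (the node names and the score below are invented for this sketch, not taken from the package):

```python
# toy graph: a single directed edge A -> B with score 4.2
toy = Graph(nodes=["A", "B"], init_graph={"A": {"B": 4.2}})

print(toy.value("A", "B"))     # 4.2
print(toy.value("B", "A"))     # inf -> Dijkstra never walks against the genome direction
print(toy.get_neighbors("A"))  # ['B']
```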
+ def find_amplicons(all_primers, opt_len, max_len):
+     """
+     finds all possible amplicons, creates a dictionary
+     """
+     amplicon_number = 0
+     amplicon_dict = {}
+
+     for left_name in all_primers["+"]:
+         left_primer = all_primers["+"][left_name]
+         for right_name in all_primers["-"]:
+             right_primer = all_primers["-"][right_name]
+             amplicon_length = right_primer[2] - left_primer[1]
+             if not opt_len <= amplicon_length <= max_len:
+                 continue
+             if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
+                 continue
+             # calculate length-dependent amplicon costs as the cumulative primer
+             # score multiplied by the fold change of the amplicon length over the optimal length.
+             amplicon_costs = (right_primer[3] + left_primer[3])*(amplicon_length/opt_len)
+             amplicon_name = "amplicon_"+str(amplicon_number)
+             amplicon_dict[amplicon_name] = [
+                 left_primer[1],  # start
+                 right_primer[2],  # stop
+                 left_name,  # name left primer
+                 right_name,  # name right primer
+                 amplicon_length,  # amplicon length
+                 amplicon_costs  # costs
+             ]
+             amplicon_number += 1
+
+     return amplicon_dict
+
+
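A small worked example of the cost term above, with invented numbers:

```python
opt_len, amplicon_length = 1000, 1200  # hypothetical optimal and actual amplicon lengths
left_score, right_score = 2.5, 3.1     # hypothetical primer penalty scores
amplicon_costs = (right_score + left_score) * (amplicon_length / opt_len)
print(amplicon_costs)  # 6.72 -> longer-than-optimal amplicons and worse primers both raise the cost
```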
+ def create_amplicon_graph(amplicons, min_overlap):
+     """
+     creates the amplicon graph.
+     """
+     # init graph and vertices
+     amplicon_graph = {}
+     nodes = []
+
+     # add the maximum primer length to ensure that a possible amplicon starts
+     # before the min overlap
+     min_overlap = min_overlap + config.PRIMER_SIZES[2]
+
+     for current in amplicons:
+         # remember all vertices
+         nodes.append(current)
+         current_amplicon = amplicons[current]
+         start = current_amplicon[0] + current_amplicon[4]/2
+         stop = current_amplicon[1] - min_overlap
+         for next_amp in amplicons:
+             next_amplicon = amplicons[next_amp]
+             # check if the next amplicon lies within the start/stop range of
+             # the current amplicon and if its non-overlapping part is large
+             # enough to ensure space for a primer and the min overlap of the
+             # following amplicon.
+             if not all((start <= next_amplicon[0] <= stop, next_amplicon[1] > current_amplicon[1] + next_amplicon[4]/2)):
+                 continue
+             if current not in amplicon_graph:
+                 amplicon_graph[current] = {next_amp: next_amplicon[5]}
+             else:
+                 amplicon_graph[current][next_amp] = next_amplicon[5]
+
+     # return a graph object
+     return Graph(nodes, amplicon_graph)
+
+
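A sketch of which edges `create_amplicon_graph` actually creates, using three invented amplicon entries (the coordinates, costs, and `min_overlap` are made up; the maximum primer length added via `config.PRIMER_SIZES[2]` is assumed to be tens of bases, small relative to these positions):

```python
# [start, stop, left primer name, right primer name, length, cost]
amplicons = {
    "amplicon_0": [0, 1000, "L0", "R0", 1000, 5.0],
    "amplicon_1": [800, 1800, "L1", "R1", 1000, 6.0],  # starts after the midpoint of amplicon_0
    "amplicon_2": [100, 1100, "L2", "R2", 1000, 4.0],  # starts before the midpoint -> no edge from amplicon_0
}
graph = create_amplicon_graph(amplicons, min_overlap=100)
print(graph.value("amplicon_0", "amplicon_1"))  # 6.0 -> the edge weight is the cost of the next amplicon
```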
+ def dijkstra_algorithm(graph, start_node):
+     """
+     implementation of Dijkstra's algorithm
+     """
+
+     previous_nodes = {}
+     shortest_path = {node: float('infinity') for node in graph.get_nodes()}
+     shortest_path[start_node] = 0
+
+     nodes_to_test = [(0, start_node)]
+
+     while nodes_to_test:
+         current_distance, current_node = heapq.heappop(nodes_to_test)
+         if current_distance > shortest_path[current_node]:
+             continue
+         for neighbor in graph.get_neighbors(current_node):
+             distance = current_distance + graph.value(current_node, neighbor)
+             # Only consider this new path if it's a better path
+             if not distance < shortest_path[neighbor]:
+                 continue
+             shortest_path[neighbor] = distance
+             previous_nodes[neighbor] = current_node
+             heapq.heappush(nodes_to_test, (distance, neighbor))
+
+     return previous_nodes, shortest_path
+
+
+ def get_end_node(previous_nodes, shortest_path, amplicons):
+     """
+     get the target node with the lowest score out of all
+     nodes that have the same maximum end position
+     """
+     stop_nucleotide = 0
+
+     for node in previous_nodes.keys():
+         # if a node has a larger stop -> reset the dict and set the new
+         # best stop nucleotide
+         if amplicons[node][1] > stop_nucleotide:
+             possible_end_nodes = {}
+             possible_end_nodes[node] = shortest_path[node]
+             stop_nucleotide = amplicons[node][1]
+         # if nodes have the same stop nucleotide, add to dictionary
+         elif amplicons[node][1] == stop_nucleotide:
+             possible_end_nodes[node] = shortest_path[node]
+
+     # return the end node with the lowest score
+     return min(possible_end_nodes.items(), key=lambda x: x[1])
+
+
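The final `min()` call above picks the (node, score) pair with the smallest path score among the nodes that share the maximum stop position, e.g. with invented values:

```python
possible_end_nodes = {"amplicon_7": 12.4, "amplicon_8": 9.9}
print(min(possible_end_nodes.items(), key=lambda x: x[1]))  # ('amplicon_8', 9.9)
```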
+ def get_min_path(previous_nodes, shortest_path, start_node, target_node):
+     """
+     get the min path from the start to the stop node from the
+     previously calculated shortest paths
+     """
+     path = []
+     node = target_node
+
+     while node != start_node:
+         path.append(node)
+         node = previous_nodes[node]
+     # Add the start node manually
+     path.append(start_node)
+
+     # return the reversed list
+     return path[::-1]
+
+
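A minimal end-to-end sketch of `dijkstra_algorithm` and `get_min_path` on an invented three-amplicon graph (names, scores, and edges are hypothetical):

```python
toy_graph = Graph(
    nodes=["amplicon_0", "amplicon_1", "amplicon_2"],
    init_graph={
        "amplicon_0": {"amplicon_1": 6.0, "amplicon_2": 15.0},
        "amplicon_1": {"amplicon_2": 4.0},
    },
)
previous_nodes, shortest_path = dijkstra_algorithm(toy_graph, "amplicon_0")
print(shortest_path["amplicon_2"])  # 10.0 -> going via amplicon_1 beats the direct edge (15.0)
print(get_min_path(previous_nodes, shortest_path, "amplicon_0", "amplicon_2"))
# ['amplicon_0', 'amplicon_1', 'amplicon_2']
```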
+ def create_scheme_dic(amplicon_scheme, amplicons, all_primers):
+     """
+     creates the final scheme dictionary
+     """
+
+     scheme_dictionary = {
+         0: {},
+         1: {}
+     }
+
+     for pool in (0, 1):
+         for amp in amplicon_scheme[pool::2]:
+             scheme_dictionary[pool][amp] = {}
+             primer_names = [amplicons[amp][2], amplicons[amp][3]]
+             scheme_dictionary[pool][amp][primer_names[0]] = all_primers["+"][primer_names[0]]
+             scheme_dictionary[pool][amp][primer_names[1]] = all_primers["-"][primer_names[1]]
+
+     return scheme_dictionary
+
+
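The `amplicon_scheme[pool::2]` slicing is what alternates neighbouring, overlapping amplicons between the two PCR pools; with an invented path:

```python
amplicon_scheme = ["amplicon_0", "amplicon_3", "amplicon_7", "amplicon_9"]
print(amplicon_scheme[0::2])  # ['amplicon_0', 'amplicon_7'] -> pool 0
print(amplicon_scheme[1::2])  # ['amplicon_3', 'amplicon_9'] -> pool 1
```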
+ def find_best_covering_scheme(amplicons, amplicon_graph, all_primers):
+     """
+     this brute-forces the amplicon scheme search until the largest
+     coverage with the minimal costs is achieved.
+     """
+     # init
+     coverage = 0
+     best_coverage = 0
+     max_stop = max(amplicons.items(), key=lambda x: x[1][1])[1][1]
+     best_score = float('infinity')
+
+     for start_node in amplicons:
+         # if the currently best coverage + start nucleotide of the currently tested amplicon
+         # is smaller than the maximal stop nucleotide there might be a better amplicon
+         # scheme that covers more of the genome
+         if amplicons[start_node][0] + best_coverage <= max_stop:
+             previous_nodes, shortest_path = dijkstra_algorithm(amplicon_graph, start_node)
+             # only continue if there are previous_nodes
+             if previous_nodes:
+                 target_node, score = get_end_node(previous_nodes, shortest_path, amplicons)
+                 coverage = amplicons[target_node][1] - amplicons[start_node][0]
+                 # if the new coverage is larger, go for the larger coverage
+                 if coverage > best_coverage:
+                     best_start_node = start_node
+                     best_target_node = target_node
+                     best_previous_nodes = previous_nodes
+                     best_shortest_path = shortest_path
+                     best_score = score
+                     best_coverage = coverage
+                 # if the coverages are identical, go for the lowest costs
+                 elif coverage == best_coverage:
+                     if score < best_score:
+                         best_start_node = start_node
+                         best_target_node = target_node
+                         best_previous_nodes = previous_nodes
+                         best_shortest_path = shortest_path
+                         best_score = score
+                         best_coverage = coverage
+             else:
+                 # check if the single amplicon has the largest coverage so far
+                 coverage = amplicons[start_node][1] - amplicons[start_node][0]
+                 if coverage > best_coverage:
+                     best_start_node = start_node
+                     best_previous_nodes = previous_nodes
+                     best_shortest_path = shortest_path
+                     best_score = amplicons[start_node][5]
+                     best_coverage = coverage
+         # no need to check more, the best covering amplicon scheme was found and
+         # has the minimal score compared to the schemes with the same coverage
+         else:
+             break
+
+     if best_previous_nodes:
+         amplicon_scheme = get_min_path(best_previous_nodes, best_shortest_path, best_start_node, best_target_node)
+     else:
+         # if no previous nodes are found but a single amplicon gives the largest
+         # coverage - return it as the best scheme
+         amplicon_scheme = [best_start_node]
+
+     return best_coverage, create_scheme_dic(amplicon_scheme, amplicons, all_primers)
+
+
+ def test_scheme_for_dimers(amplicon_scheme):
+     """
+     test the best scoring scheme for primer dimers
+     """
+
+     primer_dimers = []
+
+     for pool in amplicon_scheme:
+         # test the primer dimers only within the respective pools
+         tested_primers = []
+         for amp in amplicon_scheme[pool]:
+             for primer in amplicon_scheme[pool][amp]:
+                 # remember where the current primer was in the scheme
+                 current_primer = (pool, amp, primer, amplicon_scheme[pool][amp][primer])
+                 current_seq = current_primer[3][0]
+                 for tested in tested_primers:
+                     tested_seq = tested[3][0]
+                     if primers.calc_dimer(current_seq, tested_seq).tm <= config.PRIMER_MAX_DIMER_TMP:
+                         continue
+                     primer_dimers.append((current_primer, tested))
+                 # and remember all tested primers
+                 tested_primers.append(current_primer)
+
+     return primer_dimers
+
+
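`primers.calc_dimer` lives in `varvamp/scripts/primers.py` and is not shown in this diff; assuming it is a thin wrapper around primer3-py's heterodimer calculation (an assumption based on the `primer3-py` dependency listed in the METADATA below), the per-pair check is roughly:

```python
import primer3

left_seq = "AGCGTAGCTAGCTAGCTAGCTA"   # invented primer sequences
right_seq = "TAGCTAGCTAGCTAGCTACGCT"
max_dimer_tmp = 21.0                  # invented cutoff, not the varVAMP default

result = primer3.calcHeterodimer(left_seq, right_seq)
if result.tm > max_dimer_tmp:
    print(f"potential primer dimer (Tm = {result.tm:.1f} degC)")
```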
+ def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates):
+     """
+     get overlapping primers for a primer dimer pair from the previously
+     excluded candidates. returns a list of lists with possible replacement
+     primers for each primer in the dimer.
+     """
+
+     overlapping_primers = []
+     # test each primer in the dimer
+     for primer in dimer:
+         overlapping_primers_temp = []
+         # check in which list to look for them
+         overlap_range = range(primer[3][1], primer[3][2]+1)
+         overlap_set = set(overlap_range)
+         if "RIGHT" in primer[2]:
+             primers_to_test = right_primer_candidates
+         else:
+             primers_to_test = left_primer_candidates
+         # and check this list for all primers that overlap
+         for potential_new in primers_to_test:
+             primer_positions = list(range(potential_new[1], potential_new[2]+1))
+             if not any(x in primer_positions for x in overlap_set):
+                 continue
+             overlapping_primers_temp.append((primer[0], primer[1], primer[2], potential_new))
+
+         overlapping_primers.append(overlapping_primers_temp)
+
+     return overlapping_primers
+
+
+ def test_overlaps_for_dimers(overlapping_primers):
+     """
+     test the overlapping primers for dimers. return new primers.
+     """
+     for first_overlap in overlapping_primers[0]:
+         for second_overlap in overlapping_primers[1]:
+             # return the first match. primers are sorted by score.
+             # first pair that makes it has the lowest score
+             if primers.calc_dimer(first_overlap[3][0], second_overlap[3][0]).tm <= config.PRIMER_MAX_DIMER_TMP:
+                 return [first_overlap, second_overlap]
+
+
+ def check_and_solve_heterodimers(amplicon_scheme, left_primer_candidates, right_primer_candidates, all_primers):
+     """
+     check the scheme for heterodimers and try to find
+     new primers that overlap and replace the existing ones.
+     this can lead to new primer dimers. therefore the
+     process is repeated until no primer dimers are found
+     in the updated scheme or all found primer dimers have
+     no replacements.
+     """
+     not_solvable = []
+
+     primer_dimers = test_scheme_for_dimers(amplicon_scheme)
+
+     while primer_dimers:
+         for dimer in primer_dimers:
+             # skip primer dimers that could not be solved previously
+             if dimer in not_solvable:
+                 continue
+             overlapping_primers = get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates)
+             # test all possible primers against each other for dimers
+             new_primers = test_overlaps_for_dimers(overlapping_primers)
+             # now change these primers in the scheme
+             if new_primers:
+                 for new in new_primers:
+                     # overwrite in the final scheme
+                     amplicon_scheme[new[0]][new[1]][new[2]] = new[3]
+                     # and in all primers
+                     if "LEFT" in new[2]:
+                         strand = "+"
+                     else:
+                         strand = "-"
+                     all_primers[strand][new[2]] = new[3]
+             # or remember the dimers for which varvamp did not find a replacement.
+             else:
+                 not_solvable.append(dimer)
+         # none of the primer dimers of this iteration could be solved
+         if all(x in not_solvable for x in primer_dimers):
+             break
+         # some could be solved. lets check the updated scheme again.
+         else:
+             primer_dimers = test_scheme_for_dimers(amplicon_scheme)
+
+     return not_solvable
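Tying the module together, the call order suggested by the functions above looks roughly like the sketch below. The primer dictionaries are tiny invented placeholders in the format inferred from this file ([sequence, start, stop, penalty]); in a real run they are produced by the other `varvamp.scripts` modules, and `check_and_solve_heterodimers` would additionally take the left/right candidate lists.

```python
from varvamp.scripts.scheme import (
    find_amplicons, create_amplicon_graph, find_best_covering_scheme
)

# hypothetical primers: [sequence, start, stop, penalty]
all_primers = {
    "+": {"LEFT_0": ["ACACACACACACACACACACACAC", 0, 24, 2.0]},
    "-": {"RIGHT_0": ["AGAGAGAGAGAGAGAGAGAGAGAG", 976, 1000, 3.0]},
}
amplicons = find_amplicons(all_primers, opt_len=800, max_len=1200)
amplicon_graph = create_amplicon_graph(amplicons, min_overlap=100)
coverage, scheme = find_best_covering_scheme(amplicons, amplicon_graph, all_primers)
print(coverage)  # 1000 for this single-amplicon toy input
```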
@@ -0,0 +1,53 @@
+ Metadata-Version: 2.1
+ Name: varvamp
+ Version: 0.3
+ Summary: varvamp
+ Home-page: https://github.com/jonas-fuchs/varVAMP
+ Author: Dr. Jonas Fuchs
+ Author-email: jonas.fuchs@uniklinik-freiburg.de
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: biopython (>=1.79)
+ Requires-Dist: matplotlib (>=3.5.1)
+ Requires-Dist: primer3-py (>=1.1.0)
+ Requires-Dist: pandas (>=1.4.4)
+ Requires-Dist: numpy (>=1.23.3)
+
+ **var**iable **V**irus**AMP**licons (varVAMP) is a tool to design primers for highly diverse viruses. The input is an alignment of your viral (full-genome) sequences.
+
+ # varVAMP
+
+ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
+
+ For a lot of virus genera it is difficult to design pan-specific primers. varVAMP solves this by introducing ambiguous characters into primers and minimizing mismatches at the 3' end. Primers might not work for some sequences of your input alignment but should recognize the large majority.
+
+ **varVAMP comes in three different flavors:**
+
+ <img src="./docs/varvamp.png" alt="varVAMP logo" />
+
+ **SANGER** *(coming soon)*: varVAMP searches for the very best primers and reports back an amplicon which can be used for PCR-based screening approaches.
+
+ **TILED**: varVAMP uses a graph-based approach to design overlapping amplicons that tile the entire viral genome. The resulting amplicons are suitable for Oxford Nanopore or Illumina based full-genome sequencing.
+
+ **QPCR** *(coming soon)*: varVAMP searches for small amplicons with an internal primer for the probe. It minimizes temperature differences between the primers.
+
+ This program is currently being developed and is in an alpha state. You are welcome to use this software. If you successfully design primers, drop me a mail. It might be possible to collaborate!
+
+ # Documentation
+
+ * [Installation](docs/installation.md)
+ * [Preparing the data](docs/preparing_the_data.md)
+ * [Usage](docs/usage.md)
+ * [Output](docs/output.md)
+ * [How it works](docs/how_varvamp_works.md)
+ * [FAQ](docs/FAQ.md)
+
+ ---
+
+ **Important disclaimer:**
+ *For the primer design, varVAMP uses [primer3](https://pypi.org/project/primer3-py/) to check if digested kmers of a sequence are potential primers. Some of the functions for this were adapted from [primalscheme](www.github.com/aresti/primalscheme) and I do not claim credit.*
+
+ *The remaining code is under the GPLv3 licence. The code is WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.*
@@ -0,0 +1,17 @@
+ varvamp/__init__.py,sha256=E30_W1z1l7QXGydNiTfAPOyYCPn5e8PCdg035eP5Q68,105
+ varvamp/__main__.py,sha256=9R3mbX2_Q5LByPx8WLoTbvZ-G2dbRSMpDlgze0Wm5Fc,98
+ varvamp/command.py,sha256=JrzxZoQGav42hIud-_EyEdXPHWfLrRQSv9WtnT0RLNg,8167
+ varvamp/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ varvamp/scripts/alignment.py,sha256=gJAyZ-J3prLiaPWNPFDBHpA8FdTQGjLecw_lrHCd2d8,6307
+ varvamp/scripts/config.py,sha256=koQSBfeEe-Nj3gI-O8z_5tIHBoB9OLIVx2MPimML2DA,2074
+ varvamp/scripts/consensus.py,sha256=I6q6GPQFif1xqStcbblFFdwo5l6MoyAOsNGKNvaxK4U,3374
+ varvamp/scripts/conserved.py,sha256=xDu8cR7wpmTt-1hjeYuJGfFy5O6WiDKUHwnmaQOh13w,4483
+ varvamp/scripts/logging.py,sha256=ImQHLiGUMQ8fOezkPOBIrfD5iJndNSkJ3pjBnjuZHTg,10303
+ varvamp/scripts/primers.py,sha256=WJnTOjS_a0NoN99Yvrqlcuj2I9a6B8CtjZ9pl4WVyvs,13241
+ varvamp/scripts/reporting.py,sha256=nXcMnv31C3AQmpbqZ5rgqvKNVP3dNSpflef75sRZVsA,12731
+ varvamp/scripts/scheme.py,sha256=FstJRz2TTgnlNAGKOP7xxQV672vHgqhjCOfd7DHSSYg,14569
+ varvamp-0.3.dist-info/METADATA,sha256=K_MVCTC8JsOnlUSGCSVyYHJBf-9-897AgljHXIeFKHw,2915
+ varvamp-0.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ varvamp-0.3.dist-info/entry_points.txt,sha256=puzW-basyBexZT4JVRUfUEqobvFmEyfqRQaqFjp7rB0,49
+ varvamp-0.3.dist-info/top_level.txt,sha256=11oVwE3SBUB9aTmvpvEDru95Tc5GZqQikzzFjw2eVGc,8
+ varvamp-0.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: bdist_wheel (0.40.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1,2 @@
+ [console_scripts]
+ varvamp = varvamp.command:main
@@ -0,0 +1 @@
+ varvamp