PyPI - varvamp - Versions diffs - 1.2.1__py3-none-any.whl → 1.3__py3-none-any.whl - Mend

varvamp 1.2.1 (py3-none-any.whl) → 1.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

varvamp/__init__.py +6 -3
varvamp/command.py +134 -60
varvamp/scripts/alignment.py +54 -164
varvamp/scripts/default_config.py +5 -3
varvamp/scripts/logging.py +67 -21
varvamp/scripts/param_estimation.py +84 -62
varvamp/scripts/primers.py +190 -46
varvamp/scripts/qpcr.py +141 -117
varvamp/scripts/reporting.py +45 -40
varvamp/scripts/scheme.py +101 -52
varvamp-1.3.dist-info/METADATA +760 -0
varvamp-1.3.dist-info/RECORD +22 -0
{varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/WHEEL +1 -1
varvamp-1.3.dist-info/licenses/LICENSE +674 -0
varvamp-1.2.1.dist-info/METADATA +0 -78
varvamp-1.2.1.dist-info/RECORD +0 -21
{varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/entry_points.txt +0 -0
{varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/top_level.txt +0 -0

varvamp/scripts/primers.py CHANGED

@@ -2,10 +2,16 @@
 primer creation and evaluation
 """
+# BUILTIN
+import itertools
+import re
+import multiprocessing
+import functools
 # LIBS
-from Bio.Seq import Seq
+from Bio.Seq import MutableSeq
+from Bio import SeqIO
 import primer3 as p3
-import math
 # varVAMP
 from varvamp.scripts import config
@@ -60,6 +66,50 @@ def calc_dimer(seq1, seq2, structure=False):
     )
+def has_end_overlap(dimer_result):
+    """
+    checks if two oligos overlap at their ends
+    Example:
+        xxxxxxxxtagc-------
+        --------atcgxxxxxxx
+    """
+    if dimer_result.structure_found:
+        # clean structure
+        structure = [x[4:] for x in dimer_result.ascii_structure_lines]
+        # check if we have an overlap that is large enough
+        overlap = len(structure[1].replace(" ", ""))
+        if overlap <= config.END_OVERLAP:
+            return False
+        # not more than one conseq. internal mismatch
+        if '  ' in structure[1].lstrip(" "):
+            return False
+        # The alignment length of the ACII structure is equal to the first part of the structure
+        # and the maximum possible alignment length is the cumulative length of both primers (-> no overlap at all)
+        alignment_length = len(structure[0])
+        maximum_alignment_length = len(re.findall("[ATCG]", "".join(structure)))
+        # this means that for a perfect end overlap the alignment length is equal to:
+        # len(primer1) + len(primer2) - overlap.
+        if alignment_length == maximum_alignment_length - overlap:
+            return True
+    return False
+def is_dimer(seq1, seq2):
+    """
+    check if two sequences dimerize above threshold or are overlapping at their ends
+    """
+    dimer_result = calc_dimer(seq1, seq2, structure=True)
+    # check both the temperature and the deltaG
+    if dimer_result.tm > config.PRIMER_MAX_DIMER_TMP or dimer_result.dg < config.PRIMER_MAX_DIMER_DELTAG:
+        return True
+    # check for perfect end overlaps (this can result in primer extensions even though the tm/dg are okay)
+    if has_end_overlap(dimer_result):
+        return True
+    return False
 def calc_max_polyx(seq):
     """
     calculate maximum polyx of a seq
@@ -126,7 +176,7 @@ def rev_complement(seq):
     """
     reverse complement a sequence
     """
-    return str(Seq(seq).reverse_complement())
+    return str(MutableSeq(seq).reverse_complement(inplace=True))
 def calc_permutation_penalty(amb_seq):
@@ -262,13 +312,14 @@ def filter_kmer_direction_independent(seq, primer_temps=config.PRIMER_TMP, gc_ra
     filter kmer for temperature, gc content,
     poly x, dinucleotide repeats and homodimerization
     """
     return(
         (primer_temps[0] <= calc_temp(seq) <= primer_temps[1])
         and (gc_range[0] <= calc_gc(seq) <= gc_range[1])
         and (calc_max_polyx(seq) <= config.PRIMER_MAX_POLYX)
         and (calc_max_dinuc_repeats(seq) <= config.PRIMER_MAX_DINUC_REPEATS)
         and (calc_base_penalty(seq, primer_temps, gc_range, primer_sizes) <= config.PRIMER_MAX_BASE_PENALTY)
-        and (calc_dimer(seq, seq).tm <= config.PRIMER_MAX_DIMER_TMP)
+        and not is_dimer(seq, seq)
     )
@@ -292,51 +343,66 @@ def filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
     )
-def find_primers(kmers, ambiguous_consensus, alignment):
+def _process_kmer_batch(ambiguous_consensus, alignment, kmers):
     """
-    filter kmers direction specific and append penalties
-    --> potential primers
+    Helper function for multiprocessing: process a batch of kmers.
+    Returns (left_primers, right_primers) tuples.
     """
-    left_primer_candidates = []
-    right_primer_candidates = []
+    left_primers = []
+    right_primers = []
     for kmer in kmers:
-        # filter kmers based on their direction independend stats
         if not filter_kmer_direction_independent(kmer[0]):
             continue
-        # calc base penalty
-        base_penalty = calc_base_penalty(kmer[0],config.PRIMER_TMP, config.PRIMER_GC_RANGE, config.PRIMER_SIZES)
-        # calculate per base mismatches
-        per_base_mismatches = calc_per_base_mismatches(
-                                kmer,
-                                alignment,
-                                ambiguous_consensus
-                            )
-        # calculate permutation penealty
-        permutation_penalty = calc_permutation_penalty(
-                                ambiguous_consensus[kmer[1]:kmer[2]]
-                            )
-        # now check direction specific
+        # calc penalties
+        base_penalty = calc_base_penalty(kmer[0], config.PRIMER_TMP, config.PRIMER_GC_RANGE, config.PRIMER_SIZES)
+        per_base_mismatches = calc_per_base_mismatches(kmer, alignment, ambiguous_consensus)
+        permutation_penalty = calc_permutation_penalty(ambiguous_consensus[kmer[1]:kmer[2]])
+        # some filters depend on the direction of each primer
         for direction in ["+", "-"]:
-            # check if kmer passes direction filter
             if not filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
                 continue
-            # calculate the 3' penalty
-            three_prime_penalty = calc_3_prime_penalty(
-                                    direction,
-                                    per_base_mismatches
-                                )
-            # add all penalties
+            # calc penalties
+            three_prime_penalty = calc_3_prime_penalty(direction, per_base_mismatches)
             primer_penalty = base_penalty + permutation_penalty + three_prime_penalty
-            # sort into lists
+            # add to lists depending on their direction
             if direction == "+":
-                left_primer_candidates.append(
-                    [kmer[0], kmer[1], kmer[2], primer_penalty, per_base_mismatches]
-                )
-            if direction == "-":
-                right_primer_candidates.append(
-                    [rev_complement(kmer[0]), kmer[1], kmer[2], primer_penalty, per_base_mismatches]
-                )
+                left_primers.append([kmer[0], kmer[1], kmer[2], primer_penalty, per_base_mismatches])
+            else:
+                right_primers.append([rev_complement(kmer[0]), kmer[1], kmer[2], primer_penalty, per_base_mismatches])
+    return left_primers, right_primers
+def find_primers(kmers, ambiguous_consensus, alignment, num_processes):
+    """
+    Filter kmers direction specific and append penalties --> potential primers.
+    Uses multiprocessing to process kmers in parallel.
+    """
+    if not kmers:
+        return [], []
+    # Convert kmers set to list for slicing
+    kmers = list(kmers)
+    batch_size = max(1, int(len(kmers)/num_processes))
+    # Split kmers into batches
+    batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
+    callable_f = functools.partial(
+        _process_kmer_batch,
+        ambiguous_consensus, alignment
+    )
+    # Solve dimers in parallel
+    with multiprocessing.Pool(processes=num_processes) as pool:
+        results = pool.map(callable_f, batches)
+    # Aggregate results
+    left_primer_candidates = []
+    right_primer_candidates = []
+    for left_primers, right_primers in results:
+        left_primer_candidates.extend(left_primers)
+        right_primer_candidates.extend(right_primers)
     return left_primer_candidates, right_primer_candidates
@@ -351,7 +417,7 @@ def create_primer_dictionary(primer_candidates, direction):
     for primer in primer_candidates:
         if direction == "+":
             direction_name = "LEFT"
-        elif direction == "-":
+        else:
             direction_name = "RIGHT"
         primer_name = f"{direction_name}_{primer_idx}"
         primer_dict[primer_name] = primer
@@ -360,7 +426,7 @@ def create_primer_dictionary(primer_candidates, direction):
     return primer_dict
-def find_best_primers(left_primer_candidates, right_primer_candidates):
+def find_best_primers(left_primer_candidates, right_primer_candidates, high_conservation:bool=False):
     """
     Primer candidates are likely overlapping. Here, the list of primers
     is sorted for the lowest to highest penalty. Then, the next lowest
@@ -386,16 +452,20 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
         primer_candidates.sort(key=lambda x: (x[3], x[1]))
         # ini everything with the primer with the lowest penalty
         to_retain = [primer_candidates[0]]
-        primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]))
-        primer_set = set(primer_ranges)
+        primer_set = set(range(primer_candidates[0][1], primer_candidates[0][2]))
-        for primer in primer_candidates:
+        for primer in primer_candidates[1:]:
+            # for highly conserved alignments exclude everything that overlaps with the best primer
+            # this reduces graph complexity by quite a large margin
+            if high_conservation:
+                primer_positions =set(range(primer[1], primer[2]))
             # get the thirds of the primer, only consider the middle
-            thirds_len = int((primer[2] - primer[1])/3)
-            primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len))
+            else:
+                thirds_len = int((primer[2] - primer[1])/3)
+                primer_positions = set(range(primer[1] + thirds_len, primer[2] - thirds_len))
             # check if none of the nucleotides of the next primer
             # are already covered by a better primer
-            if not any(x in primer_positions for x in primer_set):
+            if primer_set.isdisjoint(primer_positions):
                 # update the primer set
                 primer_set.update(primer_positions)
                 # append this primer as it has a low penalty and is not overlapping
@@ -409,3 +479,77 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
     # and create a dict
     return all_primers
+def get_permutations(seq):
+    """
+    get all permutations of an ambiguous sequence.
+    """
+    splits = [config.AMBIG_NUCS.get(nuc, [nuc]) for nuc in seq]
+    return[''.join(p) for p in itertools.product(*splits)]
+def parse_primer_fasta(fasta_path):
+    """
+    Parse a primer FASTA file and return a list of sequences using BioPython.
+    """
+    sequences = []
+    for record in SeqIO.parse(fasta_path, "fasta"):
+        seq = str(record.seq).lower()
+        # Only include primers up to 40 nucleotides
+        if len(seq) <= 40:
+            sequences += get_permutations(seq)
+    return list(set(sequences))  # deduplication
+def check_primer_against_externals(external_sequences, primer):
+    """
+    Worker function to check a single primer against all external sequences.
+    Returns the primer if it passes, None otherwise.
+    Handles both list format and dict format (name, data) tuples.
+    """
+    # Extract sequence based on input format
+    if isinstance(primer, tuple):
+        name, data = primer
+        seq = data[0]
+    else:
+        seq = primer[0]
+    for ext_seq in external_sequences:
+        if is_dimer(seq, ext_seq):
+            return None
+    return primer
+def filter_non_dimer_candidates(primer_candidates, external_sequences, n_processes):
+    """
+    Filter out primer candidates that form dimers with external sequences.
+    Uses multiprocessing to speed up checks.
+    """
+    is_dict = isinstance(primer_candidates, dict)
+    callable_f = functools.partial(
+        check_primer_against_externals,
+        external_sequences
+    )
+    with multiprocessing.Pool(processes=n_processes) as pool:
+        # Prepare arguments based on input type
+        # qpcr probes are stored in dictionaries --> result in tuples when unpacked
+        if is_dict:
+            results = pool.map(callable_f, primer_candidates.items())
+        else:
+            results = pool.map(callable_f, primer_candidates)
+    # Filter and restore original format
+    if is_dict:
+        filtered_results = [result for result in results if result is not None]
+        return {name: data for name, data in filtered_results}
+    else:
+        return [primer for primer in results if primer is not None]

varvamp/scripts/qpcr.py CHANGED

@@ -7,11 +7,11 @@ import re
 import seqfold
 import itertools
 import multiprocessing
+import functools
 # varVAMP
 from varvamp.scripts import config
 from varvamp.scripts import primers
-from varvamp.scripts import reporting
 def choose_probe_direction(seq):
@@ -51,35 +51,25 @@ def filter_probe_direction_dependent(seq):
     )
-def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned):
+def _process_kmer_batch_probes(ambiguous_consensus, alignment_cleaned, kmers):
     """
-    find potential qPCR probes
+    Helper function for multiprocessing: process a batch of kmers for probes.
+    Returns probe_candidates dictionary.
     """
     probe_candidates = {}
     probe_idx = 0
     for kmer in kmers:
-        # filter probe for base params
-        if not primers.filter_kmer_direction_independent(kmer[0], config.QPROBE_TMP, config.QPROBE_GC_RANGE,
-                                                         config.QPROBE_SIZES):
+        if not primers.filter_kmer_direction_independent(kmer[0], config.QPROBE_TMP, config.QPROBE_GC_RANGE, config.QPROBE_SIZES):
             continue
-        # do not allow ambiguous chars at both ends
         if ambiguous_ends(ambiguous_consensus[kmer[1]:kmer[2]]):
             continue
-        # calc penalties analogous to primer search
-        base_penalty = primers.calc_base_penalty(kmer[0], config.QPROBE_TMP, config.QPROBE_GC_RANGE,
-                                                 config.QPROBE_SIZES)
-        per_base_mismatches = primers.calc_per_base_mismatches(
-            kmer,
-            alignment_cleaned,
-            ambiguous_consensus
-        )
-        permutation_penalty = primers.calc_permutation_penalty(
-            ambiguous_consensus[kmer[1]:kmer[2]]
-        )
-        # determine the direction with more cytosine or set both if 50 %
+        base_penalty = primers.calc_base_penalty(kmer[0], config.QPROBE_TMP, config.QPROBE_GC_RANGE, config.QPROBE_SIZES)
+        per_base_mismatches = primers.calc_per_base_mismatches(kmer, alignment_cleaned, ambiguous_consensus)
+        permutation_penalty = primers.calc_permutation_penalty(ambiguous_consensus[kmer[1]:kmer[2]])
         direction = choose_probe_direction(kmer[0])
-        # create probe dictionary
         if "+" in direction:
             if filter_probe_direction_dependent(kmer[0]):
                 probe_name = f"PROBE_{probe_idx}_LEFT"
@@ -96,7 +86,44 @@ def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned):
                                                 base_penalty + permutation_penalty + three_prime_penalty,
                                                 per_base_mismatches, direction]
                 probe_idx += 1
-    # sort by penalty
+    return probe_candidates
+def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned, num_processes):
+    """
+    Find potential qPCR probes using multiprocessing.
+    """
+    # Convert kmers set to list for batching
+    kmers = list(kmers)
+    # Split kmers into batches
+    batch_size = max(1, int(len(kmers) / num_processes))
+    batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
+    # Prepare arguments for each dimer
+    callable_f = functools.partial(
+        _process_kmer_batch_probes,
+        ambiguous_consensus, alignment_cleaned
+    )
+    with multiprocessing.Pool(processes=num_processes) as pool:
+        results = pool.map(callable_f, batches)
+    # Aggregate results and re-index probe names
+    probe_candidates = {}
+    probe_idx = 0
+    for batch_probes in results:
+        if batch_probes is None:
+            continue
+        for probe_name, probe_data in batch_probes.items():
+            # Extract direction from original probe name
+            direction = "LEFT" if "LEFT" in probe_name else "RIGHT"
+            new_probe_name = f"PROBE_{probe_idx}_{direction}"
+            probe_candidates[new_probe_name] = probe_data
+            probe_idx += 1
+    # Sort by penalty
     probe_candidates = dict(sorted(probe_candidates.items(), key=lambda x: x[1][3]))
     return probe_candidates
@@ -139,54 +166,30 @@ def hardfilter_amplicon(majority_consensus, left_primer, right_primer):
     )
-def check_end_overlap(dimer_result):
-    """
-    checks if two oligos overlap at their ends (pretty rare)
-    Example:
-        xxxxxxxxtagc-------
-        --------atcgxxxxxxx
-    """
-    if dimer_result.structure_found:
-        # clean structure
-        structure = [x[4:] for x in dimer_result.ascii_structure_lines]
-        # calc overlap and the cumulative len of the oligos
-        overlap = len(structure[1].replace(" ", ""))
-        nt_count = len(re.findall("[ATCG]", "".join(structure)))
-        # check for overlaps at the ends and the min overlap (allows for some amount of mismatches)
-        if overlap > config.END_OVERLAP and nt_count <= len(structure[0]) + overlap + 1 and "  " not in structure[1].lstrip(" "):
-            return True
-    return False
-def forms_dimer_or_overhangs(right_primer, left_primer, probe, ambiguous_consensus):
+def dimer_in_combinations(right_primer, left_primer, probe, ambiguous_consensus):
     """
-    checks if combinations of primers/probe form dimers or overhangs
+    checks if primers cause dimers and if combinations of primers/probe including all permutations form dimers
     """
     forms_structure = False
     # first check if there are dimers between the two flanking primers
-    if primers.calc_dimer(left_primer[0], right_primer[0]).tm > config.PRIMER_MAX_DIMER_TMP:
+    if primers.is_dimer(left_primer[0], right_primer[0]):
         return True
     # for the probe check all permutations and possible overhangs to ensure
     # that none of the primers could cause unspecific probe binding.
     # first get all permutations
-    probe_per = reporting.get_permutations(ambiguous_consensus[probe[1]:probe[2]])
-    left_per = reporting.get_permutations(ambiguous_consensus[left_primer[1]:left_primer[2]])
-    right_per = reporting.get_permutations(ambiguous_consensus[right_primer[1]:right_primer[2]])
+    probe_per = primers.get_permutations(ambiguous_consensus[probe[1]:probe[2]])
+    left_per = primers.get_permutations(ambiguous_consensus[left_primer[1]:left_primer[2]])
+    right_per = primers.get_permutations(ambiguous_consensus[right_primer[1]:right_primer[2]])
     # then check all permutations
     for combination in [(probe_per, left_per), (probe_per, right_per)]:
-        for oligo1 in combination[0]:
-            for oligo2 in combination[1]:
-                dimer_result = primers.calc_dimer(oligo1, oligo2, structure=True)
-                if dimer_result.tm >= config.PRIMER_MAX_DIMER_TMP or check_end_overlap(dimer_result):
-                    forms_structure = True
-                    break
-            # break all loops because we found an unwanted structure in one of the permutations
-            # (either dimer formation or a too long overlap at the ends of the primer)
-            if forms_structure:
+        for oligo1, oligo2 in itertools.product(*combination):
+            if primers.is_dimer(oligo1, oligo2):
+                forms_structure = True
                 break
+        # break also outer loop because we found an unwanted structure in one of the permutations
+        # (either dimer formation or a too long overlap at the ends of the primer)
         if forms_structure:
             break
@@ -231,7 +234,7 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
                     [config.QPROBE_TEMP_DIFF[0] <= probe_temp - x <= config.QPROBE_TEMP_DIFF[1] for x in primer_temps]):
                 continue
             # .... all combination of oligos do not form dimers or overhangs.
-            if forms_dimer_or_overhangs(right_primer, left_primer, qpcr_probes[probe], ambiguous_consensus):
+            if dimer_in_combinations(right_primer, left_primer, qpcr_probes[probe], ambiguous_consensus):
                 continue
             # append to list and break as this is the primer combi
             # with the lowest penalty (primers are sorted by penalty)
@@ -245,54 +248,74 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
     return primer_combinations
-def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidates, majority_consensus,
-                     ambiguous_consensus):
+def find_single_qpcr_scheme(left_primer_candidates, right_primer_candidates, qpcr_probes,
+                            majority_consensus, ambiguous_consensus, probe):
     """
-    this finds the final qPCR schemes. it slices for primers flanking a probe and
-    test all left/right combinations whether they are potential amplicons. as primers
-    are sorted by penalty, only the very first match is considered as this has the
-    lowest penalty. however, probes are overlapping and there is a high chance that
-    left and right primers are found multiple times. to consider only one primer-probe
-    combination the probes are also sorted by penalty. therefore, if a primer
-    combination has been found already the optimal probe was already selected and
-    there is no need to consider this primer probe combination.
+    Find a qPCR scheme for a single probe.
     """
+    probe_name, probe_data = probe
+    # Generate flanking subsets within the worker process
+    left_subset = flanking_primer_subset(left_primer_candidates, "+", probe_data)
+    right_subset = flanking_primer_subset(right_primer_candidates, "-", probe_data)
+    if not left_subset or not right_subset:
+        return probe_name, None
+    primer_combination = assess_amplicons(
+        left_subset, right_subset, qpcr_probes, probe_name,
+        majority_consensus, ambiguous_consensus
+    )
+    return probe_name, primer_combination
+def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidates,
+                     majority_consensus, ambiguous_consensus, num_processes):
+    """
+    Find final qPCR schemes using multiprocessing to evaluate probes in parallel.
+    Probes are sorted by penalty, ensuring optimal probe selection.
+    """
     qpcr_scheme_candidates = []
     found_amplicons = []
     amplicon_nr = -1
-    for probe in qpcr_probes:
-        left_subset = flanking_primer_subset(left_primer_candidates, "+", qpcr_probes[probe])
-        right_subset = flanking_primer_subset(right_primer_candidates, "-", qpcr_probes[probe])
-        # consider if there are primers flanking the probe ...
-        if not left_subset or not right_subset:
-            continue
-        primer_combination = assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_consensus,
-                                              ambiguous_consensus)
-        # ... a combi has been found, ...
+    # Prepare arguments for parallel processing - pass full primer lists
+    batch_size = max(1, int(len(qpcr_probes) / num_processes))
+    callable_f = functools.partial(
+        find_single_qpcr_scheme,
+        left_primer_candidates, right_primer_candidates, qpcr_probes, majority_consensus, ambiguous_consensus
+    )
+    # Process probes in parallel
+    with multiprocessing.Pool(processes=num_processes) as pool:
+        results = pool.map(callable_f, qpcr_probes.items(), chunksize=batch_size)
+    # Aggregate results in original probe order (sorted by penalty)
+    for probe_name, primer_combination in results:
         if not primer_combination:
             continue
-        # ...and this combi is not already present for a probe with a better penalty.
         if primer_combination in found_amplicons:
             continue
-        # populate the primer dictionary:
         amplicon_nr += 1
         found_amplicons.append(primer_combination)
-        qpcr_scheme_candidates.append(
-            {
-                "id": f"AMPLICON_{amplicon_nr}",
-                "penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
-                "PROBE": qpcr_probes[probe],
-                "LEFT": primer_combination[0],
-                "RIGHT": primer_combination[1]
-            }
-        )
-    # and again sort by total penalty (left + right + probe)
+        qpcr_scheme_candidates.append({
+            "id": f"AMPLICON_{amplicon_nr}",
+            "penalty": qpcr_probes[probe_name][3] + primer_combination[0][3] + primer_combination[1][3],
+            "PROBE": qpcr_probes[probe_name],
+            "LEFT": primer_combination[0],
+            "RIGHT": primer_combination[1]
+        })
+    # Sort by total penalty
+    qpcr_scheme_candidates.sort(key=lambda x: x["penalty"])
     return qpcr_scheme_candidates
-def process_single_amplicon_deltaG(amplicon, majority_consensus):
+def process_single_amplicon_deltaG(majority_consensus, amplicon):
     """
     Process a single amplicon to test its deltaG and apply filtering.
     This function will be called concurrently by multiple threads.
@@ -310,7 +333,7 @@ def process_single_amplicon_deltaG(amplicon, majority_consensus):
     return amplicon
-def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_threads):
+def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_processes):
     """
     Test all amplicon deltaGs for the top n hits at the lowest primer temperature
     and filters if they fall below the cutoff. Multiple processes are used
@@ -318,32 +341,33 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
     """
     final_amplicons = []
-    # Create a pool of processes to handle the concurrent processing
-    with multiprocessing.Pool(processes=n_threads) as pool:
-        # Create a list of the first n amplicon tuples for processing
-        # The list is sorted first on whether offset targets were predicted for the amplicon,
-        # then by penalty. This ensures that amplicons with offset targets are always considered last
-        amplicons = itertools.islice(
-            sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])),
-            n_to_test
-        )
-        # process amplicons concurrently
-        results = pool.starmap(process_single_amplicon_deltaG, [(amp, majority_consensus) for amp in amplicons])
-        # Process the results
-        retained_ranges = []
-        for amp in results:
-            # check if the amplicon overlaps with an amplicon that was previously
-            # found and had a high enough deltaG
-            if amp["deltaG"] <= deltaG_cutoff:
-                continue
-            amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
-            overlaps_retained = False
-            for r in retained_ranges:
-                if amp_range.start < r.stop and r.start < amp_range.stop:
-                    overlaps_retained = True
-                    break
-            if not overlaps_retained:
-                final_amplicons.append(amp)
-                retained_ranges.append(amp_range)
+    # Create a list of the first n amplicon tuples for processing
+    # The list is sorted first on whether offset targets were predicted for the amplicon,
+    # then by penalty. This ensures that amplicons with offset targets are always considered last
+    amplicons = list(sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])))[:n_to_test]
+    # process amplicons concurrently
+    batch_size = max(1, int(n_to_test / n_processes))
+    callable_f = functools.partial(
+        process_single_amplicon_deltaG,
+        majority_consensus
+    )
+    with multiprocessing.Pool(processes=n_processes) as pool:
+        results = pool.map(callable_f, amplicons, chunksize=batch_size)
+    # Process the results
+    retained_ranges = []
+    for amp in results:
+        # check if the amplicon overlaps with an amplicon that was previously
+        # found and had a high enough deltaG
+        if amp["deltaG"] <= deltaG_cutoff:
+            continue
+        amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
+        overlaps_retained = False
+        for r in retained_ranges:
+            if amp_range.start < r.stop and r.start < amp_range.stop:
+                overlaps_retained = True
+                break
+        if not overlaps_retained:
+            final_amplicons.append(amp)
+            retained_ranges.append(amp_range)
     return final_amplicons

varvamp 1.2.1__py3-none-any.whl → 1.3__py3-none-any.whl

varvamp 1.2.1 (py3-none-any.whl) → 1.3 (py3-none-any.whl)