varvamp 1.2.1__py3-none-any.whl → 1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- varvamp/__init__.py +6 -3
- varvamp/command.py +134 -60
- varvamp/scripts/alignment.py +54 -164
- varvamp/scripts/default_config.py +5 -3
- varvamp/scripts/logging.py +67 -21
- varvamp/scripts/param_estimation.py +84 -62
- varvamp/scripts/primers.py +190 -46
- varvamp/scripts/qpcr.py +141 -117
- varvamp/scripts/reporting.py +45 -40
- varvamp/scripts/scheme.py +101 -52
- varvamp-1.3.dist-info/METADATA +760 -0
- varvamp-1.3.dist-info/RECORD +22 -0
- {varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/WHEEL +1 -1
- varvamp-1.3.dist-info/licenses/LICENSE +674 -0
- varvamp-1.2.1.dist-info/METADATA +0 -78
- varvamp-1.2.1.dist-info/RECORD +0 -21
- {varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/entry_points.txt +0 -0
- {varvamp-1.2.1.dist-info → varvamp-1.3.dist-info}/top_level.txt +0 -0
varvamp/scripts/reporting.py
CHANGED
|
@@ -4,7 +4,6 @@ data writing and visualization.
|
|
|
4
4
|
# BUILT-INS
|
|
5
5
|
import os
|
|
6
6
|
import math
|
|
7
|
-
import itertools
|
|
8
7
|
|
|
9
8
|
# LIBS
|
|
10
9
|
import pandas as pd
|
|
@@ -53,7 +52,7 @@ def write_regions_to_bed(primer_regions, scheme_name, path, mode=None):
|
|
|
53
52
|
with open(outfile, 'w') as o:
|
|
54
53
|
for counter, region in enumerate(primer_regions):
|
|
55
54
|
print(
|
|
56
|
-
f"{scheme_name}
|
|
55
|
+
f"{scheme_name}_ambiguous_consensus",
|
|
57
56
|
region[0],
|
|
58
57
|
region[1],
|
|
59
58
|
"REGION_"+str(counter),
|
|
@@ -68,9 +67,7 @@ def write_primers_to_bed(outfile, scheme_name, primer_name, primer_properties, n
|
|
|
68
67
|
"""
|
|
69
68
|
with open(outfile, 'a') as o:
|
|
70
69
|
# write header for primer bed
|
|
71
|
-
|
|
72
|
-
print("#chrom\tchromStart\tchromEnd\tprimer-name\tpool\tstrand\tprimer-sequence", file=o)
|
|
73
|
-
data = [f"{scheme_name}_consensus",
|
|
70
|
+
data = [f"{scheme_name}_ambiguous_consensus",
|
|
74
71
|
primer_properties[1], # start
|
|
75
72
|
primer_properties[2], # stop
|
|
76
73
|
primer_name,
|
|
@@ -96,20 +93,6 @@ def write_all_primers(path, scheme_name, all_primers):
|
|
|
96
93
|
write_primers_to_bed(outfile, scheme_name, primer, all_primers[direction][primer], round(all_primers[direction][primer][3], 2), direction)
|
|
97
94
|
|
|
98
95
|
|
|
99
|
-
def get_permutations(seq):
|
|
100
|
-
"""
|
|
101
|
-
get all permutations of an ambiguous sequence. needed to
|
|
102
|
-
correctly report the gc and the temperature.
|
|
103
|
-
"""
|
|
104
|
-
groups = itertools.groupby(seq, lambda char: char not in config.AMBIG_NUCS)
|
|
105
|
-
splits = []
|
|
106
|
-
for b, group in groups:
|
|
107
|
-
if b:
|
|
108
|
-
splits.extend([[g] for g in group])
|
|
109
|
-
else:
|
|
110
|
-
for nuc in group:
|
|
111
|
-
splits.append(config.AMBIG_NUCS[nuc])
|
|
112
|
-
return[''.join(p) for p in itertools.product(*splits)]
|
|
113
96
|
|
|
114
97
|
|
|
115
98
|
def calc_mean_stats(permutations):
|
|
@@ -150,7 +133,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, l
|
|
|
150
133
|
amp_name = f"{scheme_name}_{n}"
|
|
151
134
|
# write bed amplicon file
|
|
152
135
|
print(
|
|
153
|
-
f"{scheme_name}
|
|
136
|
+
f"{scheme_name}_ambiguous_consensus",
|
|
154
137
|
amp["LEFT"][1],
|
|
155
138
|
amp["RIGHT"][2],
|
|
156
139
|
amp_name,
|
|
@@ -192,7 +175,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, l
|
|
|
192
175
|
else:
|
|
193
176
|
direction = "+"
|
|
194
177
|
|
|
195
|
-
permutations = get_permutations(seq)
|
|
178
|
+
permutations = primers.get_permutations(seq)
|
|
196
179
|
gc, temp = calc_mean_stats(permutations)
|
|
197
180
|
primer_name = f"{amp_name}_{oligo_type}"
|
|
198
181
|
|
|
@@ -226,7 +209,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, l
|
|
|
226
209
|
print(f">{primer_name}\n{seq.upper()}", file=fasta)
|
|
227
210
|
|
|
228
211
|
|
|
229
|
-
def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file):
|
|
212
|
+
def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file, primer_dimers=None):
|
|
230
213
|
"""
|
|
231
214
|
write all relevant bed files and a tsv file with all primer stats
|
|
232
215
|
"""
|
|
@@ -235,6 +218,9 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
235
218
|
amplicon_bed_file = os.path.join(path, "amplicons.bed")
|
|
236
219
|
tabular_file = os.path.join(path, "primer_to_amplicon_assignment.tabular")
|
|
237
220
|
|
|
221
|
+
# Map old primer names to new amplicon-based names
|
|
222
|
+
name_mapping = {}
|
|
223
|
+
|
|
238
224
|
# open files to write
|
|
239
225
|
with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular:
|
|
240
226
|
# write header for primer tsv
|
|
@@ -250,11 +236,11 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
250
236
|
if mode == "single":
|
|
251
237
|
primer_fasta_file = os.path.join(path, "primers.fasta")
|
|
252
238
|
else:
|
|
253
|
-
primer_fasta_file = os.path.join(path, f"primers_pool_{pool+1}.fasta")
|
|
239
|
+
primer_fasta_file = os.path.join(path, f"primers_pool_{pool + 1}.fasta")
|
|
254
240
|
with open(primer_fasta_file, "w") as primer_fasta:
|
|
255
241
|
for counter, amp in enumerate(amplicon_scheme[pool::len(pools)]):
|
|
256
242
|
# give a new amplicon name
|
|
257
|
-
amplicon_index = counter*len(pools) + pool
|
|
243
|
+
amplicon_index = counter * len(pools) + pool
|
|
258
244
|
amp_name = f"{scheme_name}_{amplicon_index}"
|
|
259
245
|
# get left and right primers and their names
|
|
260
246
|
amp_length = amp["RIGHT"][2] - amp["LEFT"][1]
|
|
@@ -268,7 +254,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
268
254
|
amplicon_has_off_target = "n.d."
|
|
269
255
|
# write amplicon bed
|
|
270
256
|
if mode == "tiled":
|
|
271
|
-
bed_score = pool+1
|
|
257
|
+
bed_score = pool + 1
|
|
272
258
|
elif mode == "single":
|
|
273
259
|
bed_score = round(amp["LEFT"][3] + amp["RIGHT"][3], 1)
|
|
274
260
|
amplicon_bed_records.append(
|
|
@@ -286,6 +272,10 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
286
272
|
(f"{amp_name}_LEFT", f"{amp_name}_RIGHT")
|
|
287
273
|
)
|
|
288
274
|
)
|
|
275
|
+
# Build name mapping for dimers
|
|
276
|
+
name_mapping[amp["LEFT"][-1]] = f"{amp_name}_LEFT"
|
|
277
|
+
name_mapping[amp["RIGHT"][-1]] = f"{amp_name}_RIGHT"
|
|
278
|
+
|
|
289
279
|
# write primer tsv and primer bed
|
|
290
280
|
for direction, primer in [("+", amp["LEFT"]), ("-", amp["RIGHT"])]:
|
|
291
281
|
seq = ambiguous_consensus[primer[1]:primer[2]]
|
|
@@ -297,7 +287,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
297
287
|
# write primers to fasta pool file
|
|
298
288
|
print(f">{primer_name}\n{seq.upper()}", file=primer_fasta)
|
|
299
289
|
# calc primer parameters for all permutations
|
|
300
|
-
permutations = get_permutations(seq)
|
|
290
|
+
permutations = primers.get_permutations(seq)
|
|
301
291
|
gc, temp = calc_mean_stats(permutations)
|
|
302
292
|
# write tsv file
|
|
303
293
|
print(
|
|
@@ -305,7 +295,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
305
295
|
amp_length,
|
|
306
296
|
primer_name,
|
|
307
297
|
primer[-1],
|
|
308
|
-
pool+1,
|
|
298
|
+
pool + 1,
|
|
309
299
|
primer[1] + 1,
|
|
310
300
|
primer[2],
|
|
311
301
|
seq.upper(),
|
|
@@ -323,13 +313,13 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
323
313
|
(
|
|
324
314
|
# will need amplicon_index for sorting
|
|
325
315
|
amplicon_index,
|
|
326
|
-
(primer_name, primer, pool+1, direction, seq.upper())
|
|
316
|
+
(primer_name, primer, pool + 1, direction, seq.upper())
|
|
327
317
|
)
|
|
328
318
|
)
|
|
329
319
|
# write amplicon bed with amplicons sorted by start position
|
|
330
320
|
for record in sorted(amplicon_bed_records, key=lambda x: x[0]):
|
|
331
321
|
print(
|
|
332
|
-
f"{scheme_name}
|
|
322
|
+
f"{scheme_name}_ambiguous_consensus",
|
|
333
323
|
*record,
|
|
334
324
|
".",
|
|
335
325
|
sep="\t",
|
|
@@ -350,26 +340,41 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
|
|
|
350
340
|
*record[1]
|
|
351
341
|
)
|
|
352
342
|
|
|
343
|
+
# Write dimers with renamed primers
|
|
344
|
+
if primer_dimers:
|
|
345
|
+
write_dimers(path, primer_dimers, name_mapping)
|
|
353
346
|
|
|
354
|
-
|
|
347
|
+
|
|
348
|
+
def write_dimers(path, primer_dimers, name_mapping):
|
|
355
349
|
"""
|
|
356
350
|
write dimers for which no replacement was found to file
|
|
357
351
|
"""
|
|
358
|
-
|
|
359
|
-
with open(
|
|
360
|
-
print(
|
|
361
|
-
"pool\tprimer_name_1\tprimer_name_2\tdimer melting temp",
|
|
362
|
-
file=tsv
|
|
363
|
-
)
|
|
352
|
+
file = os.path.join(path, "unsolvable_primer_dimers.txt")
|
|
353
|
+
with open(file, "w") as f:
|
|
364
354
|
for pool, primer1, primer2 in primer_dimers:
|
|
355
|
+
dimer_result = primers.calc_dimer(primer1[2][0], primer2[2][0], structure=True)
|
|
356
|
+
print(
|
|
357
|
+
"pool\tprimer 1\tprimer 2\tdimer melting temp\tdeltaG",
|
|
358
|
+
file=f
|
|
359
|
+
)
|
|
365
360
|
print(
|
|
366
361
|
pool+1,
|
|
367
|
-
primer1[1],
|
|
368
|
-
primer2[1],
|
|
369
|
-
round(
|
|
362
|
+
name_mapping[primer1[1]],
|
|
363
|
+
name_mapping[primer2[1]],
|
|
364
|
+
round(dimer_result.tm, 1),
|
|
365
|
+
dimer_result.dg,
|
|
370
366
|
sep="\t",
|
|
371
|
-
file=
|
|
367
|
+
file=f
|
|
372
368
|
)
|
|
369
|
+
structure = [x[4:] for x in dimer_result.ascii_structure_lines]
|
|
370
|
+
print("\nDimer structure:", file=f)
|
|
371
|
+
for line in structure:
|
|
372
|
+
print(
|
|
373
|
+
line,
|
|
374
|
+
file=f
|
|
375
|
+
)
|
|
376
|
+
print(file=f)
|
|
377
|
+
|
|
373
378
|
|
|
374
379
|
def entropy(chars, states):
|
|
375
380
|
"""
|
varvamp/scripts/scheme.py
CHANGED
|
@@ -5,6 +5,8 @@ amplicon search
|
|
|
5
5
|
# BUILT-INS
|
|
6
6
|
import heapq
|
|
7
7
|
import math
|
|
8
|
+
import multiprocessing
|
|
9
|
+
import functools
|
|
8
10
|
|
|
9
11
|
# varVAMP
|
|
10
12
|
from varvamp.scripts import config, primers
|
|
@@ -73,7 +75,7 @@ def find_amplicons(all_primers, opt_len, max_len):
|
|
|
73
75
|
amplicon_length = right_primer[2] - left_primer[1]
|
|
74
76
|
if not opt_len <= amplicon_length <= max_len:
|
|
75
77
|
continue
|
|
76
|
-
if primers.
|
|
78
|
+
if primers.is_dimer(left_primer[0], right_primer[0]):
|
|
77
79
|
continue
|
|
78
80
|
# calculate length dependend amplicon costs as the cumulative primer
|
|
79
81
|
# penalty multiplied by the e^(fold length of the optimal length).
|
|
@@ -92,6 +94,26 @@ def find_amplicons(all_primers, opt_len, max_len):
|
|
|
92
94
|
return amplicons
|
|
93
95
|
|
|
94
96
|
|
|
97
|
+
def has_qualifying_overlap(current_amplicon, next_amplicon, min_overlap):
|
|
98
|
+
"""
|
|
99
|
+
check if two amplicons overlap sufficiently to connect them in the graph
|
|
100
|
+
"""
|
|
101
|
+
# connect amplicons if they sufficiently overlap because:
|
|
102
|
+
# ... the start of next amplicon lies in the second half of the prior amplicon
|
|
103
|
+
if next_amplicon["LEFT"][1] < current_amplicon["LEFT"][1] + current_amplicon["length"] / 2:
|
|
104
|
+
return False
|
|
105
|
+
# ... the stop of the left primer of the next amplicon does not lie in the minimum amplicon insert
|
|
106
|
+
if next_amplicon["LEFT"][2] > current_amplicon["RIGHT"][1] - min_overlap:
|
|
107
|
+
return False
|
|
108
|
+
# ... half of the next amplicon does not overlap with the previous amplicon --> enough space for a
|
|
109
|
+
# further amplicon that lies in the second half next amplicon and cannot overlap with a primer of the
|
|
110
|
+
# current amplicon
|
|
111
|
+
if next_amplicon["RIGHT"][2] <= current_amplicon["RIGHT"][2] + next_amplicon["length"] / 2:
|
|
112
|
+
return False
|
|
113
|
+
|
|
114
|
+
return True
|
|
115
|
+
|
|
116
|
+
|
|
95
117
|
def create_amplicon_graph(amplicons, min_overlap):
|
|
96
118
|
"""
|
|
97
119
|
creates the amplicon graph.
|
|
@@ -100,34 +122,26 @@ def create_amplicon_graph(amplicons, min_overlap):
|
|
|
100
122
|
amplicon_graph = {}
|
|
101
123
|
nodes = []
|
|
102
124
|
|
|
103
|
-
# add the maximum len of a primer to ensure that possible amplicon starts
|
|
104
|
-
# before the min overlap
|
|
105
|
-
min_overlap = min_overlap + config.PRIMER_SIZES[2]
|
|
106
|
-
|
|
107
125
|
for current_amplicon in amplicons:
|
|
108
126
|
# remember all vertices
|
|
109
127
|
amplicon_id = current_amplicon["id"]
|
|
110
128
|
nodes.append(amplicon_id)
|
|
111
|
-
start = current_amplicon["LEFT"][1] + current_amplicon["length"]/2
|
|
112
|
-
stop = current_amplicon["RIGHT"][2] - min_overlap
|
|
113
129
|
for next_amplicon in amplicons:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
#
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
amplicon_graph[amplicon_id] = {
|
|
121
|
-
next_amplicon["id"]: (
|
|
122
|
-
next_amplicon.get("off_targets", False),
|
|
123
|
-
next_amplicon["penalty"]
|
|
124
|
-
)
|
|
125
|
-
}
|
|
126
|
-
else:
|
|
127
|
-
amplicon_graph[amplicon_id][next_amplicon["id"]] = (
|
|
130
|
+
if not has_qualifying_overlap(current_amplicon, next_amplicon, min_overlap):
|
|
131
|
+
continue
|
|
132
|
+
# --> write to graph
|
|
133
|
+
if amplicon_id not in amplicon_graph:
|
|
134
|
+
amplicon_graph[amplicon_id] = {
|
|
135
|
+
next_amplicon["id"]: (
|
|
128
136
|
next_amplicon.get("off_targets", False),
|
|
129
137
|
next_amplicon["penalty"]
|
|
130
138
|
)
|
|
139
|
+
}
|
|
140
|
+
else:
|
|
141
|
+
amplicon_graph[amplicon_id][next_amplicon["id"]] = (
|
|
142
|
+
next_amplicon.get("off_targets", False),
|
|
143
|
+
next_amplicon["penalty"]
|
|
144
|
+
)
|
|
131
145
|
|
|
132
146
|
# return a graph object
|
|
133
147
|
return Graph(nodes, amplicon_graph)
|
|
@@ -274,6 +288,7 @@ def find_best_covering_scheme(amplicons, amplicon_graph):
|
|
|
274
288
|
# if no previous nodes are found but the single amplicon results in the largest
|
|
275
289
|
# coverage - return as the best scheme
|
|
276
290
|
amplicon_path = [best_start_node]
|
|
291
|
+
|
|
277
292
|
return best_coverage, create_scheme(amplicon_path, amps_by_id)
|
|
278
293
|
|
|
279
294
|
|
|
@@ -283,8 +298,15 @@ def test_scheme_for_dimers(amplicon_scheme):
|
|
|
283
298
|
"""
|
|
284
299
|
|
|
285
300
|
primer_dimers = []
|
|
286
|
-
|
|
287
|
-
|
|
301
|
+
non_dimers = {amp["pool"]:set() for amp in amplicon_scheme}
|
|
302
|
+
# write all primer sequences in the respective pools -->
|
|
303
|
+
# these primers should not be violated by primer switching
|
|
304
|
+
# and primers are only switched later if no primer dimers
|
|
305
|
+
# with the existing 'good' scheme are created
|
|
306
|
+
for amp in amplicon_scheme:
|
|
307
|
+
non_dimers[amp["pool"]].add(amp["LEFT"][0])
|
|
308
|
+
non_dimers[amp["pool"]].add(amp["RIGHT"][0])
|
|
309
|
+
for pool in non_dimers:
|
|
288
310
|
# test the primer dimers only within the respective pools
|
|
289
311
|
tested_primers = []
|
|
290
312
|
for amp_index, amp in enumerate(amplicon_scheme):
|
|
@@ -297,13 +319,16 @@ def test_scheme_for_dimers(amplicon_scheme):
|
|
|
297
319
|
current_seq = current_primer[2][0]
|
|
298
320
|
for tested in tested_primers:
|
|
299
321
|
tested_seq = tested[2][0]
|
|
300
|
-
if primers.
|
|
322
|
+
if not primers.is_dimer(current_seq, tested_seq):
|
|
301
323
|
continue
|
|
302
324
|
primer_dimers.append((current_primer, tested))
|
|
325
|
+
non_dimers[pool].discard(current_seq)
|
|
326
|
+
non_dimers[pool].discard(tested_seq)
|
|
303
327
|
# and remember all tested primers
|
|
304
328
|
tested_primers.append(current_primer)
|
|
305
329
|
|
|
306
|
-
|
|
330
|
+
# report both dimers and non-dimers
|
|
331
|
+
return primer_dimers, non_dimers
|
|
307
332
|
|
|
308
333
|
|
|
309
334
|
def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates):
|
|
@@ -317,13 +342,16 @@ def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidat
|
|
|
317
342
|
# test each primer in dimer
|
|
318
343
|
for amp_index, primer_name, primer in dimer:
|
|
319
344
|
overlapping_primers_temp = []
|
|
320
|
-
|
|
321
|
-
#
|
|
322
|
-
|
|
323
|
-
#
|
|
345
|
+
# as switching could violate overlap criteria,
|
|
346
|
+
# only consider primers that overlap in the left half (LEFT primers)
|
|
347
|
+
# or right half (RIGHT primers) respectively, however this can result in slightly
|
|
348
|
+
# longer amplicons than allowed.
|
|
349
|
+
half_length = int((primer[2] - primer[1]) / 2)
|
|
324
350
|
if "RIGHT" in primer_name:
|
|
351
|
+
overlap_set = set(range(primer[1] + half_length, primer[2]))
|
|
325
352
|
primers_to_test = right_primer_candidates
|
|
326
353
|
else:
|
|
354
|
+
overlap_set = set(range(primer[1], primer[1] + half_length))
|
|
327
355
|
primers_to_test = left_primer_candidates
|
|
328
356
|
# and check this list for all primers that overlap
|
|
329
357
|
for potential_new in primers_to_test:
|
|
@@ -337,40 +365,60 @@ def get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidat
|
|
|
337
365
|
return overlapping_primers
|
|
338
366
|
|
|
339
367
|
|
|
340
|
-
def test_overlaps_for_dimers(overlapping_primers):
|
|
368
|
+
def test_overlaps_for_dimers(overlapping_primers, non_dimers):
|
|
341
369
|
"""
|
|
342
|
-
test
|
|
370
|
+
test all possible overlapping primers against each other for dimers
|
|
371
|
+
and return the first pair that doesn't form a dimer with each other
|
|
372
|
+
and with all non-dimer forming primers in the pool.
|
|
343
373
|
"""
|
|
344
374
|
for first_overlap in overlapping_primers[0]:
|
|
375
|
+
if any(primers.is_dimer(seq, first_overlap[2][0]) for seq in non_dimers):
|
|
376
|
+
continue
|
|
345
377
|
for second_overlap in overlapping_primers[1]:
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
if primers.
|
|
378
|
+
if any(primers.is_dimer(seq, second_overlap[2][0]) for seq in non_dimers):
|
|
379
|
+
continue
|
|
380
|
+
if not primers.is_dimer(first_overlap[2][0], second_overlap[2][0]):
|
|
349
381
|
return [first_overlap, second_overlap]
|
|
350
382
|
|
|
351
383
|
|
|
352
|
-
def
|
|
384
|
+
def _solve_single_dimer(amplicon_scheme, left_primer_candidates, right_primer_candidates, non_dimers_all_pools, dimer):
|
|
385
|
+
"""
|
|
386
|
+
Helper function for multiprocessing: solve a single dimer independently.
|
|
387
|
+
Returns (amp_index, primer_name, new_primer) tuples or empty list if no solution.
|
|
388
|
+
"""
|
|
389
|
+
pool = amplicon_scheme[dimer[0][0]]["pool"]
|
|
390
|
+
non_dimers = non_dimers_all_pools[pool]
|
|
391
|
+
|
|
392
|
+
overlapping_primers = get_overlapping_primers(dimer, left_primer_candidates, right_primer_candidates)
|
|
393
|
+
new_primers = test_overlaps_for_dimers(overlapping_primers, non_dimers)
|
|
394
|
+
|
|
395
|
+
return new_primers if new_primers else []
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def check_and_solve_heterodimers(amplicon_scheme, left_primer_candidates, right_primer_candidates, all_primers, num_processes):
|
|
353
399
|
"""
|
|
354
400
|
check scheme for heterodimers, try to find
|
|
355
401
|
new primers that overlap and replace the existing ones.
|
|
356
|
-
|
|
357
|
-
is checked a second time. if there are still primer dimers
|
|
358
|
-
present the non-solvable dimers are returned
|
|
402
|
+
Uses multiprocessing to solve dimers in parallel.
|
|
359
403
|
"""
|
|
404
|
+
primer_dimers, non_dimers_all_pools = test_scheme_for_dimers(amplicon_scheme)
|
|
405
|
+
n_initial_dimers = len(primer_dimers)
|
|
360
406
|
|
|
361
|
-
|
|
407
|
+
if not primer_dimers:
|
|
408
|
+
return [], 0
|
|
362
409
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
410
|
+
# Prepare arguments for each dimer
|
|
411
|
+
callable_f = functools.partial(
|
|
412
|
+
_solve_single_dimer,
|
|
413
|
+
amplicon_scheme, left_primer_candidates, right_primer_candidates, non_dimers_all_pools
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Solve dimers in parallel
|
|
417
|
+
with multiprocessing.Pool(processes=num_processes) as pool:
|
|
418
|
+
results = pool.map(callable_f, primer_dimers)
|
|
419
|
+
|
|
420
|
+
# Apply all solutions to the scheme
|
|
421
|
+
for new_primers in results:
|
|
374
422
|
if new_primers:
|
|
375
423
|
for amp_index, primer_name, primer in new_primers:
|
|
376
424
|
# overwrite in final scheme
|
|
@@ -386,12 +434,13 @@ def check_and_solve_heterodimers(amplicon_scheme, left_primer_candidates, right_
|
|
|
386
434
|
# and in all primers
|
|
387
435
|
all_primers[strand][primer_name] = primer
|
|
388
436
|
# get remaining dimers in the revised scheme and add pool identifier for reporting
|
|
437
|
+
remaining_primer_dimers, _ = test_scheme_for_dimers(amplicon_scheme)
|
|
389
438
|
primer_dimers = [
|
|
390
439
|
(amplicon_scheme[primer1[0]]["pool"], primer1, primer2)
|
|
391
|
-
for primer1, primer2 in
|
|
440
|
+
for primer1, primer2 in remaining_primer_dimers
|
|
392
441
|
]
|
|
393
442
|
|
|
394
|
-
return primer_dimers
|
|
443
|
+
return primer_dimers, n_initial_dimers
|
|
395
444
|
|
|
396
445
|
|
|
397
446
|
def find_single_amplicons(amplicons, n):
|