varvamp 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- varvamp/__init__.py +1 -1
- varvamp/command.py +38 -20
- varvamp/scripts/blast.py +36 -66
- varvamp/scripts/default_config.py +1 -2
- varvamp/scripts/logging.py +0 -7
- varvamp/scripts/primers.py +2 -2
- varvamp/scripts/qpcr.py +38 -36
- varvamp/scripts/reporting.py +156 -118
- varvamp/scripts/scheme.py +115 -107
- {varvamp-1.1.3.dist-info → varvamp-1.2.0.dist-info}/METADATA +1 -1
- varvamp-1.2.0.dist-info/RECORD +21 -0
- varvamp-1.1.3.dist-info/RECORD +0 -21
- {varvamp-1.1.3.dist-info → varvamp-1.2.0.dist-info}/WHEEL +0 -0
- {varvamp-1.1.3.dist-info → varvamp-1.2.0.dist-info}/entry_points.txt +0 -0
- {varvamp-1.1.3.dist-info → varvamp-1.2.0.dist-info}/top_level.txt +0 -0
varvamp/__init__.py
CHANGED
varvamp/command.py
CHANGED
|
@@ -314,9 +314,9 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
|
|
|
314
314
|
|
|
315
315
|
if args.database is not None:
|
|
316
316
|
# create blast query
|
|
317
|
-
query_path = blast.create_BLAST_query(
|
|
317
|
+
query_path = blast.create_BLAST_query(amplicons, data_dir)
|
|
318
318
|
# perform primer blast
|
|
319
|
-
amplicons
|
|
319
|
+
amplicons = blast.primer_blast(
|
|
320
320
|
data_dir,
|
|
321
321
|
args.database,
|
|
322
322
|
query_path,
|
|
@@ -326,10 +326,8 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
|
|
|
326
326
|
log_file,
|
|
327
327
|
mode="single_tiled"
|
|
328
328
|
)
|
|
329
|
-
else:
|
|
330
|
-
off_target_amplicons = []
|
|
331
329
|
|
|
332
|
-
return all_primers, amplicons
|
|
330
|
+
return all_primers, amplicons
|
|
333
331
|
|
|
334
332
|
|
|
335
333
|
def single_workflow(args, amplicons, all_primers, log_file):
|
|
@@ -337,12 +335,12 @@ def single_workflow(args, amplicons, all_primers, log_file):
|
|
|
337
335
|
workflow part specific for single mode
|
|
338
336
|
"""
|
|
339
337
|
|
|
340
|
-
amplicon_scheme = scheme.find_single_amplicons(amplicons,
|
|
338
|
+
amplicon_scheme = scheme.find_single_amplicons(amplicons, args.report_n)
|
|
341
339
|
logging.varvamp_progress(
|
|
342
340
|
log_file,
|
|
343
341
|
progress=0.9,
|
|
344
342
|
job="Finding amplicons with low penalties.",
|
|
345
|
-
progress_text=f"{len(amplicon_scheme
|
|
343
|
+
progress_text=f"{len(amplicon_scheme)} amplicons."
|
|
346
344
|
)
|
|
347
345
|
|
|
348
346
|
return amplicon_scheme
|
|
@@ -359,8 +357,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
|
|
|
359
357
|
# search for amplicon scheme
|
|
360
358
|
coverage, amplicon_scheme = scheme.find_best_covering_scheme(
|
|
361
359
|
amplicons,
|
|
362
|
-
amplicon_graph
|
|
363
|
-
all_primers
|
|
360
|
+
amplicon_graph
|
|
364
361
|
)
|
|
365
362
|
|
|
366
363
|
# check for dimers
|
|
@@ -377,12 +374,13 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
|
|
|
377
374
|
reporting.write_dimers(results_dir, dimers_not_solved)
|
|
378
375
|
|
|
379
376
|
# evaluate coverage
|
|
377
|
+
# ATTENTION: Genome coverage of the scheme might still change slightly through resolution of primer dimers, but this potential, minor inaccuracy is currently accepted.
|
|
380
378
|
percent_coverage = round(coverage/len(ambiguous_consensus)*100, 2)
|
|
381
379
|
logging.varvamp_progress(
|
|
382
380
|
log_file,
|
|
383
381
|
progress=0.9,
|
|
384
382
|
job="Creating amplicon scheme.",
|
|
385
|
-
progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme
|
|
383
|
+
progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme)} amplicons"
|
|
386
384
|
)
|
|
387
385
|
if percent_coverage < 70:
|
|
388
386
|
logging.raise_error(
|
|
@@ -450,9 +448,9 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
|
|
|
450
448
|
# run blast if db is given
|
|
451
449
|
if args.database is not None:
|
|
452
450
|
# create blast query
|
|
453
|
-
query_path = blast.
|
|
451
|
+
query_path = blast.create_BLAST_query(qpcr_scheme_candidates, data_dir, mode="qpcr")
|
|
454
452
|
# perform primer blast
|
|
455
|
-
|
|
453
|
+
qpcr_scheme_candidates = blast.primer_blast(
|
|
456
454
|
data_dir,
|
|
457
455
|
args.database,
|
|
458
456
|
query_path,
|
|
@@ -470,9 +468,6 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
|
|
|
470
468
|
log_file,
|
|
471
469
|
exit=True
|
|
472
470
|
)
|
|
473
|
-
# report potential blast warnings
|
|
474
|
-
if args.database is not None:
|
|
475
|
-
blast.write_BLAST_warning(off_target_amplicons, final_schemes, log_file)
|
|
476
471
|
logging.varvamp_progress(
|
|
477
472
|
log_file,
|
|
478
473
|
progress=0.9,
|
|
@@ -506,9 +501,21 @@ def main(sysargs=sys.argv[1:]):
|
|
|
506
501
|
reporting.write_fasta(data_dir, "majority_consensus", majority_consensus)
|
|
507
502
|
reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus)
|
|
508
503
|
|
|
504
|
+
# Functions called from here on return lists of amplicons that are refined step-wise into final schemes.
|
|
505
|
+
# These lists that are passed between functions and later used for reporting consist of dictionary elements,
|
|
506
|
+
# which represent individual amplicons. A minimal amplicon dict could take the form:
|
|
507
|
+
# {
|
|
508
|
+
# "id": amplicon_name,
|
|
509
|
+
# "penalty": amplicon_cost,
|
|
510
|
+
# "length": amplicon_length,
|
|
511
|
+
# "LEFT": [left primer data],
|
|
512
|
+
# "RIGHT": [right primer data]
|
|
513
|
+
# }
|
|
514
|
+
# to which different functions may add additional information.
|
|
515
|
+
|
|
509
516
|
# SINGLE/TILED mode
|
|
510
517
|
if args.mode == "tiled" or args.mode == "single":
|
|
511
|
-
all_primers, amplicons
|
|
518
|
+
all_primers, amplicons = single_and_tiled_shared_workflow(
|
|
512
519
|
args,
|
|
513
520
|
left_primer_candidates,
|
|
514
521
|
right_primer_candidates,
|
|
@@ -533,15 +540,22 @@ def main(sysargs=sys.argv[1:]):
|
|
|
533
540
|
log_file,
|
|
534
541
|
results_dir
|
|
535
542
|
)
|
|
536
|
-
|
|
537
|
-
blast.write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file)
|
|
543
|
+
|
|
538
544
|
# write files
|
|
545
|
+
|
|
546
|
+
if args.mode == "tiled":
|
|
547
|
+
# assign amplicon numbers from 5' to 3' along the genome
|
|
548
|
+
amplicon_scheme.sort(key=lambda x: x["LEFT"][1])
|
|
549
|
+
else:
|
|
550
|
+
# make sure amplicons with no off-target products and with low penalties get the lowest numbers
|
|
551
|
+
amplicon_scheme.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
|
|
539
552
|
reporting.write_all_primers(data_dir, all_primers)
|
|
540
553
|
reporting.write_scheme_to_files(
|
|
541
554
|
results_dir,
|
|
542
555
|
amplicon_scheme,
|
|
543
556
|
ambiguous_consensus,
|
|
544
|
-
args.mode
|
|
557
|
+
args.mode,
|
|
558
|
+
log_file
|
|
545
559
|
)
|
|
546
560
|
reporting.varvamp_plot(
|
|
547
561
|
results_dir,
|
|
@@ -564,9 +578,13 @@ def main(sysargs=sys.argv[1:]):
|
|
|
564
578
|
right_primer_candidates,
|
|
565
579
|
log_file
|
|
566
580
|
)
|
|
581
|
+
|
|
567
582
|
# write files
|
|
583
|
+
|
|
584
|
+
# make sure amplicons with no off-target products and with low penalties get the lowest numbers
|
|
585
|
+
final_schemes.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
|
|
568
586
|
reporting.write_regions_to_bed(probe_regions, data_dir, "probe")
|
|
569
|
-
reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus)
|
|
587
|
+
reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, log_file)
|
|
570
588
|
reporting.varvamp_plot(
|
|
571
589
|
results_dir,
|
|
572
590
|
alignment_cleaned,
|
varvamp/scripts/blast.py
CHANGED
|
@@ -29,41 +29,24 @@ def check_BLAST_installation(log_file):
|
|
|
29
29
|
logging.raise_error("BLASTN is not installed", log_file, exit=True)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def create_BLAST_query(
|
|
32
|
+
def create_BLAST_query(amplicons, data_dir, mode="single_tiled"):
|
|
33
33
|
"""
|
|
34
|
-
create a query for the BLAST search
|
|
34
|
+
create a query for the BLAST search
|
|
35
35
|
"""
|
|
36
|
-
already_written = []
|
|
37
|
-
|
|
38
36
|
query_path = os.path.join(data_dir, "BLAST_query.fasta")
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
already_written.append(fw_primer)
|
|
45
|
-
if rv_primer not in already_written:
|
|
46
|
-
print(f">{rv_primer}\n{all_primers['-'][rv_primer][0]}", file=query)
|
|
47
|
-
already_written.append(rv_primer)
|
|
48
|
-
|
|
49
|
-
return query_path
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def create_BLAST_query_qpcr(qpcr_scheme_candidates, data_dir):
|
|
53
|
-
"""
|
|
54
|
-
create a query for the BLAST search (qpcr mode)
|
|
55
|
-
"""
|
|
56
|
-
already_written = []
|
|
37
|
+
if mode == "single_tiled":
|
|
38
|
+
primer_types = ["LEFT", "RIGHT"]
|
|
39
|
+
elif mode == "qpcr":
|
|
40
|
+
primer_types = ["PROBE", "LEFT", "RIGHT"]
|
|
41
|
+
already_written = set()
|
|
57
42
|
|
|
58
|
-
query_path = os.path.join(data_dir, "BLAST_query.fasta")
|
|
59
43
|
with open(query_path, "w") as query:
|
|
60
|
-
for amp in
|
|
61
|
-
for primer_type in
|
|
62
|
-
name = f"{primer_type}_{
|
|
63
|
-
if name in already_written:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
already_written.append(name)
|
|
44
|
+
for amp in amplicons:
|
|
45
|
+
for primer_type in primer_types:
|
|
46
|
+
name = f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}"
|
|
47
|
+
if name not in already_written:
|
|
48
|
+
print(f">{name}\n{amp[primer_type][0]}", file=query)
|
|
49
|
+
already_written.add(name)
|
|
67
50
|
return query_path
|
|
68
51
|
|
|
69
52
|
|
|
@@ -168,21 +151,24 @@ def predict_non_specific_amplicons_worker(amp, blast_df, max_length, mode):
|
|
|
168
151
|
"""
|
|
169
152
|
Worker function to predict unspecific targets for a single amplicon.
|
|
170
153
|
"""
|
|
171
|
-
name, data = amp
|
|
172
154
|
# get correct primers
|
|
173
155
|
if mode == "single_tiled":
|
|
174
|
-
|
|
156
|
+
primer_types = ["LEFT", "RIGHT"]
|
|
175
157
|
elif mode == "qpcr":
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
158
|
+
primer_types = ["PROBE", "LEFT", "RIGHT"]
|
|
159
|
+
primers = []
|
|
160
|
+
for primer_type in primer_types:
|
|
161
|
+
primers.append(f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}")
|
|
179
162
|
# subset df for primers
|
|
180
163
|
df_amp_primers = blast_df[blast_df["query"].isin(primers)]
|
|
181
164
|
# sort by reference and ref start
|
|
182
165
|
df_amp_primers_sorted = df_amp_primers.sort_values(["ref", "ref_start"])
|
|
183
166
|
# check for off-targets for specific primers
|
|
184
167
|
if check_off_targets(df_amp_primers_sorted, max_length, primers):
|
|
185
|
-
|
|
168
|
+
amp["off_targets"] = True
|
|
169
|
+
else:
|
|
170
|
+
amp["off_targets"] = False
|
|
171
|
+
return amp
|
|
186
172
|
|
|
187
173
|
|
|
188
174
|
def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_threads):
|
|
@@ -190,22 +176,16 @@ def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_thre
|
|
|
190
176
|
Main function to predict unspecific targets within a size range and give
|
|
191
177
|
these primers a high penalty. Uses multiprocessing for parallelization.
|
|
192
178
|
"""
|
|
193
|
-
off_targets = []
|
|
194
179
|
# process amplicons concurrently
|
|
195
180
|
with multiprocessing.Pool(processes=n_threads) as pool:
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
amplicons[off_target][5] = amplicons[off_target][5] + config.BLAST_PENALTY
|
|
205
|
-
elif mode == "qpcr":
|
|
206
|
-
amplicons[off_target]["penalty"] = amplicons[off_target]["penalty"] + config.BLAST_PENALTY
|
|
207
|
-
|
|
208
|
-
return off_targets, amplicons
|
|
181
|
+
annotated_amps = [
|
|
182
|
+
result for result in pool.starmap(
|
|
183
|
+
predict_non_specific_amplicons_worker,
|
|
184
|
+
[(amp, blast_df, max_length, mode) for amp in amplicons]
|
|
185
|
+
) if result is not None
|
|
186
|
+
]
|
|
187
|
+
n_off_targets = sum(amp["off_targets"] for amp in annotated_amps)
|
|
188
|
+
return n_off_targets, annotated_amps
|
|
209
189
|
|
|
210
190
|
|
|
211
191
|
def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log_file, mode):
|
|
@@ -237,14 +217,17 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
|
|
|
237
217
|
|
|
238
218
|
blast_df = parse_and_filter_BLAST_output(blast_out)
|
|
239
219
|
print("Predicting non-specific amplicons...")
|
|
240
|
-
|
|
220
|
+
n_off_targets, amplicons = predict_non_specific_amplicons(
|
|
241
221
|
amplicons,
|
|
242
222
|
blast_df,
|
|
243
223
|
max_length,
|
|
244
224
|
mode,
|
|
245
225
|
n_threads
|
|
246
226
|
)
|
|
247
|
-
|
|
227
|
+
if n_off_targets > 0:
|
|
228
|
+
success_text = f"varVAMP predicted non-specific amplicons:\n\t> {n_off_targets}/{len(amplicons)} amplicons could produce amplicons with the blast db.\n\t> will attempt to avoid them in the final list of amplicons"
|
|
229
|
+
else:
|
|
230
|
+
success_text = f"NO off-target amplicons found with the blast db and a total of {len(amplicons)} amplicons"
|
|
248
231
|
print(success_text)
|
|
249
232
|
with open(log_file, 'a') as f:
|
|
250
233
|
print(
|
|
@@ -253,18 +236,5 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
|
|
|
253
236
|
)
|
|
254
237
|
print("\n#### off-target search finished ####\n")
|
|
255
238
|
|
|
256
|
-
return amplicons
|
|
257
|
-
|
|
239
|
+
return amplicons
|
|
258
240
|
|
|
259
|
-
def write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file):
|
|
260
|
-
"""
|
|
261
|
-
for each primer pair that has potential unspecific amplicons
|
|
262
|
-
write warnings to file.
|
|
263
|
-
"""
|
|
264
|
-
for amp in off_target_amplicons:
|
|
265
|
-
if amp in amplicon_scheme:
|
|
266
|
-
logging.raise_error(
|
|
267
|
-
f"{amp} could produce off-targets. No better amplicon in this area was found.",
|
|
268
|
-
log_file,
|
|
269
|
-
exit=False,
|
|
270
|
-
)
|
|
@@ -4,7 +4,7 @@ This contains all varVAMP parameters.
|
|
|
4
4
|
|
|
5
5
|
# List of all known parameters. DO NOT CHANGE!
|
|
6
6
|
__all__ = [
|
|
7
|
-
'BLAST_MAX_DIFF', '
|
|
7
|
+
'BLAST_MAX_DIFF', 'BLAST_SETTINGS', 'BLAST_SIZE_MULTI',
|
|
8
8
|
'END_OVERLAP',
|
|
9
9
|
'PCR_DNA_CONC', 'PCR_DNTP_CONC', 'PCR_DV_CONC', 'PCR_MV_CONC',
|
|
10
10
|
'PRIMER_3_PENALTY', 'PRIMER_GC_END', 'PRIMER_GC_PENALTY',
|
|
@@ -74,7 +74,6 @@ BLAST_SETTINGS = { # blast settings for query search
|
|
|
74
74
|
}
|
|
75
75
|
BLAST_MAX_DIFF = 0.5 # min percent match between primer and BLAST hit (coverage and/or mismatches)
|
|
76
76
|
BLAST_SIZE_MULTI = 2 # multiplier for the max_amp size of off targets (in relation to max amp size)
|
|
77
|
-
BLAST_PENALTY = 50 # amplicon penalty increase -> considered only if no other possibilities
|
|
78
77
|
|
|
79
78
|
# nucleotide definitions, do NOT change
|
|
80
79
|
NUCS = set("atcg")
|
varvamp/scripts/logging.py
CHANGED
|
@@ -291,7 +291,6 @@ def confirm_config(args, log_file):
|
|
|
291
291
|
(
|
|
292
292
|
"BLAST_MAX_DIFF",
|
|
293
293
|
"BLAST_SIZE_MULTI",
|
|
294
|
-
"BLAST_PENALTY"
|
|
295
294
|
)
|
|
296
295
|
]
|
|
297
296
|
|
|
@@ -384,7 +383,6 @@ def confirm_config(args, log_file):
|
|
|
384
383
|
("qpcr deletion size still considered for deltaG calculation", config.QAMPLICON_DEL_CUTOFF),
|
|
385
384
|
("maximum difference between primer and blast db", config.BLAST_MAX_DIFF),
|
|
386
385
|
("multiplier of the maximum length for non-specific amplicons", config.BLAST_SIZE_MULTI),
|
|
387
|
-
("blast penalty for off targets", config.BLAST_PENALTY)
|
|
388
386
|
]
|
|
389
387
|
for var_type, var in non_negative_var:
|
|
390
388
|
if var < 0:
|
|
@@ -468,11 +466,6 @@ def confirm_config(args, log_file):
|
|
|
468
466
|
log_file,
|
|
469
467
|
exit=True
|
|
470
468
|
)
|
|
471
|
-
if config.BLAST_PENALTY < 10:
|
|
472
|
-
raise_error(
|
|
473
|
-
"giving a too small penalty could result in the selection of off-target producing amplicons in the final scheme.",
|
|
474
|
-
log_file,
|
|
475
|
-
)
|
|
476
469
|
# confirm proper BLAST settings in dictionary
|
|
477
470
|
if not isinstance(config.BLAST_SETTINGS, dict):
|
|
478
471
|
raise_error(
|
varvamp/scripts/primers.py
CHANGED
|
@@ -386,13 +386,13 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
|
|
|
386
386
|
primer_candidates.sort(key=lambda x: (x[3], x[1]))
|
|
387
387
|
# ini everything with the primer with the lowest penalty
|
|
388
388
|
to_retain = [primer_candidates[0]]
|
|
389
|
-
primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]
|
|
389
|
+
primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]))
|
|
390
390
|
primer_set = set(primer_ranges)
|
|
391
391
|
|
|
392
392
|
for primer in primer_candidates:
|
|
393
393
|
# get the thirds of the primer, only consider the middle
|
|
394
394
|
thirds_len = int((primer[2] - primer[1])/3)
|
|
395
|
-
primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len
|
|
395
|
+
primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len))
|
|
396
396
|
# check if none of the nucleotides of the next primer
|
|
397
397
|
# are already covered by a better primer
|
|
398
398
|
if not any(x in primer_positions for x in primer_set):
|
varvamp/scripts/qpcr.py
CHANGED
|
@@ -211,13 +211,13 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
|
|
|
211
211
|
if "LEFT" in probe:
|
|
212
212
|
if not qpcr_probes[probe][1] in range(
|
|
213
213
|
left_primer[2] + config.QPROBE_DISTANCE[0],
|
|
214
|
-
left_primer[2] + config.QPROBE_DISTANCE[1]
|
|
214
|
+
left_primer[2] + config.QPROBE_DISTANCE[1]
|
|
215
215
|
):
|
|
216
216
|
continue
|
|
217
217
|
elif "RIGHT" in probe:
|
|
218
218
|
if not right_primer[1] in range(
|
|
219
219
|
qpcr_probes[probe][2] + config.QPROBE_DISTANCE[0],
|
|
220
|
-
qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1]
|
|
220
|
+
qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1]
|
|
221
221
|
|
|
222
222
|
):
|
|
223
223
|
continue
|
|
@@ -258,7 +258,7 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
|
|
|
258
258
|
there is no need to consider this primer probe combination.
|
|
259
259
|
"""
|
|
260
260
|
|
|
261
|
-
qpcr_scheme_candidates =
|
|
261
|
+
qpcr_scheme_candidates = []
|
|
262
262
|
found_amplicons = []
|
|
263
263
|
amplicon_nr = -1
|
|
264
264
|
|
|
@@ -279,15 +279,16 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
|
|
|
279
279
|
# populate the primer dictionary:
|
|
280
280
|
amplicon_nr += 1
|
|
281
281
|
found_amplicons.append(primer_combination)
|
|
282
|
-
qpcr_scheme_candidates
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
282
|
+
qpcr_scheme_candidates.append(
|
|
283
|
+
{
|
|
284
|
+
"id": f"AMPLICON_{amplicon_nr}",
|
|
285
|
+
"penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
|
|
286
|
+
"PROBE": qpcr_probes[probe],
|
|
287
|
+
"LEFT": primer_combination[0],
|
|
288
|
+
"RIGHT": primer_combination[1]
|
|
289
|
+
}
|
|
290
|
+
)
|
|
288
291
|
# and again sort by total penalty (left + right + probe)
|
|
289
|
-
qpcr_scheme_candidates = dict(sorted(qpcr_scheme_candidates.items(), key=lambda x: x[1]["penalty"]))
|
|
290
|
-
|
|
291
292
|
return qpcr_scheme_candidates
|
|
292
293
|
|
|
293
294
|
|
|
@@ -296,21 +297,17 @@ def process_single_amplicon_deltaG(amplicon, majority_consensus):
|
|
|
296
297
|
Process a single amplicon to test its deltaG and apply filtering.
|
|
297
298
|
This function will be called concurrently by multiple threads.
|
|
298
299
|
"""
|
|
299
|
-
|
|
300
|
-
start = data["LEFT"][1]
|
|
301
|
-
stop = data["RIGHT"][2]
|
|
302
|
-
seq = majority_consensus[start:stop]
|
|
300
|
+
seq = majority_consensus[amplicon["LEFT"][1]:amplicon["RIGHT"][2]]
|
|
303
301
|
seq = seq.replace("N", "")
|
|
304
302
|
seq = seq.replace("n", "")
|
|
305
|
-
amp_positions = list(range(start, stop + 1))
|
|
306
303
|
# check if the amplicon overlaps with an amplicon that was previously
|
|
307
304
|
# found and had a high enough deltaG
|
|
308
|
-
min_temp = min((primers.calc_temp(
|
|
309
|
-
primers.calc_temp(
|
|
305
|
+
min_temp = min((primers.calc_temp(amplicon["LEFT"][0]),
|
|
306
|
+
primers.calc_temp(amplicon["RIGHT"][0])))
|
|
310
307
|
# calculate deltaG at the minimal primer temp
|
|
311
|
-
deltaG = seqfold.dg(seq, min_temp)
|
|
308
|
+
amplicon["deltaG"] = seqfold.dg(seq, min_temp)
|
|
312
309
|
|
|
313
|
-
return
|
|
310
|
+
return amplicon
|
|
314
311
|
|
|
315
312
|
|
|
316
313
|
def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_threads):
|
|
@@ -319,29 +316,34 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
|
|
|
319
316
|
and filters if they fall below the cutoff. Multiple processes are used
|
|
320
317
|
for processing amplicons in parallel.
|
|
321
318
|
"""
|
|
322
|
-
|
|
323
|
-
passed_counter = 0 # counter for re-naming amplicons that passed deltaG cutoff
|
|
324
|
-
amplicon_set = set()
|
|
319
|
+
final_amplicons = []
|
|
325
320
|
|
|
326
321
|
# Create a pool of processes to handle the concurrent processing
|
|
327
322
|
with multiprocessing.Pool(processes=n_threads) as pool:
|
|
328
323
|
# Create a list of the first n amplicon tuples for processing
|
|
329
|
-
|
|
324
|
+
# The list is sorted first on whether off-targets were predicted for the amplicon,
|
|
325
|
+
# then by penalty. This ensures that amplicons with off-targets are always considered last
|
|
326
|
+
amplicons = itertools.islice(
|
|
327
|
+
sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])),
|
|
328
|
+
n_to_test
|
|
329
|
+
)
|
|
330
330
|
# process amplicons concurrently
|
|
331
331
|
results = pool.starmap(process_single_amplicon_deltaG, [(amp, majority_consensus) for amp in amplicons])
|
|
332
332
|
# Process the results
|
|
333
|
-
|
|
333
|
+
retained_ranges = []
|
|
334
|
+
for amp in results:
|
|
334
335
|
# check if the amplicon overlaps with an amplicon that was previously
|
|
335
336
|
# found and had a high enough deltaG
|
|
336
|
-
if
|
|
337
|
+
if amp["deltaG"] <= deltaG_cutoff:
|
|
337
338
|
continue
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
339
|
+
amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
|
|
340
|
+
overlaps_retained = False
|
|
341
|
+
for r in retained_ranges:
|
|
342
|
+
if amp_range.start < r.stop and r.start < amp_range.stop:
|
|
343
|
+
overlaps_retained = True
|
|
344
|
+
break
|
|
345
|
+
if not overlaps_retained:
|
|
346
|
+
final_amplicons.append(amp)
|
|
347
|
+
retained_ranges.append(amp_range)
|
|
348
|
+
|
|
349
|
+
return final_amplicons
|