varvamp 1.1.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
varvamp/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Tool to design amplicons for highly variable virusgenomes"""
2
2
  _program = "varvamp"
3
- __version__ = "1.1.3"
3
+ __version__ = "1.2.1"
varvamp/command.py CHANGED
@@ -89,6 +89,13 @@ def get_args(sysargs):
89
89
  type=int,
90
90
  default=1
91
91
  )
92
+ par.add_argument(
93
+ "--name",
94
+ help="name of the scheme",
95
+ metavar="varVAMP",
96
+ type=str,
97
+ default="varVAMP"
98
+ )
92
99
  for par in (SINGLE_parser, TILED_parser):
93
100
  par.add_argument(
94
101
  "-ol",
@@ -261,10 +268,10 @@ def shared_workflow(args, log_file):
261
268
  ambiguous_consensus,
262
269
  alignment_cleaned
263
270
  )
264
- for type, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]:
271
+ for primer_type, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]:
265
272
  if not primer_candidates:
266
273
  logging.raise_error(
267
- f"no {type} primers found.\n",
274
+ f"no {primer_type} primers found.\n",
268
275
  log_file,
269
276
  exit=True
270
277
  )
@@ -314,9 +321,9 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
314
321
 
315
322
  if args.database is not None:
316
323
  # create blast query
317
- query_path = blast.create_BLAST_query(all_primers, amplicons, data_dir)
324
+ query_path = blast.create_BLAST_query(amplicons, data_dir)
318
325
  # perform primer blast
319
- amplicons, off_target_amplicons = blast.primer_blast(
326
+ amplicons = blast.primer_blast(
320
327
  data_dir,
321
328
  args.database,
322
329
  query_path,
@@ -326,23 +333,21 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
326
333
  log_file,
327
334
  mode="single_tiled"
328
335
  )
329
- else:
330
- off_target_amplicons = []
331
336
 
332
- return all_primers, amplicons, off_target_amplicons
337
+ return all_primers, amplicons
333
338
 
334
339
 
335
- def single_workflow(args, amplicons, all_primers, log_file):
340
+ def single_workflow(args, amplicons, log_file):
336
341
  """
337
342
  workflow part specific for single mode
338
343
  """
339
344
 
340
- amplicon_scheme = scheme.find_single_amplicons(amplicons, all_primers, args.report_n)
345
+ amplicon_scheme = scheme.find_single_amplicons(amplicons, args.report_n)
341
346
  logging.varvamp_progress(
342
347
  log_file,
343
348
  progress=0.9,
344
349
  job="Finding amplicons with low penalties.",
345
- progress_text=f"{len(amplicon_scheme[0])} amplicons."
350
+ progress_text=f"{len(amplicon_scheme)} amplicons."
346
351
  )
347
352
 
348
353
  return amplicon_scheme
@@ -359,8 +364,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
359
364
  # search for amplicon scheme
360
365
  coverage, amplicon_scheme = scheme.find_best_covering_scheme(
361
366
  amplicons,
362
- amplicon_graph,
363
- all_primers
367
+ amplicon_graph
364
368
  )
365
369
 
366
370
  # check for dimers
@@ -377,12 +381,13 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
377
381
  reporting.write_dimers(results_dir, dimers_not_solved)
378
382
 
379
383
  # evaluate coverage
384
+ # ATTENTION: Genome coverage of the scheme might still change slightly through resolution of primer dimers, but this potential, minor inaccuracy is currently accepted.
380
385
  percent_coverage = round(coverage/len(ambiguous_consensus)*100, 2)
381
386
  logging.varvamp_progress(
382
387
  log_file,
383
388
  progress=0.9,
384
389
  job="Creating amplicon scheme.",
385
- progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme[0]) + len(amplicon_scheme[1])} amplicons"
390
+ progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme)} amplicons"
386
391
  )
387
392
  if percent_coverage < 70:
388
393
  logging.raise_error(
@@ -450,9 +455,9 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
450
455
  # run blast if db is given
451
456
  if args.database is not None:
452
457
  # create blast query
453
- query_path = blast.create_BLAST_query_qpcr(qpcr_scheme_candidates, data_dir)
458
+ query_path = blast.create_BLAST_query(qpcr_scheme_candidates, data_dir, mode="qpcr")
454
459
  # perform primer blast
455
- amplicons, off_target_amplicons = blast.primer_blast(
460
+ qpcr_scheme_candidates = blast.primer_blast(
456
461
  data_dir,
457
462
  args.database,
458
463
  query_path,
@@ -470,9 +475,6 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
470
475
  log_file,
471
476
  exit=True
472
477
  )
473
- # report potential blast warnings
474
- if args.database is not None:
475
- blast.write_BLAST_warning(off_target_amplicons, final_schemes, log_file)
476
478
  logging.varvamp_progress(
477
479
  log_file,
478
480
  progress=0.9,
@@ -482,13 +484,13 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
482
484
  return probe_regions, final_schemes
483
485
 
484
486
 
485
- def main(sysargs=sys.argv[1:]):
487
+ def main():
486
488
  """
487
489
  main varvamp workflow
488
490
  """
489
491
 
490
492
  # start varVAMP
491
- args = get_args(sysargs)
493
+ args = get_args(sys.argv[1:])
492
494
  if not args.verbose:
493
495
  sys.stdout = open(os.devnull, 'w')
494
496
  start_time = datetime.datetime.now()
@@ -501,14 +503,26 @@ def main(sysargs=sys.argv[1:]):
501
503
  alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file)
502
504
 
503
505
  # write files that are shared in all modes
504
- reporting.write_regions_to_bed(primer_regions, data_dir)
506
+ reporting.write_regions_to_bed(primer_regions, args.name, data_dir)
505
507
  reporting.write_alignment(data_dir, alignment_cleaned)
506
- reporting.write_fasta(data_dir, "majority_consensus", majority_consensus)
507
- reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus)
508
+ reporting.write_fasta(data_dir, f"majority_consensus", f"{args.name}_consensus",majority_consensus)
509
+ reporting.write_fasta(results_dir, f"ambiguous_consensus", f"{args.name}_consensus", ambiguous_consensus)
510
+
511
+ # Functions called from here on return lists of amplicons that are refined step-wise into final schemes.
512
+ # These lists that are passed between functions and later used for reporting consist of dictionary elements,
513
+ # which represent individual amplicons. A minimal amplicon dict could take the form:
514
+ # {
515
+ # "id": amplicon_name,
516
+ # "penalty": amplicon_cost,
517
+ # "length": amplicon_length,
518
+ # "LEFT": [left primer data],
519
+ # "RIGHT": [right primer data]
520
+ # }
521
+ # to which different functions may add additional information.
508
522
 
509
523
  # SINGLE/TILED mode
510
524
  if args.mode == "tiled" or args.mode == "single":
511
- all_primers, amplicons, off_target_amplicons = single_and_tiled_shared_workflow(
525
+ all_primers, amplicons = single_and_tiled_shared_workflow(
512
526
  args,
513
527
  left_primer_candidates,
514
528
  right_primer_candidates,
@@ -519,7 +533,6 @@ def main(sysargs=sys.argv[1:]):
519
533
  amplicon_scheme = single_workflow(
520
534
  args,
521
535
  amplicons,
522
- all_primers,
523
536
  log_file
524
537
  )
525
538
  elif args.mode == "tiled":
@@ -533,24 +546,33 @@ def main(sysargs=sys.argv[1:]):
533
546
  log_file,
534
547
  results_dir
535
548
  )
536
- if args.database is not None:
537
- blast.write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file)
549
+
538
550
  # write files
539
- reporting.write_all_primers(data_dir, all_primers)
551
+
552
+ if args.mode == "tiled":
553
+ # assign amplicon numbers from 5' to 3' along the genome
554
+ amplicon_scheme.sort(key=lambda x: x["LEFT"][1])
555
+ else:
556
+ # make sure amplicons with no off-target products and with low penalties get the lowest numbers
557
+ amplicon_scheme.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
558
+ reporting.write_all_primers(data_dir, args.name, all_primers)
540
559
  reporting.write_scheme_to_files(
541
560
  results_dir,
542
561
  amplicon_scheme,
543
562
  ambiguous_consensus,
544
- args.mode
563
+ args.name,
564
+ args.mode,
565
+ log_file
545
566
  )
546
567
  reporting.varvamp_plot(
547
568
  results_dir,
548
569
  alignment_cleaned,
549
570
  primer_regions,
571
+ args.name,
550
572
  all_primers=all_primers,
551
573
  amplicon_scheme=amplicon_scheme,
552
574
  )
553
- reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold)
575
+ reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold, args.name)
554
576
 
555
577
  # QPCR mode
556
578
  if args.mode == "qpcr":
@@ -564,17 +586,22 @@ def main(sysargs=sys.argv[1:]):
564
586
  right_primer_candidates,
565
587
  log_file
566
588
  )
589
+
567
590
  # write files
568
- reporting.write_regions_to_bed(probe_regions, data_dir, "probe")
569
- reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus)
591
+
592
+ # make sure amplicons with no off-target products and with low penalties get the lowest numbers
593
+ final_schemes.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
594
+ reporting.write_regions_to_bed(probe_regions, args.name, data_dir, "probe")
595
+ reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, args.name, log_file)
570
596
  reporting.varvamp_plot(
571
597
  results_dir,
572
598
  alignment_cleaned,
573
599
  primer_regions,
600
+ args.name,
574
601
  probe_regions=probe_regions,
575
602
  amplicon_scheme=final_schemes
576
603
  )
577
- reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, mode="QPCR")
604
+ reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, args.name, mode="QPCR")
578
605
 
579
606
  # varVAMP finished
580
607
  logging.varvamp_progress(log_file, progress=1, start_time=start_time)
varvamp/scripts/blast.py CHANGED
@@ -29,41 +29,24 @@ def check_BLAST_installation(log_file):
29
29
  logging.raise_error("BLASTN is not installed", log_file, exit=True)
30
30
 
31
31
 
32
- def create_BLAST_query(all_primers, amplicons, data_dir):
32
+ def create_BLAST_query(amplicons, data_dir, mode="single_tiled"):
33
33
  """
34
- create a query for the BLAST search (tiled, single mode)
34
+ create a query for the BLAST search
35
35
  """
36
- already_written = []
37
-
38
36
  query_path = os.path.join(data_dir, "BLAST_query.fasta")
39
- with open(query_path, "w") as query:
40
- for amp in amplicons:
41
- fw_primer, rv_primer = amplicons[amp][2], amplicons[amp][3]
42
- if fw_primer not in already_written:
43
- print(f">{fw_primer}\n{all_primers['+'][fw_primer][0]}", file=query)
44
- already_written.append(fw_primer)
45
- if rv_primer not in already_written:
46
- print(f">{rv_primer}\n{all_primers['-'][rv_primer][0]}", file=query)
47
- already_written.append(rv_primer)
48
-
49
- return query_path
50
-
51
-
52
- def create_BLAST_query_qpcr(qpcr_scheme_candidates, data_dir):
53
- """
54
- create a query for the BLAST search (qpcr mode)
55
- """
56
- already_written = []
37
+ if mode == "single_tiled":
38
+ primer_types = ["LEFT", "RIGHT"]
39
+ elif mode == "qpcr":
40
+ primer_types = ["PROBE", "LEFT", "RIGHT"]
41
+ already_written = set()
57
42
 
58
- query_path = os.path.join(data_dir, "BLAST_query.fasta")
59
43
  with open(query_path, "w") as query:
60
- for amp in qpcr_scheme_candidates:
61
- for primer_type in ["PROBE", "LEFT", "RIGHT"]:
62
- name = f"{primer_type}_{qpcr_scheme_candidates[amp][primer_type][1]}_{qpcr_scheme_candidates[amp][primer_type][2]}"
63
- if name in already_written:
64
- continue
65
- print(f">{name}\n{qpcr_scheme_candidates[amp][primer_type][0]}", file=query)
66
- already_written.append(name)
44
+ for amp in amplicons:
45
+ for primer_type in primer_types:
46
+ name = f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}"
47
+ if name not in already_written:
48
+ print(f">{name}\n{amp[primer_type][0]}", file=query)
49
+ already_written.add(name)
67
50
  return query_path
68
51
 
69
52
 
@@ -168,21 +151,24 @@ def predict_non_specific_amplicons_worker(amp, blast_df, max_length, mode):
168
151
  """
169
152
  Worker function to predict unspecific targets for a single amplicon.
170
153
  """
171
- name, data = amp
172
154
  # get correct primers
173
155
  if mode == "single_tiled":
174
- primers = [data[2], data[3]]
156
+ primer_types = ["LEFT", "RIGHT"]
175
157
  elif mode == "qpcr":
176
- primers = []
177
- for primer_type in ["PROBE", "LEFT", "RIGHT"]:
178
- primers.append(f"{primer_type}_{data[primer_type][1]}_{data[primer_type][2]}")
158
+ primer_types = ["PROBE", "LEFT", "RIGHT"]
159
+ primers = []
160
+ for primer_type in primer_types:
161
+ primers.append(f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}")
179
162
  # subset df for primers
180
163
  df_amp_primers = blast_df[blast_df["query"].isin(primers)]
181
164
  # sort by reference and ref start
182
165
  df_amp_primers_sorted = df_amp_primers.sort_values(["ref", "ref_start"])
183
166
  # check for off-targets for specific primers
184
167
  if check_off_targets(df_amp_primers_sorted, max_length, primers):
185
- return name
168
+ amp["off_targets"] = True
169
+ else:
170
+ amp["off_targets"] = False
171
+ return amp
186
172
 
187
173
 
188
174
  def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_threads):
@@ -190,22 +176,16 @@ def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_thre
190
176
  Main function to predict unspecific targets within a size range and give
191
177
  these primers a high penalty. Uses multiprocessing for parallelization.
192
178
  """
193
- off_targets = []
194
179
  # process amplicons concurrently
195
180
  with multiprocessing.Pool(processes=n_threads) as pool:
196
- amp_items = amplicons.items()
197
- results = pool.starmap(predict_non_specific_amplicons_worker, [(amp, blast_df, max_length, mode) for amp in amp_items])
198
- # check results
199
- for off_target in results:
200
- if off_target is None:
201
- continue
202
- off_targets.append(off_target)
203
- if mode == "single_tiled":
204
- amplicons[off_target][5] = amplicons[off_target][5] + config.BLAST_PENALTY
205
- elif mode == "qpcr":
206
- amplicons[off_target]["penalty"] = amplicons[off_target]["penalty"] + config.BLAST_PENALTY
207
-
208
- return off_targets, amplicons
181
+ annotated_amps = [
182
+ result for result in pool.starmap(
183
+ predict_non_specific_amplicons_worker,
184
+ [(amp, blast_df, max_length, mode) for amp in amplicons]
185
+ ) if result is not None
186
+ ]
187
+ n_off_targets = sum(amp["off_targets"] for amp in annotated_amps)
188
+ return n_off_targets, annotated_amps
209
189
 
210
190
 
211
191
  def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log_file, mode):
@@ -237,14 +217,17 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
237
217
 
238
218
  blast_df = parse_and_filter_BLAST_output(blast_out)
239
219
  print("Predicting non-specific amplicons...")
240
- off_target_amplicons, amplicons = predict_non_specific_amplicons(
220
+ n_off_targets, amplicons = predict_non_specific_amplicons(
241
221
  amplicons,
242
222
  blast_df,
243
223
  max_length,
244
224
  mode,
245
225
  n_threads
246
226
  )
247
- success_text = f"varVAMP successfully predicted non-specific amplicons:\n\t> {len(off_target_amplicons)}/{len(amplicons)} amplicons could produce amplicons with the blast db.\n\t> raised their amplicon penalty by {config.BLAST_PENALTY}"
227
+ if n_off_targets > 0:
228
+ success_text = f"varVAMP predicted non-specific amplicons:\n\t> {n_off_targets}/{len(amplicons)} amplicons could produce amplicons with the blast db.\n\t> will attempt to avoid them in the final list of amplicons"
229
+ else:
230
+ success_text = f"NO off-target amplicons found with the blast db and a total of {len(amplicons)} amplicons"
248
231
  print(success_text)
249
232
  with open(log_file, 'a') as f:
250
233
  print(
@@ -253,18 +236,5 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
253
236
  )
254
237
  print("\n#### off-target search finished ####\n")
255
238
 
256
- return amplicons, off_target_amplicons
257
-
239
+ return amplicons
258
240
 
259
- def write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file):
260
- """
261
- for each primer pair that has potential unspecific amplicons
262
- write warnings to file.
263
- """
264
- for amp in off_target_amplicons:
265
- if amp in amplicon_scheme:
266
- logging.raise_error(
267
- f"{amp} could produce off-targets. No better amplicon in this area was found.",
268
- log_file,
269
- exit=False,
270
- )
@@ -4,7 +4,7 @@ This contains all varVAMP parameters.
4
4
 
5
5
  # List of all known parameters. DO NOT CHANGE!
6
6
  __all__ = [
7
- 'BLAST_MAX_DIFF', 'BLAST_PENALTY', 'BLAST_SETTINGS', 'BLAST_SIZE_MULTI',
7
+ 'BLAST_MAX_DIFF', 'BLAST_SETTINGS', 'BLAST_SIZE_MULTI',
8
8
  'END_OVERLAP',
9
9
  'PCR_DNA_CONC', 'PCR_DNTP_CONC', 'PCR_DV_CONC', 'PCR_MV_CONC',
10
10
  'PRIMER_3_PENALTY', 'PRIMER_GC_END', 'PRIMER_GC_PENALTY',
@@ -74,7 +74,6 @@ BLAST_SETTINGS = { # blast settings for query search
74
74
  }
75
75
  BLAST_MAX_DIFF = 0.5 # min percent match between primer and BLAST hit (coverage and/or mismatches)
76
76
  BLAST_SIZE_MULTI = 2 # multiplier for the max_amp size of off targets (in relation to max amp size)
77
- BLAST_PENALTY = 50 # amplicon penalty increase -> considered only if no other possibilities
78
77
 
79
78
  # nucleotide definitions, do NOT change
80
79
  NUCS = set("atcg")
@@ -15,12 +15,12 @@ from varvamp.scripts import config
15
15
  from varvamp import __version__
16
16
 
17
17
 
18
- def create_dir_structure(dir):
18
+ def create_dir_structure(dir_path):
19
19
  """
20
20
  create output folders and log file
21
21
  """
22
22
  cwd = os.getcwd()
23
- results_dir = os.path.join(cwd, dir)
23
+ results_dir = os.path.join(cwd, dir_path)
24
24
  data_dir = os.path.join(results_dir, "data/")
25
25
  # create folders
26
26
  if not os.path.exists(results_dir):
@@ -291,7 +291,6 @@ def confirm_config(args, log_file):
291
291
  (
292
292
  "BLAST_MAX_DIFF",
293
293
  "BLAST_SIZE_MULTI",
294
- "BLAST_PENALTY"
295
294
  )
296
295
  ]
297
296
 
@@ -384,7 +383,6 @@ def confirm_config(args, log_file):
384
383
  ("qpcr deletion size still considered for deltaG calculation", config.QAMPLICON_DEL_CUTOFF),
385
384
  ("maximum difference between primer and blast db", config.BLAST_MAX_DIFF),
386
385
  ("multiplier of the maximum length for non-specific amplicons", config.BLAST_SIZE_MULTI),
387
- ("blast penalty for off targets", config.BLAST_PENALTY)
388
386
  ]
389
387
  for var_type, var in non_negative_var:
390
388
  if var < 0:
@@ -468,11 +466,6 @@ def confirm_config(args, log_file):
468
466
  log_file,
469
467
  exit=True
470
468
  )
471
- if config.BLAST_PENALTY < 10:
472
- raise_error(
473
- "giving a too small penalty could result in the selection of off-target producing amplicons in the final scheme.",
474
- log_file,
475
- )
476
469
  # confirm proper BLAST settings in dictionary
477
470
  if not isinstance(config.BLAST_SETTINGS, dict):
478
471
  raise_error(
@@ -574,7 +567,7 @@ def goodbye_message():
574
567
  "Thank you. Come again.",
575
568
  ">Placeholder for your advertisement<",
576
569
  "Make primers great again!",
577
- "Ciao cacao!"
570
+ "Ciao cacao!",
578
571
  "And now lets pray to the PCR gods.",
579
572
  "**bibobibobop** task finished",
580
573
  "Thank you for traveling with varVAMP.",
@@ -588,5 +581,14 @@ def goodbye_message():
588
581
  "Barba non facit philosophum.",
589
582
  "Task failed successfully.",
590
583
  "Never gonna give you up, never gonna let you down.",
584
+ "Have you tried turning it off and on again?",
585
+ "Look, I am your primer scheme.",
586
+ "Quod erat demonstrandum.",
587
+ "Miau?",
588
+ "This is an automated message informing you that you are awsome.",
589
+ "Why was the negative-sense virus angry at the positive-sense virus?\nBecause he was left stranded!",
590
+ "If you see this message twice, you are an experienced user.",
591
+ "No one expects the spanish inquisition!",
592
+ "Primer design you must."
591
593
  ]
592
594
  print(f"\n{random.choice(messages)}")
@@ -386,13 +386,13 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
386
386
  primer_candidates.sort(key=lambda x: (x[3], x[1]))
387
387
  # ini everything with the primer with the lowest penalty
388
388
  to_retain = [primer_candidates[0]]
389
- primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]+1))
389
+ primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]))
390
390
  primer_set = set(primer_ranges)
391
391
 
392
392
  for primer in primer_candidates:
393
393
  # get the thirds of the primer, only consider the middle
394
394
  thirds_len = int((primer[2] - primer[1])/3)
395
- primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len + 1))
395
+ primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len))
396
396
  # check if none of the nucleotides of the next primer
397
397
  # are already covered by a better primer
398
398
  if not any(x in primer_positions for x in primer_set):
varvamp/scripts/qpcr.py CHANGED
@@ -211,13 +211,13 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
211
211
  if "LEFT" in probe:
212
212
  if not qpcr_probes[probe][1] in range(
213
213
  left_primer[2] + config.QPROBE_DISTANCE[0],
214
- left_primer[2] + config.QPROBE_DISTANCE[1] + 1
214
+ left_primer[2] + config.QPROBE_DISTANCE[1]
215
215
  ):
216
216
  continue
217
217
  elif "RIGHT" in probe:
218
218
  if not right_primer[1] in range(
219
219
  qpcr_probes[probe][2] + config.QPROBE_DISTANCE[0],
220
- qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1] + 1
220
+ qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1]
221
221
 
222
222
  ):
223
223
  continue
@@ -258,7 +258,7 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
258
258
  there is no need to consider this primer probe combination.
259
259
  """
260
260
 
261
- qpcr_scheme_candidates = {}
261
+ qpcr_scheme_candidates = []
262
262
  found_amplicons = []
263
263
  amplicon_nr = -1
264
264
 
@@ -279,15 +279,16 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
279
279
  # populate the primer dictionary:
280
280
  amplicon_nr += 1
281
281
  found_amplicons.append(primer_combination)
282
- qpcr_scheme_candidates[f"AMPLICON_{amplicon_nr}"] = {
283
- "penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
284
- "PROBE": qpcr_probes[probe],
285
- "LEFT": primer_combination[0],
286
- "RIGHT": primer_combination[1]
287
- }
282
+ qpcr_scheme_candidates.append(
283
+ {
284
+ "id": f"AMPLICON_{amplicon_nr}",
285
+ "penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
286
+ "PROBE": qpcr_probes[probe],
287
+ "LEFT": primer_combination[0],
288
+ "RIGHT": primer_combination[1]
289
+ }
290
+ )
288
291
  # and again sort by total penalty (left + right + probe)
289
- qpcr_scheme_candidates = dict(sorted(qpcr_scheme_candidates.items(), key=lambda x: x[1]["penalty"]))
290
-
291
292
  return qpcr_scheme_candidates
292
293
 
293
294
 
@@ -296,21 +297,17 @@ def process_single_amplicon_deltaG(amplicon, majority_consensus):
296
297
  Process a single amplicon to test its deltaG and apply filtering.
297
298
  This function will be called concurrently by multiple threads.
298
299
  """
299
- name, data = amplicon
300
- start = data["LEFT"][1]
301
- stop = data["RIGHT"][2]
302
- seq = majority_consensus[start:stop]
300
+ seq = majority_consensus[amplicon["LEFT"][1]:amplicon["RIGHT"][2]]
303
301
  seq = seq.replace("N", "")
304
302
  seq = seq.replace("n", "")
305
- amp_positions = list(range(start, stop + 1))
306
303
  # check if the amplicon overlaps with an amplicon that was previously
307
304
  # found and had a high enough deltaG
308
- min_temp = min((primers.calc_temp(data["LEFT"][0]),
309
- primers.calc_temp(data["RIGHT"][0])))
305
+ min_temp = min((primers.calc_temp(amplicon["LEFT"][0]),
306
+ primers.calc_temp(amplicon["RIGHT"][0])))
310
307
  # calculate deltaG at the minimal primer temp
311
- deltaG = seqfold.dg(seq, min_temp)
308
+ amplicon["deltaG"] = seqfold.dg(seq, min_temp)
312
309
 
313
- return deltaG, amp_positions, name
310
+ return amplicon
314
311
 
315
312
 
316
313
  def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_threads):
@@ -319,29 +316,34 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
319
316
  and filters if they fall below the cutoff. Multiple processes are used
320
317
  for processing amplicons in parallel.
321
318
  """
322
- final_schemes = {}
323
- passed_counter = 0 # counter for re-naming amplicons that passed deltaG cutoff
324
- amplicon_set = set()
319
+ final_amplicons = []
325
320
 
326
321
  # Create a pool of processes to handle the concurrent processing
327
322
  with multiprocessing.Pool(processes=n_threads) as pool:
328
323
  # Create a list of the first n amplicon tuples for processing
329
- amplicons = itertools.islice(qpcr_schemes_candidates.items(), n_to_test)
324
+ # The list is sorted first on whether off-targets were predicted for the amplicon,
325
+ # then by penalty. This ensures that amplicons with off-targets are always considered last
326
+ amplicons = itertools.islice(
327
+ sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])),
328
+ n_to_test
329
+ )
330
330
  # process amplicons concurrently
331
331
  results = pool.starmap(process_single_amplicon_deltaG, [(amp, majority_consensus) for amp in amplicons])
332
332
  # Process the results
333
- for deltaG, amp_positions, amp_name in results:
333
+ retained_ranges = []
334
+ for amp in results:
334
335
  # check if the amplicon overlaps with an amplicon that was previously
335
336
  # found and had a high enough deltaG
336
- if any(x in amp_positions for x in amplicon_set):
337
+ if amp["deltaG"] <= deltaG_cutoff:
337
338
  continue
338
- # and if this passes cutoff make a dict entry and do not allow further
339
- # amplicons in that region (they will have a lower penalty)
340
- if deltaG > deltaG_cutoff:
341
- new_name = f"QPCR_SCHEME_{passed_counter}"
342
- final_schemes[new_name] = qpcr_schemes_candidates[amp_name]
343
- final_schemes[new_name]["deltaG"] = deltaG
344
- amplicon_set.update(amp_positions)
345
- passed_counter += 1
346
-
347
- return final_schemes
339
+ amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
340
+ overlaps_retained = False
341
+ for r in retained_ranges:
342
+ if amp_range.start < r.stop and r.start < amp_range.stop:
343
+ overlaps_retained = True
344
+ break
345
+ if not overlaps_retained:
346
+ final_amplicons.append(amp)
347
+ retained_ranges.append(amp_range)
348
+
349
+ return final_amplicons