varvamp 1.1.3__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {varvamp-1.1.3 → varvamp-1.2.0}/PKG-INFO +1 -1
  2. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/__init__.py +1 -1
  3. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/command.py +38 -20
  4. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/blast.py +36 -66
  5. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/default_config.py +1 -2
  6. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/logging.py +0 -7
  7. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/primers.py +2 -2
  8. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/qpcr.py +38 -36
  9. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/reporting.py +156 -118
  10. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/scheme.py +115 -107
  11. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/PKG-INFO +1 -1
  12. {varvamp-1.1.3 → varvamp-1.2.0}/README.md +0 -0
  13. {varvamp-1.1.3 → varvamp-1.2.0}/setup.cfg +0 -0
  14. {varvamp-1.1.3 → varvamp-1.2.0}/setup.py +0 -0
  15. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/__main__.py +0 -0
  16. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/__init__.py +0 -0
  17. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/alignment.py +0 -0
  18. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/consensus.py +0 -0
  19. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/get_config.py +0 -0
  20. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/param_estimation.py +0 -0
  21. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp/scripts/regions.py +0 -0
  22. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/SOURCES.txt +0 -0
  23. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/dependency_links.txt +0 -0
  24. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/entry_points.txt +0 -0
  25. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/not-zip-safe +0 -0
  26. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/requires.txt +0 -0
  27. {varvamp-1.1.3 → varvamp-1.2.0}/varvamp.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: varvamp
3
- Version: 1.1.3
3
+ Version: 1.2.0
4
4
  Summary: Variable VirusAMPlicons (varVAMP) is a tool to design primers for highly diverse viruses
5
5
  Home-page: https://github.com/jonas-fuchs/varVAMP
6
6
  Author: Dr. Jonas Fuchs
@@ -1,3 +1,3 @@
1
1
  """Tool to design amplicons for highly variable virus genomes"""
2
2
  _program = "varvamp"
3
- __version__ = "1.1.3"
3
+ __version__ = "1.2.0"
@@ -314,9 +314,9 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
314
314
 
315
315
  if args.database is not None:
316
316
  # create blast query
317
- query_path = blast.create_BLAST_query(all_primers, amplicons, data_dir)
317
+ query_path = blast.create_BLAST_query(amplicons, data_dir)
318
318
  # perform primer blast
319
- amplicons, off_target_amplicons = blast.primer_blast(
319
+ amplicons = blast.primer_blast(
320
320
  data_dir,
321
321
  args.database,
322
322
  query_path,
@@ -326,10 +326,8 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_
326
326
  log_file,
327
327
  mode="single_tiled"
328
328
  )
329
- else:
330
- off_target_amplicons = []
331
329
 
332
- return all_primers, amplicons, off_target_amplicons
330
+ return all_primers, amplicons
333
331
 
334
332
 
335
333
  def single_workflow(args, amplicons, all_primers, log_file):
@@ -337,12 +335,12 @@ def single_workflow(args, amplicons, all_primers, log_file):
337
335
  workflow part specific for single mode
338
336
  """
339
337
 
340
- amplicon_scheme = scheme.find_single_amplicons(amplicons, all_primers, args.report_n)
338
+ amplicon_scheme = scheme.find_single_amplicons(amplicons, args.report_n)
341
339
  logging.varvamp_progress(
342
340
  log_file,
343
341
  progress=0.9,
344
342
  job="Finding amplicons with low penalties.",
345
- progress_text=f"{len(amplicon_scheme[0])} amplicons."
343
+ progress_text=f"{len(amplicon_scheme)} amplicons."
346
344
  )
347
345
 
348
346
  return amplicon_scheme
@@ -359,8 +357,7 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
359
357
  # search for amplicon scheme
360
358
  coverage, amplicon_scheme = scheme.find_best_covering_scheme(
361
359
  amplicons,
362
- amplicon_graph,
363
- all_primers
360
+ amplicon_graph
364
361
  )
365
362
 
366
363
  # check for dimers
@@ -377,12 +374,13 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
377
374
  reporting.write_dimers(results_dir, dimers_not_solved)
378
375
 
379
376
  # evaluate coverage
377
+ # ATTENTION: Genome coverage of the scheme might still change slightly through resolution of primer dimers, but this potential, minor inaccuracy is currently accepted.
380
378
  percent_coverage = round(coverage/len(ambiguous_consensus)*100, 2)
381
379
  logging.varvamp_progress(
382
380
  log_file,
383
381
  progress=0.9,
384
382
  job="Creating amplicon scheme.",
385
- progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme[0]) + len(amplicon_scheme[1])} amplicons"
383
+ progress_text=f"{percent_coverage} % total coverage with {len(amplicon_scheme)} amplicons"
386
384
  )
387
385
  if percent_coverage < 70:
388
386
  logging.raise_error(
@@ -450,9 +448,9 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
450
448
  # run blast if db is given
451
449
  if args.database is not None:
452
450
  # create blast query
453
- query_path = blast.create_BLAST_query_qpcr(qpcr_scheme_candidates, data_dir)
451
+ query_path = blast.create_BLAST_query(qpcr_scheme_candidates, data_dir, mode="qpcr")
454
452
  # perform primer blast
455
- amplicons, off_target_amplicons = blast.primer_blast(
453
+ qpcr_scheme_candidates = blast.primer_blast(
456
454
  data_dir,
457
455
  args.database,
458
456
  query_path,
@@ -470,9 +468,6 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
470
468
  log_file,
471
469
  exit=True
472
470
  )
473
- # report potential blast warnings
474
- if args.database is not None:
475
- blast.write_BLAST_warning(off_target_amplicons, final_schemes, log_file)
476
471
  logging.varvamp_progress(
477
472
  log_file,
478
473
  progress=0.9,
@@ -506,9 +501,21 @@ def main(sysargs=sys.argv[1:]):
506
501
  reporting.write_fasta(data_dir, "majority_consensus", majority_consensus)
507
502
  reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus)
508
503
 
504
+ # Functions called from here on return lists of amplicons that are refined step-wise into final schemes.
505
+ # These lists that are passed between functions and later used for reporting consist of dictionary elements,
506
+ # which represent individual amplicons. A minimal amplicon dict could take the form:
507
+ # {
508
+ # "id": amplicon_name,
509
+ # "penalty": amplicon_cost,
510
+ # "length": amplicon_length,
511
+ # "LEFT": [left primer data],
512
+ # "RIGHT": [right primer data]
513
+ # }
514
+ # to which different functions may add additional information.
515
+
509
516
  # SINGLE/TILED mode
510
517
  if args.mode == "tiled" or args.mode == "single":
511
- all_primers, amplicons, off_target_amplicons = single_and_tiled_shared_workflow(
518
+ all_primers, amplicons = single_and_tiled_shared_workflow(
512
519
  args,
513
520
  left_primer_candidates,
514
521
  right_primer_candidates,
@@ -533,15 +540,22 @@ def main(sysargs=sys.argv[1:]):
533
540
  log_file,
534
541
  results_dir
535
542
  )
536
- if args.database is not None:
537
- blast.write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file)
543
+
538
544
  # write files
545
+
546
+ if args.mode == "tiled":
547
+ # assign amplicon numbers from 5' to 3' along the genome
548
+ amplicon_scheme.sort(key=lambda x: x["LEFT"][1])
549
+ else:
550
+ # make sure amplicons with no off-target products and with low penalties get the lowest numbers
551
+ amplicon_scheme.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
539
552
  reporting.write_all_primers(data_dir, all_primers)
540
553
  reporting.write_scheme_to_files(
541
554
  results_dir,
542
555
  amplicon_scheme,
543
556
  ambiguous_consensus,
544
- args.mode
557
+ args.mode,
558
+ log_file
545
559
  )
546
560
  reporting.varvamp_plot(
547
561
  results_dir,
@@ -564,9 +578,13 @@ def main(sysargs=sys.argv[1:]):
564
578
  right_primer_candidates,
565
579
  log_file
566
580
  )
581
+
567
582
  # write files
583
+
584
+ # make sure amplicons with no off-target products and with low penalties get the lowest numbers
585
+ final_schemes.sort(key=lambda x: (x.get("off_targets", False), x["penalty"]))
568
586
  reporting.write_regions_to_bed(probe_regions, data_dir, "probe")
569
- reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus)
587
+ reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, log_file)
570
588
  reporting.varvamp_plot(
571
589
  results_dir,
572
590
  alignment_cleaned,
@@ -29,41 +29,24 @@ def check_BLAST_installation(log_file):
29
29
  logging.raise_error("BLASTN is not installed", log_file, exit=True)
30
30
 
31
31
 
32
- def create_BLAST_query(all_primers, amplicons, data_dir):
32
+ def create_BLAST_query(amplicons, data_dir, mode="single_tiled"):
33
33
  """
34
- create a query for the BLAST search (tiled, single mode)
34
+ create a query for the BLAST search
35
35
  """
36
- already_written = []
37
-
38
36
  query_path = os.path.join(data_dir, "BLAST_query.fasta")
39
- with open(query_path, "w") as query:
40
- for amp in amplicons:
41
- fw_primer, rv_primer = amplicons[amp][2], amplicons[amp][3]
42
- if fw_primer not in already_written:
43
- print(f">{fw_primer}\n{all_primers['+'][fw_primer][0]}", file=query)
44
- already_written.append(fw_primer)
45
- if rv_primer not in already_written:
46
- print(f">{rv_primer}\n{all_primers['-'][rv_primer][0]}", file=query)
47
- already_written.append(rv_primer)
48
-
49
- return query_path
50
-
51
-
52
- def create_BLAST_query_qpcr(qpcr_scheme_candidates, data_dir):
53
- """
54
- create a query for the BLAST search (qpcr mode)
55
- """
56
- already_written = []
37
+ if mode == "single_tiled":
38
+ primer_types = ["LEFT", "RIGHT"]
39
+ elif mode == "qpcr":
40
+ primer_types = ["PROBE", "LEFT", "RIGHT"]
41
+ already_written = set()
57
42
 
58
- query_path = os.path.join(data_dir, "BLAST_query.fasta")
59
43
  with open(query_path, "w") as query:
60
- for amp in qpcr_scheme_candidates:
61
- for primer_type in ["PROBE", "LEFT", "RIGHT"]:
62
- name = f"{primer_type}_{qpcr_scheme_candidates[amp][primer_type][1]}_{qpcr_scheme_candidates[amp][primer_type][2]}"
63
- if name in already_written:
64
- continue
65
- print(f">{name}\n{qpcr_scheme_candidates[amp][primer_type][0]}", file=query)
66
- already_written.append(name)
44
+ for amp in amplicons:
45
+ for primer_type in primer_types:
46
+ name = f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}"
47
+ if name not in already_written:
48
+ print(f">{name}\n{amp[primer_type][0]}", file=query)
49
+ already_written.add(name)
67
50
  return query_path
68
51
 
69
52
 
@@ -168,21 +151,24 @@ def predict_non_specific_amplicons_worker(amp, blast_df, max_length, mode):
168
151
  """
169
152
  Worker function to predict unspecific targets for a single amplicon.
170
153
  """
171
- name, data = amp
172
154
  # get correct primers
173
155
  if mode == "single_tiled":
174
- primers = [data[2], data[3]]
156
+ primer_types = ["LEFT", "RIGHT"]
175
157
  elif mode == "qpcr":
176
- primers = []
177
- for primer_type in ["PROBE", "LEFT", "RIGHT"]:
178
- primers.append(f"{primer_type}_{data[primer_type][1]}_{data[primer_type][2]}")
158
+ primer_types = ["PROBE", "LEFT", "RIGHT"]
159
+ primers = []
160
+ for primer_type in primer_types:
161
+ primers.append(f"{primer_type}_{amp[primer_type][1]}_{amp[primer_type][2]}")
179
162
  # subset df for primers
180
163
  df_amp_primers = blast_df[blast_df["query"].isin(primers)]
181
164
  # sort by reference and ref start
182
165
  df_amp_primers_sorted = df_amp_primers.sort_values(["ref", "ref_start"])
183
166
  # check for off-targets for specific primers
184
167
  if check_off_targets(df_amp_primers_sorted, max_length, primers):
185
- return name
168
+ amp["off_targets"] = True
169
+ else:
170
+ amp["off_targets"] = False
171
+ return amp
186
172
 
187
173
 
188
174
  def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_threads):
@@ -190,22 +176,16 @@ def predict_non_specific_amplicons(amplicons, blast_df, max_length, mode, n_thre
190
176
  Main function to predict unspecific targets within a size range and give
191
177
  these primers a high penalty. Uses multiprocessing for parallelization.
192
178
  """
193
- off_targets = []
194
179
  # process amplicons concurrently
195
180
  with multiprocessing.Pool(processes=n_threads) as pool:
196
- amp_items = amplicons.items()
197
- results = pool.starmap(predict_non_specific_amplicons_worker, [(amp, blast_df, max_length, mode) for amp in amp_items])
198
- # check results
199
- for off_target in results:
200
- if off_target is None:
201
- continue
202
- off_targets.append(off_target)
203
- if mode == "single_tiled":
204
- amplicons[off_target][5] = amplicons[off_target][5] + config.BLAST_PENALTY
205
- elif mode == "qpcr":
206
- amplicons[off_target]["penalty"] = amplicons[off_target]["penalty"] + config.BLAST_PENALTY
207
-
208
- return off_targets, amplicons
181
+ annotated_amps = [
182
+ result for result in pool.starmap(
183
+ predict_non_specific_amplicons_worker,
184
+ [(amp, blast_df, max_length, mode) for amp in amplicons]
185
+ ) if result is not None
186
+ ]
187
+ n_off_targets = sum(amp["off_targets"] for amp in annotated_amps)
188
+ return n_off_targets, annotated_amps
209
189
 
210
190
 
211
191
  def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log_file, mode):
@@ -237,14 +217,17 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
237
217
 
238
218
  blast_df = parse_and_filter_BLAST_output(blast_out)
239
219
  print("Predicting non-specific amplicons...")
240
- off_target_amplicons, amplicons = predict_non_specific_amplicons(
220
+ n_off_targets, amplicons = predict_non_specific_amplicons(
241
221
  amplicons,
242
222
  blast_df,
243
223
  max_length,
244
224
  mode,
245
225
  n_threads
246
226
  )
247
- success_text = f"varVAMP successfully predicted non-specific amplicons:\n\t> {len(off_target_amplicons)}/{len(amplicons)} amplicons could produce amplicons with the blast db.\n\t> raised their amplicon penalty by {config.BLAST_PENALTY}"
227
+ if n_off_targets > 0:
228
+ success_text = f"varVAMP predicted non-specific amplicons:\n\t> {n_off_targets}/{len(amplicons)} amplicons could produce amplicons with the blast db.\n\t> will attempt to avoid them in the final list of amplicons"
229
+ else:
230
+ success_text = f"NO off-target amplicons found with the blast db and a total of {len(amplicons)} amplicons"
248
231
  print(success_text)
249
232
  with open(log_file, 'a') as f:
250
233
  print(
@@ -253,18 +236,5 @@ def primer_blast(data_dir, db, query_path, amplicons, max_length, n_threads, log
253
236
  )
254
237
  print("\n#### off-target search finished ####\n")
255
238
 
256
- return amplicons, off_target_amplicons
257
-
239
+ return amplicons
258
240
 
259
- def write_BLAST_warning(off_target_amplicons, amplicon_scheme, log_file):
260
- """
261
- for each primer pair that has potential unspecific amplicons
262
- write warnings to file.
263
- """
264
- for amp in off_target_amplicons:
265
- if amp in amplicon_scheme:
266
- logging.raise_error(
267
- f"{amp} could produce off-targets. No better amplicon in this area was found.",
268
- log_file,
269
- exit=False,
270
- )
@@ -4,7 +4,7 @@ This contains all varVAMP parameters.
4
4
 
5
5
  # List of all known parameters. DO NOT CHANGE!
6
6
  __all__ = [
7
- 'BLAST_MAX_DIFF', 'BLAST_PENALTY', 'BLAST_SETTINGS', 'BLAST_SIZE_MULTI',
7
+ 'BLAST_MAX_DIFF', 'BLAST_SETTINGS', 'BLAST_SIZE_MULTI',
8
8
  'END_OVERLAP',
9
9
  'PCR_DNA_CONC', 'PCR_DNTP_CONC', 'PCR_DV_CONC', 'PCR_MV_CONC',
10
10
  'PRIMER_3_PENALTY', 'PRIMER_GC_END', 'PRIMER_GC_PENALTY',
@@ -74,7 +74,6 @@ BLAST_SETTINGS = { # blast settings for query search
74
74
  }
75
75
  BLAST_MAX_DIFF = 0.5 # min percent match between primer and BLAST hit (coverage and/or mismatches)
76
76
  BLAST_SIZE_MULTI = 2 # multiplier for the max_amp size of off targets (in relation to max amp size)
77
- BLAST_PENALTY = 50 # amplicon penalty increase -> considered only if no other possibilities
78
77
 
79
78
  # nucleotide definitions, do NOT change
80
79
  NUCS = set("atcg")
@@ -291,7 +291,6 @@ def confirm_config(args, log_file):
291
291
  (
292
292
  "BLAST_MAX_DIFF",
293
293
  "BLAST_SIZE_MULTI",
294
- "BLAST_PENALTY"
295
294
  )
296
295
  ]
297
296
 
@@ -384,7 +383,6 @@ def confirm_config(args, log_file):
384
383
  ("qpcr deletion size still considered for deltaG calculation", config.QAMPLICON_DEL_CUTOFF),
385
384
  ("maximum difference between primer and blast db", config.BLAST_MAX_DIFF),
386
385
  ("multiplier of the maximum length for non-specific amplicons", config.BLAST_SIZE_MULTI),
387
- ("blast penalty for off targets", config.BLAST_PENALTY)
388
386
  ]
389
387
  for var_type, var in non_negative_var:
390
388
  if var < 0:
@@ -468,11 +466,6 @@ def confirm_config(args, log_file):
468
466
  log_file,
469
467
  exit=True
470
468
  )
471
- if config.BLAST_PENALTY < 10:
472
- raise_error(
473
- "giving a too small penalty could result in the selection of off-target producing amplicons in the final scheme.",
474
- log_file,
475
- )
476
469
  # confirm proper BLAST settings in dictionary
477
470
  if not isinstance(config.BLAST_SETTINGS, dict):
478
471
  raise_error(
@@ -386,13 +386,13 @@ def find_best_primers(left_primer_candidates, right_primer_candidates):
386
386
  primer_candidates.sort(key=lambda x: (x[3], x[1]))
387
387
  # ini everything with the primer with the lowest penalty
388
388
  to_retain = [primer_candidates[0]]
389
- primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]+1))
389
+ primer_ranges = list(range(primer_candidates[0][1], primer_candidates[0][2]))
390
390
  primer_set = set(primer_ranges)
391
391
 
392
392
  for primer in primer_candidates:
393
393
  # get the thirds of the primer, only consider the middle
394
394
  thirds_len = int((primer[2] - primer[1])/3)
395
- primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len + 1))
395
+ primer_positions = list(range(primer[1] + thirds_len, primer[2] - thirds_len))
396
396
  # check if none of the nucleotides of the next primer
397
397
  # are already covered by a better primer
398
398
  if not any(x in primer_positions for x in primer_set):
@@ -211,13 +211,13 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
211
211
  if "LEFT" in probe:
212
212
  if not qpcr_probes[probe][1] in range(
213
213
  left_primer[2] + config.QPROBE_DISTANCE[0],
214
- left_primer[2] + config.QPROBE_DISTANCE[1] + 1
214
+ left_primer[2] + config.QPROBE_DISTANCE[1]
215
215
  ):
216
216
  continue
217
217
  elif "RIGHT" in probe:
218
218
  if not right_primer[1] in range(
219
219
  qpcr_probes[probe][2] + config.QPROBE_DISTANCE[0],
220
- qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1] + 1
220
+ qpcr_probes[probe][2] + config.QPROBE_DISTANCE[1]
221
221
 
222
222
  ):
223
223
  continue
@@ -258,7 +258,7 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
258
258
  there is no need to consider this primer probe combination.
259
259
  """
260
260
 
261
- qpcr_scheme_candidates = {}
261
+ qpcr_scheme_candidates = []
262
262
  found_amplicons = []
263
263
  amplicon_nr = -1
264
264
 
@@ -279,15 +279,16 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
279
279
  # populate the primer dictionary:
280
280
  amplicon_nr += 1
281
281
  found_amplicons.append(primer_combination)
282
- qpcr_scheme_candidates[f"AMPLICON_{amplicon_nr}"] = {
283
- "penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
284
- "PROBE": qpcr_probes[probe],
285
- "LEFT": primer_combination[0],
286
- "RIGHT": primer_combination[1]
287
- }
282
+ qpcr_scheme_candidates.append(
283
+ {
284
+ "id": f"AMPLICON_{amplicon_nr}",
285
+ "penalty": qpcr_probes[probe][3] + primer_combination[0][3] + primer_combination[1][3],
286
+ "PROBE": qpcr_probes[probe],
287
+ "LEFT": primer_combination[0],
288
+ "RIGHT": primer_combination[1]
289
+ }
290
+ )
288
291
  # and again sort by total penalty (left + right + probe)
289
- qpcr_scheme_candidates = dict(sorted(qpcr_scheme_candidates.items(), key=lambda x: x[1]["penalty"]))
290
-
291
292
  return qpcr_scheme_candidates
292
293
 
293
294
 
@@ -296,21 +297,17 @@ def process_single_amplicon_deltaG(amplicon, majority_consensus):
296
297
  Process a single amplicon to test its deltaG and apply filtering.
297
298
  This function will be called concurrently by multiple threads.
298
299
  """
299
- name, data = amplicon
300
- start = data["LEFT"][1]
301
- stop = data["RIGHT"][2]
302
- seq = majority_consensus[start:stop]
300
+ seq = majority_consensus[amplicon["LEFT"][1]:amplicon["RIGHT"][2]]
303
301
  seq = seq.replace("N", "")
304
302
  seq = seq.replace("n", "")
305
- amp_positions = list(range(start, stop + 1))
306
303
  # check if the amplicon overlaps with an amplicon that was previously
307
304
  # found and had a high enough deltaG
308
- min_temp = min((primers.calc_temp(data["LEFT"][0]),
309
- primers.calc_temp(data["RIGHT"][0])))
305
+ min_temp = min((primers.calc_temp(amplicon["LEFT"][0]),
306
+ primers.calc_temp(amplicon["RIGHT"][0])))
310
307
  # calculate deltaG at the minimal primer temp
311
- deltaG = seqfold.dg(seq, min_temp)
308
+ amplicon["deltaG"] = seqfold.dg(seq, min_temp)
312
309
 
313
- return deltaG, amp_positions, name
310
+ return amplicon
314
311
 
315
312
 
316
313
  def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_threads):
@@ -319,29 +316,34 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
319
316
  and filters if they fall below the cutoff. Multiple processes are used
320
317
  for processing amplicons in parallel.
321
318
  """
322
- final_schemes = {}
323
- passed_counter = 0 # counter for re-naming amplicons that passed deltaG cutoff
324
- amplicon_set = set()
319
+ final_amplicons = []
325
320
 
326
321
  # Create a pool of processes to handle the concurrent processing
327
322
  with multiprocessing.Pool(processes=n_threads) as pool:
328
323
  # Create a list of the first n amplicon tuples for processing
329
- amplicons = itertools.islice(qpcr_schemes_candidates.items(), n_to_test)
324
+ # The list is sorted first on whether offset targets were predicted for the amplicon,
325
+ # then by penalty. This ensures that amplicons with offset targets are always considered last
326
+ amplicons = itertools.islice(
327
+ sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])),
328
+ n_to_test
329
+ )
330
330
  # process amplicons concurrently
331
331
  results = pool.starmap(process_single_amplicon_deltaG, [(amp, majority_consensus) for amp in amplicons])
332
332
  # Process the results
333
- for deltaG, amp_positions, amp_name in results:
333
+ retained_ranges = []
334
+ for amp in results:
334
335
  # check if the amplicon overlaps with an amplicon that was previously
335
336
  # found and had a high enough deltaG
336
- if any(x in amp_positions for x in amplicon_set):
337
+ if amp["deltaG"] <= deltaG_cutoff:
337
338
  continue
338
- # and if this passes cutoff make a dict entry and do not allow further
339
- # amplicons in that region (they will have a lower penalty)
340
- if deltaG > deltaG_cutoff:
341
- new_name = f"QPCR_SCHEME_{passed_counter}"
342
- final_schemes[new_name] = qpcr_schemes_candidates[amp_name]
343
- final_schemes[new_name]["deltaG"] = deltaG
344
- amplicon_set.update(amp_positions)
345
- passed_counter += 1
346
-
347
- return final_schemes
339
+ amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
340
+ overlaps_retained = False
341
+ for r in retained_ranges:
342
+ if amp_range.start < r.stop and r.start < amp_range.stop:
343
+ overlaps_retained = True
344
+ break
345
+ if not overlaps_retained:
346
+ final_amplicons.append(amp)
347
+ retained_ranges.append(amp_range)
348
+
349
+ return final_amplicons