varvamp 1.1.2__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ from matplotlib.backends.backend_pdf import PdfPages
16
16
  # varVAMP
17
17
  from varvamp.scripts import primers
18
18
  from varvamp.scripts import config
19
+ from varvamp.scripts import logging
19
20
 
20
21
 
21
22
  def write_fasta(path, seq_id, seq):
@@ -48,10 +49,9 @@ def write_regions_to_bed(primer_regions, path, mode=None):
48
49
  outfile = f"{path}probe_regions.bed"
49
50
  else:
50
51
  outfile = f"{path}primer_regions.bed"
51
- counter = 0
52
52
 
53
53
  with open(outfile, 'w') as o:
54
- for region in primer_regions:
54
+ for counter, region in enumerate(primer_regions):
55
55
  print(
56
56
  "ambiguous_consensus",
57
57
  region[0],
@@ -60,7 +60,6 @@ def write_regions_to_bed(primer_regions, path, mode=None):
60
60
  sep="\t",
61
61
  file=o
62
62
  )
63
- counter += 1
64
63
 
65
64
 
66
65
  def write_primers_to_bed(outfile, primer_name, primer_properties, direction):
@@ -121,7 +120,7 @@ def calc_mean_stats(permutations):
121
120
  return round(gc/len(permutations), 1), round(temp/len(permutations), 1)
122
121
 
123
122
 
124
- def write_qpcr_to_files(path, final_schemes, ambiguous_consensus):
123
+ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file):
125
124
  """
126
125
  write all relevant bed files and tsv file for the qPCR design
127
126
  """
@@ -134,32 +133,43 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus):
134
133
 
135
134
  with open(tsv_file, "w") as tsv, open(tsv_file_2, "w") as tsv2, open(amplicon_bed_file, "w") as bed, open(primer_fasta_file, "w") as fasta:
136
135
  print(
137
- "qpcr_scheme\toligo_type\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty",
136
+ "qpcr_scheme\toligo_type\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons",
138
137
  file=tsv2
139
138
  )
140
139
  print(
141
- "qpcr_scheme\tpenalty\tdeltaG\tlength\tstart\tstop\tseq",
140
+ "qpcr_scheme\toff_target_amplicons\tpenalty\tdeltaG\tlength\tstart\tstop\tseq",
142
141
  file=tsv
143
142
  )
144
- for scheme in final_schemes:
143
+ for n, amp in enumerate(final_schemes):
144
+ amp_name = f"QPCR_SCHEME_{n}"
145
145
  # write bed amplicon file
146
146
  print(
147
147
  "ambiguous_consensus",
148
- final_schemes[scheme]["LEFT"][1],
149
- final_schemes[scheme]["RIGHT"][2],
150
- scheme,
151
- round(final_schemes[scheme]["penalty"], 1),
148
+ amp["LEFT"][1],
149
+ amp["RIGHT"][2],
150
+ amp_name,
151
+ round(amp["penalty"], 1),
152
+ ".",
152
153
  sep="\t",
153
154
  file=bed
154
155
  )
155
156
  # write tsv
156
- amplicon_start = final_schemes[scheme]["LEFT"][1]
157
- amplicon_stop = final_schemes[scheme]["RIGHT"][2]
157
+ amplicon_start = amp["LEFT"][1]
158
+ amplicon_stop = amp["RIGHT"][2]
159
+ if "off_targets" in amp:
160
+ if amp["off_targets"]:
161
+ amplicon_has_off_target = "Yes"
162
+ write_BLAST_warning(amp_name, log_file)
163
+ else:
164
+ amplicon_has_off_target = "No"
165
+ else:
166
+ amplicon_has_off_target = "n.d."
158
167
  amplicon_seq = ambiguous_consensus[amplicon_start:amplicon_stop]
159
168
  print(
160
- scheme,
161
- round(final_schemes[scheme]["penalty"], 1),
162
- final_schemes[scheme]["deltaG"],
169
+ amp_name,
170
+ amplicon_has_off_target,
171
+ round(amp["penalty"], 1),
172
+ amp["deltaG"],
163
173
  len(amplicon_seq),
164
174
  amplicon_start + 1,
165
175
  amplicon_stop,
@@ -168,11 +178,9 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus):
168
178
  file=tsv
169
179
  )
170
180
  # write tsv2
171
- for oligo_type in final_schemes[scheme]:
172
- if oligo_type == "penalty" or oligo_type == "deltaG":
173
- continue
174
- seq = ambiguous_consensus[final_schemes[scheme][oligo_type][1]:final_schemes[scheme][oligo_type][2]]
175
- if oligo_type == "RIGHT" or all([oligo_type == "PROBE", final_schemes[scheme]["PROBE"][5] == "-"]):
181
+ for oligo_type in ["LEFT", "PROBE", "RIGHT"]:
182
+ seq = ambiguous_consensus[amp[oligo_type][1]:amp[oligo_type][2]]
183
+ if oligo_type == "RIGHT" or (oligo_type == "PROBE" and amp["PROBE"][5] == "-"):
176
184
  seq = primers.rev_complement(seq)
177
185
  direction = "-"
178
186
  else:
@@ -182,32 +190,33 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus):
182
190
  gc, temp = calc_mean_stats(permutations)
183
191
 
184
192
  print(
185
- scheme,
193
+ amp_name,
186
194
  oligo_type,
187
- final_schemes[scheme][oligo_type][1] + 1,
188
- final_schemes[scheme][oligo_type][2],
195
+ amp[oligo_type][1] + 1,
196
+ amp[oligo_type][2],
189
197
  seq.upper(),
190
198
  len(seq),
191
- round(primers.calc_gc(final_schemes[scheme][oligo_type][0]), 1),
192
- round(primers.calc_temp(final_schemes[scheme][oligo_type][0]), 1),
199
+ round(primers.calc_gc(amp[oligo_type][0]), 1),
200
+ round(primers.calc_temp(amp[oligo_type][0]), 1),
193
201
  gc,
194
202
  temp,
195
- round(final_schemes[scheme][oligo_type][3], 1),
203
+ round(amp[oligo_type][3], 1),
204
+ amplicon_has_off_target,
196
205
  sep="\t",
197
206
  file=tsv2
198
207
  )
199
208
  # write primer bed file
200
209
  write_primers_to_bed(
201
210
  primer_bed_file,
202
- f"{scheme}_{oligo_type}",
203
- final_schemes[scheme][oligo_type],
211
+ f"{amp_name}_{oligo_type}",
212
+ amp[oligo_type],
204
213
  direction
205
214
  )
206
215
  # write fasta
207
- print(f">{scheme}_{oligo_type}\n{seq.upper()}", file=fasta)
216
+ print(f">{amp_name}_{oligo_type}\n{seq.upper()}", file=fasta)
208
217
 
209
218
 
210
- def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode):
219
+ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_file):
211
220
  """
212
221
  write all relevant bed files and a tsv file with all primer stats
213
222
  """
@@ -216,55 +225,60 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode):
216
225
  amplicon_bed_file = os.path.join(path, "amplicons.bed")
217
226
  tabular_file = os.path.join(path, "primer_to_amplicon_assignment.tabular")
218
227
 
219
- counter = 0
220
-
221
228
  # open files to write
222
229
  with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular:
223
230
  # write header for primer tsv
224
231
  print(
225
- "amlicon_name\tamplicon_length\tprimer_name\talternate_primer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty",
232
+ "amlicon_name\tamplicon_length\tprimer_name\talternate_primer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons",
226
233
  file=tsv
227
234
  )
228
-
229
- for pool in amplicon_scheme:
235
+ amplicon_bed_records = []
236
+ primer_bed_records = []
237
+ primer_assignment_records = []
238
+ pools = {amp.get("pool", 0) for amp in amplicon_scheme}
239
+ for pool in pools:
230
240
  if mode == "single":
231
241
  primer_fasta_file = os.path.join(path, "primers.fasta")
232
242
  else:
233
243
  primer_fasta_file = os.path.join(path, f"primers_pool_{pool}.fasta")
234
244
  with open(primer_fasta_file, "w") as primer_fasta:
235
- for amp in amplicon_scheme[pool]:
245
+ for counter, amp in enumerate(amplicon_scheme[pool::len(pools)]):
236
246
  # give a new amplicon name
237
- new_name = f"AMPLICON_{str(counter)}"
238
- counter += 1
247
+ amplicon_index = counter*len(pools) + pool
248
+ new_name = f"AMPLICON_{amplicon_index}"
239
249
  # get left and right primers and their names
240
- primer_names = list(amplicon_scheme[pool][amp].keys())
241
- left = (primer_names[0], amplicon_scheme[pool][amp][primer_names[0]])
242
- right = (primer_names[1], amplicon_scheme[pool][amp][primer_names[1]])
243
- amp_length = right[1][2] - left[1][1]
250
+ amp_length = amp["RIGHT"][2] - amp["LEFT"][1]
251
+ if "off_targets" in amp:
252
+ if amp["off_targets"]:
253
+ amplicon_has_off_target = "Yes"
254
+ write_BLAST_warning(amp_name, log_file)
255
+ else:
256
+ amplicon_has_off_target = "No"
257
+ else:
258
+ amplicon_has_off_target = "n.d."
244
259
  # write amplicon bed
245
260
  if mode == "tiled":
246
261
  bed_score = pool
247
262
  elif mode == "single":
248
- bed_score = round(left[1][3] + right[1][3], 1)
249
- print(
250
- "ambiguous_consensus",
251
- left[1][1],
252
- right[1][2],
253
- new_name,
254
- bed_score,
255
- sep="\t",
256
- file=bed
263
+ bed_score = round(amp["LEFT"][3] + amp["RIGHT"][3], 1)
264
+ amplicon_bed_records.append(
265
+ (
266
+ amp["LEFT"][1],
267
+ amp["RIGHT"][2],
268
+ new_name,
269
+ bed_score
270
+ )
257
271
  )
258
- # write primer assignments tabular file
259
- print(
260
- f"{new_name}_LEFT",
261
- f"{new_name}_RIGHT",
262
- sep="\t",
263
- file=tabular
272
+ primer_assignment_records.append(
273
+ (
274
+ # will need amplicon_index for sorting
275
+ amplicon_index,
276
+ (f"{new_name}_LEFT", f"{new_name}_RIGHT")
277
+ )
264
278
  )
265
279
  # write primer tsv and primer bed
266
- for direction, primer in [("+", left), ("-", right)]:
267
- seq = ambiguous_consensus[primer[1][1]:primer[1][2]]
280
+ for direction, primer in [("+", amp["LEFT"]), ("-", amp["RIGHT"])]:
281
+ seq = ambiguous_consensus[primer[1]:primer[2]]
268
282
  if direction == "-":
269
283
  seq = primers.rev_complement(seq)
270
284
  primer_name = f"{new_name}_RIGHT"
@@ -280,27 +294,50 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode):
280
294
  new_name,
281
295
  amp_length,
282
296
  primer_name,
283
- primer[0],
297
+ primer[-1],
284
298
  pool,
285
- primer[1][1] + 1,
286
- primer[1][2],
299
+ primer[1] + 1,
300
+ primer[2],
287
301
  seq.upper(),
288
- len(primer[1][0]),
289
- round(primers.calc_gc(primer[1][0]), 1),
290
- round(primers.calc_temp(primer[1][0]), 1),
302
+ len(primer[0]),
303
+ round(primers.calc_gc(primer[0]), 1),
304
+ round(primers.calc_temp(primer[0]), 1),
291
305
  gc,
292
306
  temp,
293
- round(primer[1][3], 1),
307
+ round(primer[3], 1),
308
+ amplicon_has_off_target,
294
309
  sep="\t",
295
310
  file=tsv
296
311
  )
297
- # write primer bed file
298
- write_primers_to_bed(
299
- primer_bed_file,
300
- primer_name,
301
- primer[1],
302
- direction
312
+ primer_bed_records.append(
313
+ (
314
+ # will need amplicon_index for sorting
315
+ amplicon_index,
316
+ (primer_name, primer, direction)
317
+ )
303
318
  )
319
+ # write amplicon bed with amplicons sorted by start position
320
+ for record in sorted(amplicon_bed_records, key=lambda x: x[0]):
321
+ print(
322
+ "ambiguous_consensus",
323
+ *record,
324
+ ".",
325
+ sep="\t",
326
+ file=bed
327
+ )
328
+ # use sorting by amplicon index for primer assignment file
329
+ for record in sorted(primer_assignment_records):
330
+ print(
331
+ *record[1],
332
+ sep="\t",
333
+ file=tabular
334
+ )
335
+ # same for primer bed
336
+ for record in sorted(primer_bed_records):
337
+ write_primers_to_bed(
338
+ primer_bed_file,
339
+ *record[1]
340
+ )
304
341
 
305
342
 
306
343
  def write_dimers(path, primer_dimers):
@@ -313,12 +350,12 @@ def write_dimers(path, primer_dimers):
313
350
  "pool\tprimer_name_1\tprimer_name_2\tdimer melting temp",
314
351
  file=tsv
315
352
  )
316
- for dimers in primer_dimers:
353
+ for pool, primer1, primer2 in primer_dimers:
317
354
  print(
318
- dimers[0][0],
319
- dimers[0][2],
320
- dimers[1][2],
321
- round(primers.calc_dimer(dimers[0][3][0], dimers[1][3][0]).tm, 1),
355
+ pool,
356
+ primer1[1],
357
+ primer2[1],
358
+ round(primers.calc_dimer(primer1[2][0], primer2[2][0]).tm, 1),
322
359
  sep="\t",
323
360
  file=tsv
324
361
  )
@@ -418,26 +455,23 @@ def amplicon_subplot(ax, amplicon_scheme):
418
455
  """
419
456
  creates the amplicon subplot
420
457
  """
421
- counter = 0
422
- for pool in amplicon_scheme:
423
- for amp in amplicon_scheme[pool]:
424
- if pool == 0:
425
- position_amp = 0.7
426
- position_text = 0.6
427
- elif pool == 1:
428
- position_amp = 0.6
429
- position_text = 0.65
430
- primer_names = [i for i in amplicon_scheme[pool][amp]]
431
- left = amplicon_scheme[pool][amp][primer_names[0]]
432
- right = amplicon_scheme[pool][amp][primer_names[1]]
433
- # amplicons
434
- ax[1].hlines(position_amp, left[1], right[2], linewidth=5)
435
- # text
436
- ax[1].text(right[2] - (right[2]-left[1])/2, position_text, str(counter), fontsize=8)
437
- # primers
438
- ax[1].hlines(position_amp, left[1], left[2], linewidth=5, color="red")
439
- ax[1].hlines(position_amp, right[1], right[2], linewidth=5, color="red")
440
- counter += 1
458
+ for counter, amp in enumerate(amplicon_scheme):
459
+ pool = amp.get("pool", 0)
460
+ if pool == 0:
461
+ position_amp = 0.7
462
+ position_text = 0.6
463
+ elif pool == 1:
464
+ position_amp = 0.6
465
+ position_text = 0.65
466
+ left = amp["LEFT"]
467
+ right = amp["RIGHT"]
468
+ # amplicons
469
+ ax[1].hlines(position_amp, left[1], right[2], linewidth=5)
470
+ # text
471
+ ax[1].text(right[2] - (right[2]-left[1])/2, position_text, str(counter), fontsize=8)
472
+ # primers
473
+ ax[1].hlines(position_amp, left[1], left[2], linewidth=5, color="red")
474
+ ax[1].hlines(position_amp, right[1], right[2], linewidth=5, color="red")
441
475
  # legends
442
476
  ax[1].hlines(position_amp, left[1]+config.PRIMER_SIZES[1], right[2]-config.PRIMER_SIZES[1], linewidth=5, label="amplicons")
443
477
  ax[1].hlines(position_amp, left[1], left[2], linewidth=5, color="red", label="primers")
@@ -447,12 +481,10 @@ def qpcr_subplot(ax, amplicon_scheme):
447
481
  """
448
482
  creates the qpcr subplot
449
483
  """
450
- counter = 0
451
-
452
- for scheme in amplicon_scheme:
453
- left = amplicon_scheme[scheme]["LEFT"]
454
- right = amplicon_scheme[scheme]["RIGHT"]
455
- probe = amplicon_scheme[scheme]["PROBE"]
484
+ for counter, amp in enumerate(amplicon_scheme):
485
+ left = amp["LEFT"]
486
+ right = amp["RIGHT"]
487
+ probe = amp["PROBE"]
456
488
  # amplicons
457
489
  ax[1].hlines(0.8, left[1], right[2], linewidth=5)
458
490
  # text
@@ -463,7 +495,6 @@ def qpcr_subplot(ax, amplicon_scheme):
463
495
  # probe
464
496
  ax[1].hlines(0.75, probe[1], probe[2], linewidth=5, color="darkgrey")
465
497
 
466
- counter += 1
467
498
  # legends
468
499
  ax[1].hlines(0.8, left[1]+config.PRIMER_SIZES[1], right[2]-config.PRIMER_SIZES[1], linewidth=5, label="amplicons")
469
500
  ax[1].hlines(0.8, left[1], left[2], linewidth=5, color="red", label="primers")
@@ -515,13 +546,10 @@ def get_SINGLE_TILED_primers_for_plot(amplicon_scheme):
515
546
  """
516
547
  amplicon_primers = []
517
548
 
518
- for pool in amplicon_scheme:
519
- for amp in amplicon_scheme[pool]:
520
- primer_names = [i for i in amplicon_scheme[pool][amp]]
521
- left = amplicon_scheme[pool][amp][primer_names[0]]
522
- right = amplicon_scheme[pool][amp][primer_names[1]]
523
- amplicon_primers.append((primer_names[0], left))
524
- amplicon_primers.append((primer_names[1], right))
549
+ for counter, amp in enumerate(amplicon_scheme):
550
+ for type in ["LEFT", "RIGHT"]:
551
+ primer_name = f"AMPLICON_{counter}_{type}"
552
+ amplicon_primers.append((primer_name, amp[type]))
525
553
 
526
554
  return amplicon_primers
527
555
 
@@ -532,12 +560,10 @@ def get_QPCR_primers_for_plot(amplicon_schemes):
532
560
  """
533
561
  amplicon_primers = []
534
562
 
535
- for scheme in amplicon_schemes:
536
- for type in amplicon_schemes[scheme]:
537
- if type == "penalty" or type == "deltaG":
538
- continue
539
- primer_name = f"{scheme}_{type}"
540
- amplicon_primers.append((primer_name, amplicon_schemes[scheme][type]))
563
+ for counter, amp in enumerate(amplicon_schemes):
564
+ for type in ["PROBE", "LEFT", "RIGHT"]:
565
+ primer_name = f"QPCR_SCHEME_{counter}_{type}"
566
+ amplicon_primers.append((primer_name, amp[type]))
541
567
 
542
568
  return amplicon_primers
543
569
 
@@ -581,3 +607,15 @@ def per_base_mismatch_plot(path, amplicon_scheme, threshold, mode="SINGLE/TILED"
581
607
  # - to pdf
582
608
  pdf.savefig(fig, bbox_inches='tight')
583
609
  plt.close()
610
+
611
+
612
+ def write_BLAST_warning(amplicon_name, log_file):
613
+ """
614
+ for each primer pair that has potential unspecific amplicons
615
+ write warnings to file.
616
+ """
617
+ logging.raise_error(
618
+ f"{amplicon_name} could produce off-targets. No better amplicon in this area was found.",
619
+ log_file,
620
+ exit=False,
621
+ )