toulligqc 2.5.3__tar.gz → 2.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {toulligqc-2.5.3 → toulligqc-2.5.5}/PKG-INFO +1 -1
  2. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/bam_extractor.py +30 -20
  3. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fastq_bam_common.py +17 -2
  4. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_common.py +12 -5
  5. toulligqc-2.5.5/toulligqc/version.py +1 -0
  6. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/PKG-INFO +1 -1
  7. toulligqc-2.5.3/toulligqc/version.py +0 -1
  8. {toulligqc-2.5.3 → toulligqc-2.5.5}/AUTHORS +0 -0
  9. {toulligqc-2.5.3 → toulligqc-2.5.5}/LICENSE-CeCILL.txt +0 -0
  10. {toulligqc-2.5.3 → toulligqc-2.5.5}/LICENSE.txt +0 -0
  11. {toulligqc-2.5.3 → toulligqc-2.5.5}/MANIFEST.in +0 -0
  12. {toulligqc-2.5.3 → toulligqc-2.5.5}/README.md +0 -0
  13. {toulligqc-2.5.3 → toulligqc-2.5.5}/setup.cfg +0 -0
  14. {toulligqc-2.5.3 → toulligqc-2.5.5}/setup.py +0 -0
  15. {toulligqc-2.5.3 → toulligqc-2.5.5}/test/test_sequencing_summary_extractor.py +0 -0
  16. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/__init__.py +0 -0
  17. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/common.py +0 -0
  18. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/common_statistics.py +0 -0
  19. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/configuration.py +0 -0
  20. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/extractor_common.py +0 -0
  21. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fast5_extractor.py +0 -0
  22. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fastq_extractor.py +0 -0
  23. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/html_report_generator.py +0 -0
  24. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_generator.py +0 -0
  25. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
  26. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/report_data_file_generator.py +0 -0
  27. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/plotly-latest.min.js +0 -0
  28. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/toulligqc.css +0 -0
  29. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/toulligqc.png +0 -0
  30. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_summary_extractor.py +0 -0
  31. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
  32. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_telemetry_extractor.py +0 -0
  33. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/toulligqc.py +0 -0
  34. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/toulligqc_info_extractor.py +0 -0
  35. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/SOURCES.txt +0 -0
  36. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/dependency_links.txt +0 -0
  37. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/entry_points.txt +0 -0
  38. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/not-zip-safe +0 -0
  39. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/requires.txt +0 -0
  40. {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.3
3
+ Version: 2.5.5
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -231,8 +231,10 @@ class uBAM_Extractor:
231
231
  """
232
232
  #def process_bam_chunk(bam_chunk):
233
233
  rec_data = []
234
+ record_count = 0
234
235
  for rec in uBAM_chunk:
235
- rec_dict = self._process_record(rec)
236
+ record_count += 1
237
+ rec_dict = self._process_record(rec, record_count)
236
238
  rec_data.append(rec_dict)
237
239
  return rec_data
238
240
 
@@ -258,35 +260,43 @@ class uBAM_Extractor:
258
260
 
259
261
 
260
262
  def _get_header(self):
261
- samfile = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
262
- header = samfile.header.to_dict()
263
- run_id, model_version_id = extract_headerTag(header,'RG','ID').split('_', 1)
263
+ sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
264
+ header = sam_file.header.to_dict()
265
+ run_id, model_version_id = extract_headerTag(header, 'RG','ID',
266
+ 'Unknown_Unknown').split('_', 1)
264
267
  self.header = {
265
- "run_id" : run_id,
266
- "run_date" : extract_headerTag(header, 'RG', 'DT'),
267
- "sample_id" : extract_headerTag(header,'RG','SM'),
268
- "basecaller" : extract_headerTag(header,'PG','PN'),
269
- "basecaller_version" : extract_headerTag(header,'PG','VN'),
270
- "model_version_id" : model_version_id,
271
- "flow_cell_id" : extract_headerTag(header,'RG','PU')
268
+ "run_id": run_id,
269
+ "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
270
+ "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
271
+ "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
272
+ "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
273
+ "model_version_id": model_version_id,
274
+ "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
272
275
  }
273
276
 
274
277
 
275
- def _process_record(self, rec):
278
+ def _process_record(self, rec, record_count):
276
279
  """
277
280
  extract QC info from BAM record
278
281
  return : dict of QC info
279
282
  """
280
- tags = rec.split("\t")
281
- iso_start_time = tags[17].split(':',2)[2]
282
- qual = avg_qual(tags[10])
283
+ fields = rec.split("\t")
284
+
285
+ # Parse optional fields
286
+ attributes = {}
287
+ for t in fields[11:]:
288
+ k, t, v = t.split(':', 2)
289
+ attributes[k] = v
290
+
291
+ iso_start_time = attributes.get('st', None)
292
+ qual = avg_qual(fields[10])
283
293
  passes_filtering = True if qual > self.threshold_Qscore else False
284
294
  data = [
285
- len(tags[9]), # read length
295
+ len(fields[9]), # read length
286
296
  qual, # AVG Qscore
287
297
  passes_filtering, # Passing filter
288
- timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
289
- tags[16].split(':',2)[2], # Channel
290
- tags[12].split(':',2)[2] # Duration
298
+ float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
299
+ attributes.get('ch', '1'), # Channel
300
+ attributes.get('du', '1') # Duration
291
301
  ]
292
- return data
302
+ return data
@@ -2,8 +2,23 @@ import multiprocessing as mp
2
2
  from tqdm import tqdm
3
3
  from concurrent.futures import ProcessPoolExecutor, as_completed
4
4
 
5
- def extract_headerTag(header, tagGroup, tag):
6
- return header[tagGroup][0][tag]
5
+ def extract_headerTag(header, tagGroup, tag, defaultValue = None):
6
+
7
+ if tagGroup not in header:
8
+ if defaultValue is not None:
9
+ return defaultValue
10
+ else:
11
+ raise KeyError(tagGroup)
12
+
13
+ first_entry = header[tagGroup][0]
14
+
15
+ if tag not in first_entry:
16
+ if defaultValue is not None:
17
+ return defaultValue
18
+ else:
19
+ raise KeyError(tag)
20
+
21
+ return first_entry[tag]
7
22
 
8
23
 
9
24
  def batch_iterator(iterator, batch_size):
@@ -22,6 +22,7 @@
22
22
 
23
23
  from collections import defaultdict
24
24
 
25
+ import pkgutil
25
26
  import numpy as np
26
27
  import pandas as pd
27
28
  import plotly.graph_objs as go
@@ -301,6 +302,10 @@ def _transparent_component(c, b, a):
301
302
  return '0' + r
302
303
  return r
303
304
 
305
+ def _copy_latest_minjs(result_directory, js_file):
306
+ with open(result_directory + '/' + js_file , 'w+') as f:
307
+ plotly_min_js = pkgutil.get_data(__name__, "resources/plotly-latest.min.js").decode('utf8')
308
+ f.write(plotly_min_js)
304
309
 
305
310
  def _create_and_save_div(fig, result_directory, main):
306
311
  div = py.plot(fig,
@@ -311,11 +316,13 @@ def _create_and_save_div(fig, result_directory, main):
311
316
 
312
317
  if result_directory is not None:
313
318
  output_file = result_directory + '/' + '_'.join(main.split())
319
+ js_file="plotly.min.js"
314
320
  py.plot(fig,
315
321
  filename=output_file,
316
322
  output_type="file",
317
- include_plotlyjs="directory",
323
+ include_plotlyjs= js_file,
318
324
  auto_open=False)
325
+ _copy_latest_minjs(result_directory, js_file)
319
326
  else:
320
327
  output_file = None
321
328
 
@@ -622,9 +629,9 @@ def _pie_chart_graph(graph_name, count_sorted, color_palette, one_d_square, resu
622
629
  method="update"
623
630
  ),
624
631
  dict(
625
- args=[{'visible': [False, False, False, True]},
632
+ args=[{'visible': [False, False, True, False]},
626
633
  {**_xaxis('Barcodes', dict(visible=True)),
627
- **_yaxis('Base count', dict(visible=True)),
634
+ **_yaxis('Read count', dict(visible=True)),
628
635
  'plot_bgcolor': plotly_background_color}],
629
636
  label="Reads Histogram",
630
637
  method="update"
@@ -638,9 +645,9 @@ def _pie_chart_graph(graph_name, count_sorted, color_palette, one_d_square, resu
638
645
  method="update"
639
646
  ),
640
647
  dict(
641
- args=[{'visible': [False, False, True, False]},
648
+ args=[{'visible': [False, False, False, True]},
642
649
  {**_xaxis('Barcodes', dict(visible=True)),
643
- **_yaxis('Read count', dict(visible=True)),
650
+ **_yaxis('Base count', dict(visible=True)),
644
651
  'plot_bgcolor': plotly_background_color}],
645
652
  label="Bases Histogram",
646
653
  method="update"
@@ -0,0 +1 @@
1
+ __version__ = '2.5.5'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.3
3
+ Version: 2.5.5
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -1 +0,0 @@
1
- __version__ = '2.5.3'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes