toulligqc 2.5.3__tar.gz → 2.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {toulligqc-2.5.3 → toulligqc-2.5.4}/PKG-INFO +1 -1
  2. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/bam_extractor.py +30 -20
  3. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_bam_common.py +17 -2
  4. toulligqc-2.5.4/toulligqc/version.py +1 -0
  5. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/PKG-INFO +1 -1
  6. toulligqc-2.5.3/toulligqc/version.py +0 -1
  7. {toulligqc-2.5.3 → toulligqc-2.5.4}/AUTHORS +0 -0
  8. {toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE-CeCILL.txt +0 -0
  9. {toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE.txt +0 -0
  10. {toulligqc-2.5.3 → toulligqc-2.5.4}/MANIFEST.in +0 -0
  11. {toulligqc-2.5.3 → toulligqc-2.5.4}/README.md +0 -0
  12. {toulligqc-2.5.3 → toulligqc-2.5.4}/setup.cfg +0 -0
  13. {toulligqc-2.5.3 → toulligqc-2.5.4}/setup.py +0 -0
  14. {toulligqc-2.5.3 → toulligqc-2.5.4}/test/test_sequencing_summary_extractor.py +0 -0
  15. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/__init__.py +0 -0
  16. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common.py +0 -0
  17. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common_statistics.py +0 -0
  18. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/configuration.py +0 -0
  19. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/extractor_common.py +0 -0
  20. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fast5_extractor.py +0 -0
  21. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_extractor.py +0 -0
  22. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/html_report_generator.py +0 -0
  23. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_common.py +0 -0
  24. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_generator.py +0 -0
  25. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
  26. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/report_data_file_generator.py +0 -0
  27. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/plotly-latest.min.js +0 -0
  28. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.css +0 -0
  29. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.png +0 -0
  30. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_extractor.py +0 -0
  31. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
  32. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_telemetry_extractor.py +0 -0
  33. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc.py +0 -0
  34. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc_info_extractor.py +0 -0
  35. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/SOURCES.txt +0 -0
  36. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/dependency_links.txt +0 -0
  37. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/entry_points.txt +0 -0
  38. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/not-zip-safe +0 -0
  39. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/requires.txt +0 -0
  40. {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.3
3
+ Version: 2.5.4
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -231,8 +231,10 @@ class uBAM_Extractor:
231
231
  """
232
232
  #def process_bam_chunk(bam_chunk):
233
233
  rec_data = []
234
+ record_count = 0
234
235
  for rec in uBAM_chunk:
235
- rec_dict = self._process_record(rec)
236
+ record_count += 1
237
+ rec_dict = self._process_record(rec, record_count)
236
238
  rec_data.append(rec_dict)
237
239
  return rec_data
238
240
 
@@ -258,35 +260,43 @@ class uBAM_Extractor:
258
260
 
259
261
 
260
262
  def _get_header(self):
261
- samfile = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
262
- header = samfile.header.to_dict()
263
- run_id, model_version_id = extract_headerTag(header,'RG','ID').split('_', 1)
263
+ sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
264
+ header = sam_file.header.to_dict()
265
+ run_id, model_version_id = extract_headerTag(header, 'RG','ID',
266
+ 'Unknown_Unknown').split('_', 1)
264
267
  self.header = {
265
- "run_id" : run_id,
266
- "run_date" : extract_headerTag(header, 'RG', 'DT'),
267
- "sample_id" : extract_headerTag(header,'RG','SM'),
268
- "basecaller" : extract_headerTag(header,'PG','PN'),
269
- "basecaller_version" : extract_headerTag(header,'PG','VN'),
270
- "model_version_id" : model_version_id,
271
- "flow_cell_id" : extract_headerTag(header,'RG','PU')
268
+ "run_id": run_id,
269
+ "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
270
+ "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
271
+ "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
272
+ "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
273
+ "model_version_id": model_version_id,
274
+ "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
272
275
  }
273
276
 
274
277
 
275
- def _process_record(self, rec):
278
+ def _process_record(self, rec, record_count):
276
279
  """
277
280
  extract QC info from BAM record
278
281
  return : dict of QC info
279
282
  """
280
- tags = rec.split("\t")
281
- iso_start_time = tags[17].split(':',2)[2]
282
- qual = avg_qual(tags[10])
283
+ fields = rec.split("\t")
284
+
285
+ # Parse optional fields
286
+ attributes = {}
287
+ for t in fields[11:]:
288
+ k, t, v = t.split(':', 2)
289
+ attributes[k] = v
290
+
291
+ iso_start_time = attributes.get('st', None)
292
+ qual = avg_qual(fields[10])
283
293
  passes_filtering = True if qual > self.threshold_Qscore else False
284
294
  data = [
285
- len(tags[9]), # read length
295
+ len(fields[9]), # read length
286
296
  qual, # AVG Qscore
287
297
  passes_filtering, # Passing filter
288
- timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
289
- tags[16].split(':',2)[2], # Channel
290
- tags[12].split(':',2)[2] # Duration
298
+ float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
299
+ attributes.get('ch', '1'), # Channel
300
+ attributes.get('du', '1') # Duration
291
301
  ]
292
- return data
302
+ return data
@@ -2,8 +2,23 @@ import multiprocessing as mp
2
2
  from tqdm import tqdm
3
3
  from concurrent.futures import ProcessPoolExecutor, as_completed
4
4
 
5
- def extract_headerTag(header, tagGroup, tag):
6
- return header[tagGroup][0][tag]
5
+ def extract_headerTag(header, tagGroup, tag, defaultValue = None):
6
+
7
+ if tagGroup not in header:
8
+ if defaultValue is not None:
9
+ return defaultValue
10
+ else:
11
+ raise KeyError(tagGroup)
12
+
13
+ first_entry = header[tagGroup][0]
14
+
15
+ if tag not in first_entry:
16
+ if defaultValue is not None:
17
+ return defaultValue
18
+ else:
19
+ raise KeyError(tag)
20
+
21
+ return first_entry[tag]
7
22
 
8
23
 
9
24
  def batch_iterator(iterator, batch_size):
@@ -0,0 +1 @@
1
+ __version__ = '2.5.4'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.3
3
+ Version: 2.5.4
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -1 +0,0 @@
1
- __version__ = '2.5.3'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes