toulligqc 2.5.2__tar.gz → 2.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {toulligqc-2.5.2 → toulligqc-2.5.4}/PKG-INFO +3 -3
  2. {toulligqc-2.5.2 → toulligqc-2.5.4}/setup.py +5 -4
  3. {toulligqc-2.5.2 → toulligqc-2.5.4}/test/test_sequencing_summary_extractor.py +2 -1
  4. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/bam_extractor.py +32 -21
  5. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fastq_bam_common.py +17 -2
  6. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fastq_extractor.py +2 -1
  7. toulligqc-2.5.4/toulligqc/version.py +1 -0
  8. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/PKG-INFO +3 -3
  9. toulligqc-2.5.4/toulligqc.egg-info/requires.txt +9 -0
  10. toulligqc-2.5.2/toulligqc/version.py +0 -1
  11. toulligqc-2.5.2/toulligqc.egg-info/requires.txt +0 -7
  12. {toulligqc-2.5.2 → toulligqc-2.5.4}/AUTHORS +0 -0
  13. {toulligqc-2.5.2 → toulligqc-2.5.4}/LICENSE-CeCILL.txt +0 -0
  14. {toulligqc-2.5.2 → toulligqc-2.5.4}/LICENSE.txt +0 -0
  15. {toulligqc-2.5.2 → toulligqc-2.5.4}/MANIFEST.in +0 -0
  16. {toulligqc-2.5.2 → toulligqc-2.5.4}/README.md +0 -0
  17. {toulligqc-2.5.2 → toulligqc-2.5.4}/setup.cfg +0 -0
  18. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/__init__.py +0 -0
  19. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/common.py +0 -0
  20. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/common_statistics.py +0 -0
  21. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/configuration.py +0 -0
  22. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/extractor_common.py +0 -0
  23. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fast5_extractor.py +0 -0
  24. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/html_report_generator.py +0 -0
  25. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_common.py +0 -0
  26. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_generator.py +0 -0
  27. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
  28. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/report_data_file_generator.py +0 -0
  29. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/plotly-latest.min.js +0 -0
  30. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.css +0 -0
  31. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.png +0 -0
  32. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_summary_extractor.py +0 -0
  33. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
  34. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_telemetry_extractor.py +0 -0
  35. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/toulligqc.py +0 -0
  36. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/toulligqc_info_extractor.py +0 -0
  37. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/SOURCES.txt +0 -0
  38. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/dependency_links.txt +0 -0
  39. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/entry_points.txt +0 -0
  40. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/not-zip-safe +0 -0
  41. {toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -15,8 +15,8 @@ Classifier: Intended Audience :: Science/Research
15
15
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
16
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
17
17
  Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
18
- Classifier: Programming Language :: Python :: 3.8
19
- Requires-Python: >=3.8.0
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Requires-Python: >=3.11.0
20
20
  License-File: LICENSE-CeCILL.txt
21
21
  License-File: LICENSE.txt
22
22
  License-File: AUTHORS
@@ -34,7 +34,7 @@ setup(
34
34
  'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
35
35
  'License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)',
36
36
 
37
- 'Programming Language :: Python :: 3.8'
37
+ 'Programming Language :: Python :: 3.11'
38
38
  ],
39
39
 
40
40
  keywords='Nanopore MinION QC report',
@@ -45,9 +45,10 @@ setup(
45
45
  zip_safe=False,
46
46
  include_package_data=True,
47
47
 
48
- python_requires='>=3.8.0',
49
- install_requires=['matplotlib>=3.1.2', 'plotly>=4.5.0', 'h5py>=2.10',
50
- 'pandas>=0.25.3', 'numpy>=1.17.4', 'scipy>=1.3.3', 'scikit-learn>=0.22'],
48
+ python_requires='>=3.11.0',
49
+ install_requires=['matplotlib>=3.6.3', 'plotly>=5.15.0', 'h5py>=3.7.0',
50
+ 'pandas>=1.5.3', 'numpy>=1.24.2', 'scipy>=1.10.1',
51
+ 'scikit-learn>=1.2.1', 'tqdm>=4.64.1', 'pysam>=0.21.0'],
51
52
 
52
53
  entry_points={
53
54
  'console_scripts': [
@@ -6,6 +6,7 @@ from unittest.mock import patch, Mock, MagicMock
6
6
  import config as cfg
7
7
  import pandas as pd
8
8
  import pandas.util.testing as testing
9
+ from toulligqc.common import is_numpy_1_24
9
10
  import numpy as np
10
11
  from distutils import util
11
12
 
@@ -65,7 +66,7 @@ class TestSequencingSummaryExtractorWholeConfig (unittest.TestCase):
65
66
  cls.expected_df = cls.expected_df.astype({
66
67
  'channel': np.int16,
67
68
  'start_time': np.float,
68
- 'passes_filtering': np.bool,
69
+ 'passes_filtering': np.bool_ if is_numpy_1_24 else np.bool,
69
70
  'sequence_length': np.uint32,
70
71
  'mean_qscore_template': np.float,
71
72
  'duration': np.float,
@@ -18,6 +18,7 @@ from toulligqc.extractor_common import timeISO_to_float
18
18
  from toulligqc.common_statistics import compute_NXX, compute_LXX, occupancy_channel, avg_qual
19
19
  from toulligqc.fastq_bam_common import multiprocessing_submit, extract_headerTag
20
20
  from toulligqc.fastq_bam_common import batch_iterator
21
+ from toulligqc.common import is_numpy_1_24
21
22
  from toulligqc import plotly_graph_generator as pgg
22
23
 
23
24
 
@@ -215,7 +216,7 @@ class uBAM_Extractor:
215
216
 
216
217
  uBAM_data['sequence_length'] = uBAM_data['sequence_length'].astype(np.uint32)
217
218
  uBAM_data['mean_qscore'] = uBAM_data['mean_qscore'].astype(np.float32)
218
- uBAM_data['passes_filtering'] = uBAM_data['passes_filtering'].astype(np.bool)
219
+ uBAM_data['passes_filtering'] = uBAM_data['passes_filtering'].astype(np.bool_ if is_numpy_1_24 else np.bool)
219
220
  uBAM_data["start_time"] = uBAM_data["start_time"] - uBAM_data["start_time"].min()
220
221
  uBAM_data['channel'] = uBAM_data['channel'].astype(np.int16)
221
222
  uBAM_data['start_time'] = uBAM_data['start_time'].astype(np.float64)
@@ -230,8 +231,10 @@ class uBAM_Extractor:
230
231
  """
231
232
  #def process_bam_chunk(bam_chunk):
232
233
  rec_data = []
234
+ record_count = 0
233
235
  for rec in uBAM_chunk:
234
- rec_dict = self._process_record(rec)
236
+ record_count += 1
237
+ rec_dict = self._process_record(rec, record_count)
235
238
  rec_data.append(rec_dict)
236
239
  return rec_data
237
240
 
@@ -257,35 +260,43 @@ class uBAM_Extractor:
257
260
 
258
261
 
259
262
  def _get_header(self):
260
- samfile = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
261
- header = samfile.header.to_dict()
262
- run_id, model_version_id = extract_headerTag(header,'RG','ID').split('_', 1)
263
+ sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
264
+ header = sam_file.header.to_dict()
265
+ run_id, model_version_id = extract_headerTag(header, 'RG','ID',
266
+ 'Unknown_Unknown').split('_', 1)
263
267
  self.header = {
264
- "run_id" : run_id,
265
- "run_date" : extract_headerTag(header, 'RG', 'DT'),
266
- "sample_id" : extract_headerTag(header,'RG','SM'),
267
- "basecaller" : extract_headerTag(header,'PG','PN'),
268
- "basecaller_version" : extract_headerTag(header,'PG','VN'),
269
- "model_version_id" : model_version_id,
270
- "flow_cell_id" : extract_headerTag(header,'RG','PU')
268
+ "run_id": run_id,
269
+ "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
270
+ "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
271
+ "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
272
+ "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
273
+ "model_version_id": model_version_id,
274
+ "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
271
275
  }
272
276
 
273
277
 
274
- def _process_record(self, rec):
278
+ def _process_record(self, rec, record_count):
275
279
  """
276
280
  extract QC info from BAM record
277
281
  return : dict of QC info
278
282
  """
279
- tags = rec.split("\t")
280
- iso_start_time = tags[17].split(':',2)[2]
281
- qual = avg_qual(tags[10])
283
+ fields = rec.split("\t")
284
+
285
+ # Parse optional fields
286
+ attributes = {}
287
+ for t in fields[11:]:
288
+ k, t, v = t.split(':', 2)
289
+ attributes[k] = v
290
+
291
+ iso_start_time = attributes.get('st', None)
292
+ qual = avg_qual(fields[10])
282
293
  passes_filtering = True if qual > self.threshold_Qscore else False
283
294
  data = [
284
- len(tags[9]), # read length
295
+ len(fields[9]), # read length
285
296
  qual, # AVG Qscore
286
297
  passes_filtering, # Passing filter
287
- timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
288
- tags[16].split(':',2)[2], # Channel
289
- tags[12].split(':',2)[2] # Duration
298
+ float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
299
+ attributes.get('ch', '1'), # Channel
300
+ attributes.get('du', '1') # Duration
290
301
  ]
291
- return data
302
+ return data
@@ -2,8 +2,23 @@ import multiprocessing as mp
2
2
  from tqdm import tqdm
3
3
  from concurrent.futures import ProcessPoolExecutor, as_completed
4
4
 
5
- def extract_headerTag(header, tagGroup, tag):
6
- return header[tagGroup][0][tag]
5
+ def extract_headerTag(header, tagGroup, tag, defaultValue = None):
6
+
7
+ if tagGroup not in header:
8
+ if defaultValue is not None:
9
+ return defaultValue
10
+ else:
11
+ raise KeyError(tagGroup)
12
+
13
+ first_entry = header[tagGroup][0]
14
+
15
+ if tag not in first_entry:
16
+ if defaultValue is not None:
17
+ return defaultValue
18
+ else:
19
+ raise KeyError(tag)
20
+
21
+ return first_entry[tag]
7
22
 
8
23
 
9
24
  def batch_iterator(iterator, batch_size):
@@ -16,6 +16,7 @@ from toulligqc.extractor_common import set_result_dict_telemetry_value
16
16
  from toulligqc.extractor_common import timeISO_to_float
17
17
  from toulligqc.common_statistics import compute_NXX, compute_LXX, occupancy_channel, avg_qual
18
18
  from toulligqc.fastq_bam_common import multiprocessing_submit
19
+ from toulligqc.common import is_numpy_1_24
19
20
  from toulligqc import plotly_graph_generator as pgg
20
21
 
21
22
 
@@ -226,7 +227,7 @@ class fastqExtractor:
226
227
 
227
228
  fq_data['sequence_length'] = fq_data['sequence_length'].astype(np.uint32)
228
229
  fq_data['mean_qscore'] = fq_data['mean_qscore'].astype(np.float32)
229
- fq_data['passes_filtering'] = fq_data['passes_filtering'].astype(np.bool)
230
+ fq_data['passes_filtering'] = fq_data['passes_filtering'].astype(np.bool_ if is_numpy_1_24 else np.bool)
230
231
 
231
232
  if self.rich:
232
233
  fq_data["start_time"] = fq_data["start_time"] - fq_data["start_time"].min()
@@ -0,0 +1 @@
1
+ __version__ = '2.5.4'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomicParisCentre/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -15,8 +15,8 @@ Classifier: Intended Audience :: Science/Research
15
15
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
16
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
17
17
  Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
18
- Classifier: Programming Language :: Python :: 3.8
19
- Requires-Python: >=3.8.0
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Requires-Python: >=3.11.0
20
20
  License-File: LICENSE-CeCILL.txt
21
21
  License-File: LICENSE.txt
22
22
  License-File: AUTHORS
@@ -0,0 +1,9 @@
1
+ h5py>=3.7.0
2
+ matplotlib>=3.6.3
3
+ numpy>=1.24.2
4
+ pandas>=1.5.3
5
+ plotly>=5.15.0
6
+ pysam>=0.21.0
7
+ scikit-learn>=1.2.1
8
+ scipy>=1.10.1
9
+ tqdm>=4.64.1
@@ -1 +0,0 @@
1
- __version__ = '2.5.2'
@@ -1,7 +0,0 @@
1
- h5py>=2.10
2
- matplotlib>=3.1.2
3
- numpy>=1.17.4
4
- pandas>=0.25.3
5
- plotly>=4.5.0
6
- scikit-learn>=0.22
7
- scipy>=1.3.3
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes