PyPI - toulligqc - Versions diffs - 2.5.2__tar.gz → 2.5.4__tar.gz - Mend

toulligqc 2.5.2 (tar.gz) → 2.5.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{toulligqc-2.5.2 → toulligqc-2.5.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: toulligqc
-Version: 2.5.2
+Version: 2.5.4
 Summary: A post sequencing QC tool for Oxford Nanopore sequencers
 Home-page: https://github.com/GenomicParisCentre/toulligQC
 Author: Genomic Paris Centre team
@@ -15,8 +15,8 @@ Classifier: Intended Audience :: Science/Research
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
-Classifier: Programming Language :: Python :: 3.8
-Requires-Python: >=3.8.0
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.11.0
 License-File: LICENSE-CeCILL.txt
 License-File: LICENSE.txt
 License-File: AUTHORS

{toulligqc-2.5.2 → toulligqc-2.5.4}/setup.py RENAMED Viewed

@@ -34,7 +34,7 @@ setup(
         'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
         'License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)',
-        'Programming Language :: Python :: 3.8'
+        'Programming Language :: Python :: 3.11'
     ],
     keywords='Nanopore MinION QC report',
@@ -45,9 +45,10 @@ setup(
     zip_safe=False,
     include_package_data=True,
-    python_requires='>=3.8.0',
-    install_requires=['matplotlib>=3.1.2', 'plotly>=4.5.0', 'h5py>=2.10',
-                      'pandas>=0.25.3', 'numpy>=1.17.4', 'scipy>=1.3.3', 'scikit-learn>=0.22'],
+    python_requires='>=3.11.0',
+    install_requires=['matplotlib>=3.6.3',   'plotly>=5.15.0', 'h5py>=3.7.0',
+                      'pandas>=1.5.3',       'numpy>=1.24.2',  'scipy>=1.10.1',
+                      'scikit-learn>=1.2.1', 'tqdm>=4.64.1',   'pysam>=0.21.0'],
     entry_points={
         'console_scripts': [

{toulligqc-2.5.2 → toulligqc-2.5.4}/test/test_sequencing_summary_extractor.py RENAMED Viewed

@@ -6,6 +6,7 @@ from unittest.mock import patch, Mock, MagicMock
 import config as cfg
 import pandas as pd
 import pandas.util.testing as testing
+from toulligqc.common import is_numpy_1_24
 import numpy as np
 from distutils import util
@@ -65,7 +66,7 @@ class TestSequencingSummaryExtractorWholeConfig (unittest.TestCase):
         cls.expected_df = cls.expected_df.astype({
             'channel': np.int16,
             'start_time': np.float,
-            'passes_filtering': np.bool,
+            'passes_filtering': np.bool_ if is_numpy_1_24 else np.bool,
             'sequence_length': np.uint32,
             'mean_qscore_template': np.float,
             'duration': np.float,

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/bam_extractor.py RENAMED Viewed

@@ -18,6 +18,7 @@ from toulligqc.extractor_common import timeISO_to_float
 from toulligqc.common_statistics import compute_NXX, compute_LXX, occupancy_channel, avg_qual
 from toulligqc.fastq_bam_common import multiprocessing_submit, extract_headerTag
 from toulligqc.fastq_bam_common import batch_iterator
+from toulligqc.common import is_numpy_1_24
 from toulligqc import plotly_graph_generator as pgg
@@ -215,7 +216,7 @@ class uBAM_Extractor:
         uBAM_data['sequence_length'] = uBAM_data['sequence_length'].astype(np.uint32)
         uBAM_data['mean_qscore'] = uBAM_data['mean_qscore'].astype(np.float32)
-        uBAM_data['passes_filtering'] = uBAM_data['passes_filtering'].astype(np.bool)
+        uBAM_data['passes_filtering'] = uBAM_data['passes_filtering'].astype(np.bool_ if is_numpy_1_24 else np.bool)
         uBAM_data["start_time"] = uBAM_data["start_time"] - uBAM_data["start_time"].min()
         uBAM_data['channel'] = uBAM_data['channel'].astype(np.int16)
         uBAM_data['start_time'] = uBAM_data['start_time'].astype(np.float64)
@@ -230,8 +231,10 @@ class uBAM_Extractor:
         """
         #def process_bam_chunk(bam_chunk):
         rec_data = []
+        record_count = 0
         for rec in uBAM_chunk:
-            rec_dict = self._process_record(rec)
+            record_count += 1
+            rec_dict = self._process_record(rec, record_count)
             rec_data.append(rec_dict)
         return rec_data
@@ -257,35 +260,43 @@ class uBAM_Extractor:
     def _get_header(self):
-        samfile = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
-        header = samfile.header.to_dict()
-        run_id, model_version_id =  extract_headerTag(header,'RG','ID').split('_', 1)
+        sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
+        header = sam_file.header.to_dict()
+        run_id, model_version_id = extract_headerTag(header, 'RG','ID',
+                                                     'Unknown_Unknown').split('_', 1)
         self.header = {
-        "run_id" : run_id,
-        "run_date" : extract_headerTag(header, 'RG', 'DT'),
-        "sample_id" : extract_headerTag(header,'RG','SM'),
-        "basecaller" : extract_headerTag(header,'PG','PN'),
-        "basecaller_version" : extract_headerTag(header,'PG','VN'),
-        "model_version_id" : model_version_id,
-        "flow_cell_id" : extract_headerTag(header,'RG','PU')
+            "run_id": run_id,
+            "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
+            "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
+            "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
+            "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
+            "model_version_id": model_version_id,
+            "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
         }
-    def _process_record(self, rec):
+    def _process_record(self, rec, record_count):
         """
         extract QC info from BAM record
         return : dict of QC info
         """
-        tags = rec.split("\t")
-        iso_start_time = tags[17].split(':',2)[2]
-        qual = avg_qual(tags[10])
+        fields = rec.split("\t")
+        # Parse optional fields
+        attributes = {}
+        for t in fields[11:]:
+            k, t, v = t.split(':', 2)
+            attributes[k] = v
+        iso_start_time = attributes.get('st', None)
+        qual = avg_qual(fields[10])
         passes_filtering = True if qual > self.threshold_Qscore else False
         data = [
-            len(tags[9]), # read length
+            len(fields[9]), # read length
             qual, # AVG Qscore
             passes_filtering, # Passing filter
-            timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
-            tags[16].split(':',2)[2], # Channel
-            tags[12].split(':',2)[2] # Duration
+            float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
+            attributes.get('ch', '1'),  # Channel
+            attributes.get('du', '1')  # Duration
         ]
-        return data
+        return data

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fastq_bam_common.py RENAMED Viewed

@@ -2,8 +2,23 @@ import multiprocessing as mp
 from tqdm import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
-def extract_headerTag(header, tagGroup, tag):
-        return header[tagGroup][0][tag]
+def extract_headerTag(header, tagGroup, tag, defaultValue = None):
+    if tagGroup not in header:
+        if defaultValue is not None:
+            return defaultValue
+        else:
+            raise KeyError(tagGroup)
+    first_entry = header[tagGroup][0]
+    if tag not in first_entry:
+        if defaultValue is not None:
+            return defaultValue
+        else:
+            raise KeyError(tag)
+    return first_entry[tag]
 def batch_iterator(iterator, batch_size):

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fastq_extractor.py RENAMED Viewed

@@ -16,6 +16,7 @@ from toulligqc.extractor_common import set_result_dict_telemetry_value
 from toulligqc.extractor_common import timeISO_to_float
 from toulligqc.common_statistics import compute_NXX, compute_LXX, occupancy_channel, avg_qual
 from toulligqc.fastq_bam_common import multiprocessing_submit
+from toulligqc.common import is_numpy_1_24
 from toulligqc import plotly_graph_generator as pgg
@@ -226,7 +227,7 @@ class fastqExtractor:
         fq_data['sequence_length'] = fq_data['sequence_length'].astype(np.uint32)
         fq_data['mean_qscore'] = fq_data['mean_qscore'].astype(np.float32)
-        fq_data['passes_filtering'] = fq_data['passes_filtering'].astype(np.bool)
+        fq_data['passes_filtering'] = fq_data['passes_filtering'].astype(np.bool_ if is_numpy_1_24 else np.bool)
         if self.rich:
             fq_data["start_time"] = fq_data["start_time"] - fq_data["start_time"].min()

toulligqc-2.5.4/toulligqc/version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = '2.5.4'

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: toulligqc
-Version: 2.5.2
+Version: 2.5.4
 Summary: A post sequencing QC tool for Oxford Nanopore sequencers
 Home-page: https://github.com/GenomicParisCentre/toulligQC
 Author: Genomic Paris Centre team
@@ -15,8 +15,8 @@ Classifier: Intended Audience :: Science/Research
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
-Classifier: Programming Language :: Python :: 3.8
-Requires-Python: >=3.8.0
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.11.0
 License-File: LICENSE-CeCILL.txt
 License-File: LICENSE.txt
 License-File: AUTHORS

toulligqc-2.5.4/toulligqc.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,9 @@
+h5py>=3.7.0
+matplotlib>=3.6.3
+numpy>=1.24.2
+pandas>=1.5.3
+plotly>=5.15.0
+pysam>=0.21.0
+scikit-learn>=1.2.1
+scipy>=1.10.1
+tqdm>=4.64.1

toulligqc-2.5.2/toulligqc/version.py DELETED Viewed

	@@ -1 +0,0 @@
1	- __version__ = '2.5.2'

toulligqc-2.5.2/toulligqc.egg-info/requires.txt DELETED Viewed

@@ -1,7 +0,0 @@
-h5py>=2.10
-matplotlib>=3.1.2
-numpy>=1.17.4
-pandas>=0.25.3
-plotly>=4.5.0
-scikit-learn>=0.22
-scipy>=1.3.3

{toulligqc-2.5.2 → toulligqc-2.5.4}/AUTHORS RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/LICENSE-CeCILL.txt RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/LICENSE.txt RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/MANIFEST.in RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/README.md RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/setup.cfg RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/__init__.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/common.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/common_statistics.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/configuration.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/extractor_common.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/fast5_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/html_report_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_common.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/plotly_graph_onedsquare_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/report_data_file_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/plotly-latest.min.js RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.css RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.png RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_summary_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_summary_onedsquare_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/sequencing_telemetry_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/toulligqc.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc/toulligqc_info_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/entry_points.txt RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/not-zip-safe RENAMED Viewed

File without changes

{toulligqc-2.5.2 → toulligqc-2.5.4}/toulligqc.egg-info/top_level.txt RENAMED Viewed

File without changes

toulligqc 2.5.2__tar.gz → 2.5.4__tar.gz

toulligqc 2.5.2 (tar.gz) → 2.5.4 (tar.gz)