PyPI - toulligqc - Versions diffs - 2.5.3__tar.gz → 2.5.4__tar.gz - Mend

toulligqc-2.5.3.tar.gz → toulligqc-2.5.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

{toulligqc-2.5.3 → toulligqc-2.5.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: toulligqc
-Version: 2.5.3
+Version: 2.5.4
 Summary: A post sequencing QC tool for Oxford Nanopore sequencers
 Home-page: https://github.com/GenomicParisCentre/toulligQC
 Author: Genomic Paris Centre team

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/bam_extractor.py RENAMED Viewed

@@ -231,8 +231,10 @@ class uBAM_Extractor:
         """
         #def process_bam_chunk(bam_chunk):
         rec_data = []
+        record_count = 0
         for rec in uBAM_chunk:
-            rec_dict = self._process_record(rec)
+            record_count += 1
+            rec_dict = self._process_record(rec, record_count)
             rec_data.append(rec_dict)
         return rec_data
@@ -258,35 +260,43 @@ class uBAM_Extractor:
     def _get_header(self):
-        samfile = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
-        header = samfile.header.to_dict()
-        run_id, model_version_id =  extract_headerTag(header,'RG','ID').split('_', 1)
+        sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
+        header = sam_file.header.to_dict()
+        run_id, model_version_id = extract_headerTag(header, 'RG','ID',
+                                                     'Unknown_Unknown').split('_', 1)
         self.header = {
-        "run_id" : run_id,
-        "run_date" : extract_headerTag(header, 'RG', 'DT'),
-        "sample_id" : extract_headerTag(header,'RG','SM'),
-        "basecaller" : extract_headerTag(header,'PG','PN'),
-        "basecaller_version" : extract_headerTag(header,'PG','VN'),
-        "model_version_id" : model_version_id,
-        "flow_cell_id" : extract_headerTag(header,'RG','PU')
+            "run_id": run_id,
+            "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
+            "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
+            "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
+            "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
+            "model_version_id": model_version_id,
+            "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
         }
-    def _process_record(self, rec):
+    def _process_record(self, rec, record_count):
         """
         extract QC info from BAM record
         return : dict of QC info
         """
-        tags = rec.split("\t")
-        iso_start_time = tags[17].split(':',2)[2]
-        qual = avg_qual(tags[10])
+        fields = rec.split("\t")
+        # Parse optional fields
+        attributes = {}
+        for t in fields[11:]:
+            k, t, v = t.split(':', 2)
+            attributes[k] = v
+        iso_start_time = attributes.get('st', None)
+        qual = avg_qual(fields[10])
         passes_filtering = True if qual > self.threshold_Qscore else False
         data = [
-            len(tags[9]), # read length
+            len(fields[9]), # read length
             qual, # AVG Qscore
             passes_filtering, # Passing filter
-            timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
-            tags[16].split(':',2)[2], # Channel
-            tags[12].split(':',2)[2] # Duration
+            float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
+            attributes.get('ch', '1'),  # Channel
+            attributes.get('du', '1')  # Duration
         ]
-        return data
+        return data

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_bam_common.py RENAMED Viewed

@@ -2,8 +2,23 @@ import multiprocessing as mp
 from tqdm import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
-def extract_headerTag(header, tagGroup, tag):
-        return header[tagGroup][0][tag]
+def extract_headerTag(header, tagGroup, tag, defaultValue = None):
+    if tagGroup not in header:
+        if defaultValue is not None:
+            return defaultValue
+        else:
+            raise KeyError(tagGroup)
+    first_entry = header[tagGroup][0]
+    if tag not in first_entry:
+        if defaultValue is not None:
+            return defaultValue
+        else:
+            raise KeyError(tag)
+    return first_entry[tag]
 def batch_iterator(iterator, batch_size):

toulligqc-2.5.4/toulligqc/version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = '2.5.4'

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: toulligqc
-Version: 2.5.3
+Version: 2.5.4
 Summary: A post sequencing QC tool for Oxford Nanopore sequencers
 Home-page: https://github.com/GenomicParisCentre/toulligQC
 Author: Genomic Paris Centre team

toulligqc-2.5.3/toulligqc/version.py DELETED Viewed

@@ -1 +0,0 @@
-__version__ = '2.5.3'

{toulligqc-2.5.3 → toulligqc-2.5.4}/AUTHORS RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE-CeCILL.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/MANIFEST.in RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/README.md RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/setup.cfg RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/setup.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/test/test_sequencing_summary_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/__init__.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common_statistics.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/configuration.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/extractor_common.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fast5_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/html_report_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_common.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_onedsquare_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/report_data_file_generator.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/plotly-latest.min.js RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.css RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.png RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_onedsquare_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_telemetry_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc_info_extractor.py RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/entry_points.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/not-zip-safe RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/requires.txt RENAMED Viewed

File without changes

{toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/top_level.txt RENAMED Viewed

File without changes

toulligqc 2.5.3__tar.gz → 2.5.4__tar.gz

toulligqc-2.5.3.tar.gz → toulligqc-2.5.4.tar.gz