toulligqc 2.5.3__tar.gz → 2.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toulligqc-2.5.3 → toulligqc-2.5.4}/PKG-INFO +1 -1
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/bam_extractor.py +30 -20
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_bam_common.py +17 -2
- toulligqc-2.5.4/toulligqc/version.py +1 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/PKG-INFO +1 -1
- toulligqc-2.5.3/toulligqc/version.py +0 -1
- {toulligqc-2.5.3 → toulligqc-2.5.4}/AUTHORS +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE-CeCILL.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/LICENSE.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/MANIFEST.in +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/README.md +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/setup.cfg +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/setup.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/test/test_sequencing_summary_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/__init__.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/common_statistics.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/configuration.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/extractor_common.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fast5_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/fastq_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/html_report_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_common.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/report_data_file_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/plotly-latest.min.js +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.css +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/resources/toulligqc.png +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/sequencing_telemetry_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc/toulligqc_info_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/SOURCES.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/dependency_links.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/entry_points.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/not-zip-safe +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/requires.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.4}/toulligqc.egg-info/top_level.txt +0 -0
|
@@ -231,8 +231,10 @@ class uBAM_Extractor:
|
|
|
231
231
|
"""
|
|
232
232
|
#def process_bam_chunk(bam_chunk):
|
|
233
233
|
rec_data = []
|
|
234
|
+
record_count = 0
|
|
234
235
|
for rec in uBAM_chunk:
|
|
235
|
-
|
|
236
|
+
record_count += 1
|
|
237
|
+
rec_dict = self._process_record(rec, record_count)
|
|
236
238
|
rec_data.append(rec_dict)
|
|
237
239
|
return rec_data
|
|
238
240
|
|
|
@@ -258,35 +260,43 @@ class uBAM_Extractor:
|
|
|
258
260
|
|
|
259
261
|
|
|
260
262
|
def _get_header(self):
    """
    Read the header of the first uBAM file and store run metadata in self.header.

    The read-group ID is split at its first underscore into a run id and a
    model version id. Every tag missing from the header falls back to 'Unknown'.

    return : None (the result is stored in self.header)
    """
    # The context manager closes the file handle once the header is copied out.
    # check_sq=False is kept from the original call (per the pysam documentation
    # it allows opening files whose header carries no @SQ lines).
    with pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False) as sam_file:
        header = sam_file.header.to_dict()

    read_group_id = extract_headerTag(header, 'RG', 'ID', 'Unknown_Unknown')
    # partition() never fails to unpack: an ID without '_' yields the whole ID
    # as run id instead of raising ValueError.
    run_id, separator, model_version_id = read_group_id.partition('_')
    if not separator:
        model_version_id = 'Unknown'

    self.header = {
        "run_id": run_id,
        "run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
        "sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
        "basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
        "basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
        "model_version_id": model_version_id,
        "flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
    }
|
|
273
276
|
|
|
274
277
|
|
|
275
|
-
def _process_record(self, rec):
|
|
278
|
+
def _process_record(self, rec, record_count):
    """
    Extract QC info from one BAM record given as a tab-separated text line.

    rec : record line; field index 9 is measured for the read length, field
          index 10 is passed to avg_qual, and every field from index 11 on is
          parsed as an optional TAG:TYPE:VALUE attribute
    record_count : counter supplied by the caller, used as a substitute start
                   time when the record has no 'st' attribute
    return : list of QC info
             [read length, average Qscore, passes filtering, start time,
              channel, duration]
    """
    fields = rec.split("\t")

    # Parse optional fields; only the value of each TAG:TYPE:VALUE is kept
    attributes = {}
    for optional_field in fields[11:]:
        tag, _value_type, value = optional_field.split(':', 2)
        attributes[tag] = value

    iso_start_time = attributes.get('st')
    qual = avg_qual(fields[10])
    passes_filtering = bool(qual > self.threshold_Qscore)

    # Without a start-time attribute, fall back to the record counter
    if iso_start_time is None:
        start_time = float(record_count)
    else:
        start_time = timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z')

    return [
        len(fields[9]),  # read length
        qual,  # AVG Qscore
        passes_filtering,  # Passing filter
        start_time,  # start time
        attributes.get('ch', '1'),  # Channel
        attributes.get('du', '1')  # Duration
    ]
|
|
@@ -2,8 +2,23 @@ import multiprocessing as mp
|
|
|
2
2
|
from tqdm import tqdm
|
|
3
3
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
4
4
|
|
|
5
|
-
def extract_headerTag(header, tagGroup, tag):
|
|
6
|
-
|
|
5
|
+
def extract_headerTag(header, tagGroup, tag, defaultValue = None):
    """
    Return the value of a tag taken from the first entry of a header tag group.

    header : mapping of tag group name to a sequence of entry mappings
    tagGroup : name of the tag group to look up (callers use 'RG' and 'PG')
    tag : name of the tag to read in the first entry of the group
    defaultValue : value returned when the group is absent or empty, or when
                   the tag is absent; None means "no default", so a KeyError
                   is raised instead
    return : the tag value, or defaultValue when it is missing
    raises : KeyError naming the missing group or tag when no default is given
    """
    def _missing(key):
        # Single place deciding between the fallback value and the error
        if defaultValue is not None:
            return defaultValue
        raise KeyError(key)

    # A group that is present but has no entries is treated like a missing
    # group, instead of failing with IndexError on the [0] access below.
    if tagGroup not in header or not header[tagGroup]:
        return _missing(tagGroup)

    first_entry = header[tagGroup][0]

    if tag not in first_entry:
        return _missing(tag)

    return first_entry[tag]
|
|
7
22
|
|
|
8
23
|
|
|
9
24
|
def batch_iterator(iterator, batch_size):
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.5.4'
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '2.5.3'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|