toulligqc 2.5.3__tar.gz → 2.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toulligqc-2.5.3 → toulligqc-2.5.5}/PKG-INFO +1 -1
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/bam_extractor.py +30 -20
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fastq_bam_common.py +17 -2
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_common.py +12 -5
- toulligqc-2.5.5/toulligqc/version.py +1 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/PKG-INFO +1 -1
- toulligqc-2.5.3/toulligqc/version.py +0 -1
- {toulligqc-2.5.3 → toulligqc-2.5.5}/AUTHORS +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/LICENSE-CeCILL.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/LICENSE.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/MANIFEST.in +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/README.md +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/setup.cfg +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/setup.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/test/test_sequencing_summary_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/__init__.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/common.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/common_statistics.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/configuration.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/extractor_common.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fast5_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/fastq_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/html_report_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/report_data_file_generator.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/plotly-latest.min.js +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/toulligqc.css +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/resources/toulligqc.png +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_summary_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/sequencing_telemetry_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/toulligqc.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc/toulligqc_info_extractor.py +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/SOURCES.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/dependency_links.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/entry_points.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/not-zip-safe +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/requires.txt +0 -0
- {toulligqc-2.5.3 → toulligqc-2.5.5}/toulligqc.egg-info/top_level.txt +0 -0
|
@@ -231,8 +231,10 @@ class uBAM_Extractor:
|
|
|
231
231
|
"""
|
|
232
232
|
#def process_bam_chunk(bam_chunk):
|
|
233
233
|
rec_data = []
|
|
234
|
+
record_count = 0
|
|
234
235
|
for rec in uBAM_chunk:
|
|
235
|
-
|
|
236
|
+
record_count += 1
|
|
237
|
+
rec_dict = self._process_record(rec, record_count)
|
|
236
238
|
rec_data.append(rec_dict)
|
|
237
239
|
return rec_data
|
|
238
240
|
|
|
@@ -258,35 +260,43 @@ class uBAM_Extractor:
|
|
|
258
260
|
|
|
259
261
|
|
|
260
262
|
def _get_header(self):
|
|
261
|
-
|
|
262
|
-
header =
|
|
263
|
-
run_id, model_version_id =
|
|
263
|
+
sam_file = pysam.AlignmentFile(self.ubam[0], "rb", check_sq=False)
|
|
264
|
+
header = sam_file.header.to_dict()
|
|
265
|
+
run_id, model_version_id = extract_headerTag(header, 'RG','ID',
|
|
266
|
+
'Unknown_Unknown').split('_', 1)
|
|
264
267
|
self.header = {
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
268
|
+
"run_id": run_id,
|
|
269
|
+
"run_date": extract_headerTag(header, 'RG', 'DT', 'Unknown'),
|
|
270
|
+
"sample_id": extract_headerTag(header, 'RG', 'SM', 'Unknown'),
|
|
271
|
+
"basecaller": extract_headerTag(header, 'PG', 'PN', 'Unknown'),
|
|
272
|
+
"basecaller_version": extract_headerTag(header, 'PG', 'VN', 'Unknown'),
|
|
273
|
+
"model_version_id": model_version_id,
|
|
274
|
+
"flow_cell_id": extract_headerTag(header, 'RG', 'PU', 'Unknown')
|
|
272
275
|
}
|
|
273
276
|
|
|
274
277
|
|
|
275
|
-
def _process_record(self, rec):
|
|
278
|
+
def _process_record(self, rec, record_count):
|
|
276
279
|
"""
|
|
277
280
|
extract QC info from BAM record
|
|
278
281
|
return : dict of QC info
|
|
279
282
|
"""
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
+
fields = rec.split("\t")
|
|
284
|
+
|
|
285
|
+
# Parse optional fields
|
|
286
|
+
attributes = {}
|
|
287
|
+
for t in fields[11:]:
|
|
288
|
+
k, t, v = t.split(':', 2)
|
|
289
|
+
attributes[k] = v
|
|
290
|
+
|
|
291
|
+
iso_start_time = attributes.get('st', None)
|
|
292
|
+
qual = avg_qual(fields[10])
|
|
283
293
|
passes_filtering = True if qual > self.threshold_Qscore else False
|
|
284
294
|
data = [
|
|
285
|
-
len(
|
|
295
|
+
len(fields[9]), # read length
|
|
286
296
|
qual, # AVG Qscore
|
|
287
297
|
passes_filtering, # Passing filter
|
|
288
|
-
timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
|
|
289
|
-
|
|
290
|
-
|
|
298
|
+
float(record_count) if iso_start_time is None else timeISO_to_float(iso_start_time, '%Y-%m-%dT%H:%M:%S.%f%z'), # start time
|
|
299
|
+
attributes.get('ch', '1'), # Channel
|
|
300
|
+
attributes.get('du', '1') # Duration
|
|
291
301
|
]
|
|
292
|
-
return data
|
|
302
|
+
return data
|
|
@@ -2,8 +2,23 @@ import multiprocessing as mp
|
|
|
2
2
|
from tqdm import tqdm
|
|
3
3
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
4
4
|
|
|
5
|
-
def extract_headerTag(header, tagGroup, tag):
|
|
6
|
-
|
|
5
|
+
def extract_headerTag(header, tagGroup, tag, defaultValue = None):
|
|
6
|
+
|
|
7
|
+
if tagGroup not in header:
|
|
8
|
+
if defaultValue is not None:
|
|
9
|
+
return defaultValue
|
|
10
|
+
else:
|
|
11
|
+
raise KeyError(tagGroup)
|
|
12
|
+
|
|
13
|
+
first_entry = header[tagGroup][0]
|
|
14
|
+
|
|
15
|
+
if tag not in first_entry:
|
|
16
|
+
if defaultValue is not None:
|
|
17
|
+
return defaultValue
|
|
18
|
+
else:
|
|
19
|
+
raise KeyError(tag)
|
|
20
|
+
|
|
21
|
+
return first_entry[tag]
|
|
7
22
|
|
|
8
23
|
|
|
9
24
|
def batch_iterator(iterator, batch_size):
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
|
|
23
23
|
from collections import defaultdict
|
|
24
24
|
|
|
25
|
+
import pkgutil
|
|
25
26
|
import numpy as np
|
|
26
27
|
import pandas as pd
|
|
27
28
|
import plotly.graph_objs as go
|
|
@@ -301,6 +302,10 @@ def _transparent_component(c, b, a):
|
|
|
301
302
|
return '0' + r
|
|
302
303
|
return r
|
|
303
304
|
|
|
305
|
+
def _copy_latest_minjs(result_directory, js_file):
|
|
306
|
+
with open(result_directory + '/' + js_file , 'w+') as f:
|
|
307
|
+
plotly_min_js = pkgutil.get_data(__name__, "resources/plotly-latest.min.js").decode('utf8')
|
|
308
|
+
f.write(plotly_min_js)
|
|
304
309
|
|
|
305
310
|
def _create_and_save_div(fig, result_directory, main):
|
|
306
311
|
div = py.plot(fig,
|
|
@@ -311,11 +316,13 @@ def _create_and_save_div(fig, result_directory, main):
|
|
|
311
316
|
|
|
312
317
|
if result_directory is not None:
|
|
313
318
|
output_file = result_directory + '/' + '_'.join(main.split())
|
|
319
|
+
js_file="plotly.min.js"
|
|
314
320
|
py.plot(fig,
|
|
315
321
|
filename=output_file,
|
|
316
322
|
output_type="file",
|
|
317
|
-
include_plotlyjs=
|
|
323
|
+
include_plotlyjs= js_file,
|
|
318
324
|
auto_open=False)
|
|
325
|
+
_copy_latest_minjs(result_directory, js_file)
|
|
319
326
|
else:
|
|
320
327
|
output_file = None
|
|
321
328
|
|
|
@@ -622,9 +629,9 @@ def _pie_chart_graph(graph_name, count_sorted, color_palette, one_d_square, resu
|
|
|
622
629
|
method="update"
|
|
623
630
|
),
|
|
624
631
|
dict(
|
|
625
|
-
args=[{'visible': [False, False,
|
|
632
|
+
args=[{'visible': [False, False, True, False]},
|
|
626
633
|
{**_xaxis('Barcodes', dict(visible=True)),
|
|
627
|
-
**_yaxis('
|
|
634
|
+
**_yaxis('Read count', dict(visible=True)),
|
|
628
635
|
'plot_bgcolor': plotly_background_color}],
|
|
629
636
|
label="Reads Histogram",
|
|
630
637
|
method="update"
|
|
@@ -638,9 +645,9 @@ def _pie_chart_graph(graph_name, count_sorted, color_palette, one_d_square, resu
|
|
|
638
645
|
method="update"
|
|
639
646
|
),
|
|
640
647
|
dict(
|
|
641
|
-
args=[{'visible': [False, False,
|
|
648
|
+
args=[{'visible': [False, False, False, True]},
|
|
642
649
|
{**_xaxis('Barcodes', dict(visible=True)),
|
|
643
|
-
**_yaxis('
|
|
650
|
+
**_yaxis('Base count', dict(visible=True)),
|
|
644
651
|
'plot_bgcolor': plotly_background_color}],
|
|
645
652
|
label="Bases Histogram",
|
|
646
653
|
method="update"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.5.5'
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '2.5.3'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|