toulligqc 2.7__tar.gz → 2.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {toulligqc-2.7 → toulligqc-2.7.1}/PKG-INFO +1 -1
- {toulligqc-2.7 → toulligqc-2.7.1}/README.md +18 -5
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/common_statistics.py +2 -2
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/extractor_common.py +5 -1
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fastq_extractor.py +41 -22
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/toulligqc.py +14 -10
- toulligqc-2.7.1/toulligqc/version.py +1 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/PKG-INFO +1 -1
- toulligqc-2.7/toulligqc/version.py +0 -1
- {toulligqc-2.7 → toulligqc-2.7.1}/AUTHORS +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/LICENSE-CeCILL.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/LICENSE.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/MANIFEST.in +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/setup.cfg +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/setup.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/test/test_sequencing_summary_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/__init__.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/bam_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/common.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/configuration.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fast5_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fastq_bam_common.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/html_report_generator.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_common.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_generator.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/pod5_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/report_data_file_generator.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/plotly-latest.min.js +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/toulligqc.css +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/toulligqc.png +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_summary_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_telemetry_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/toulligqc_info_extractor.py +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/SOURCES.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/dependency_links.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/entry_points.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/not-zip-safe +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/requires.txt +0 -0
- {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/top_level.txt +0 -0
|
@@ -53,18 +53,31 @@ $ cd toulligqc && python3 setup.py build install
|
|
|
53
53
|
ToulligQC is written with Python 3.
|
|
54
54
|
To run ToulligQC without Docker, you need to install the following Python modules:
|
|
55
55
|
|
|
56
|
-
* matplotlib
|
|
57
|
-
* plotly
|
|
58
|
-
* h5py
|
|
56
|
+
* matplotlib
|
|
57
|
+
* plotly
|
|
58
|
+
* h5py
|
|
59
59
|
* pandas
|
|
60
60
|
* numpy
|
|
61
61
|
* scipy
|
|
62
62
|
* scikit-learn
|
|
63
63
|
* pysam
|
|
64
|
+
* tqdm
|
|
65
|
+
* pod5
|
|
64
66
|
|
|
67
|
+
<a name="Conda-environemnt"></a>
|
|
68
|
+
### 1.2 Conda environment
|
|
69
|
+
|
|
70
|
+
You can use a conda environment to install the required packages:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
git clone https://github.com/GenomicParisCentre/toulligQC.git
|
|
74
|
+
cd toulligqc && python3 setup.py build install
|
|
75
|
+
conda env create -f environment.yml
|
|
76
|
+
conda activate toulliqc
|
|
77
|
+
```
|
|
65
78
|
|
|
66
79
|
<a name="pypi-installation"></a>
|
|
67
|
-
### 1.
|
|
80
|
+
### 1.3 Using a PyPi package
|
|
68
81
|
|
|
69
82
|
ToulligQC can be more easily installed with a pip package available on the PyPi repository. The following command line will install the latest version of ToulligQC:
|
|
70
83
|
```bash
|
|
@@ -72,7 +85,7 @@ $ pip3 install toulligqc
|
|
|
72
85
|
```
|
|
73
86
|
|
|
74
87
|
<a name="docker"></a>
|
|
75
|
-
### 1.
|
|
88
|
+
### 1.4 Using Docker
|
|
76
89
|
ToulligQC and its dependencies are available through a Docker image. To install docker on your system, go to the Docker website (<https://docs.docker.com/engine/installation/>).
|
|
77
90
|
Even if Docker can run on Windows or macOS virtual machines, we recommend running ToulligQC on a Linux host.
|
|
78
91
|
<a name="docker-image-recovery"></a>
|
|
@@ -18,7 +18,7 @@ def compute_LXX(dataframe_dict, x):
|
|
|
18
18
|
cum_sum = 0
|
|
19
19
|
count = 0
|
|
20
20
|
for v in data:
|
|
21
|
-
cum_sum += v
|
|
21
|
+
cum_sum += int(v)
|
|
22
22
|
count += 1
|
|
23
23
|
if cum_sum >= half_sum:
|
|
24
24
|
return count
|
|
@@ -31,7 +31,7 @@ def compute_NXX(dataframe_dict, x):
|
|
|
31
31
|
half_sum = data.sum() * x / 100
|
|
32
32
|
cum_sum = 0
|
|
33
33
|
for v in data:
|
|
34
|
-
cum_sum += v
|
|
34
|
+
cum_sum += int(v)
|
|
35
35
|
if cum_sum >= half_sum:
|
|
36
36
|
return int(v)
|
|
37
37
|
|
|
@@ -432,7 +432,11 @@ def add_image_to_result(quiet, image_list, start_time, image):
|
|
|
432
432
|
def timeISO_to_float(iso_datetime, format):
|
|
433
433
|
"""
|
|
434
434
|
"""
|
|
435
|
-
|
|
435
|
+
try:
|
|
436
|
+
dt = datetime.strptime(iso_datetime, format)
|
|
437
|
+
except:
|
|
438
|
+
format = '%Y-%m-%dT%H:%M:%SZ'
|
|
439
|
+
dt = datetime.strptime(iso_datetime, format)
|
|
436
440
|
unix_timestamp = dt.timestamp()
|
|
437
441
|
return unix_timestamp
|
|
438
442
|
|
|
@@ -119,32 +119,45 @@ class fastqExtractor:
|
|
|
119
119
|
|
|
120
120
|
add_image_to_result(self.quiet, images, time.time(), pgg.read_count_histogram(result_dict, self.images_directory))
|
|
121
121
|
add_image_to_result(self.quiet, images, time.time(), pgg.read_length_scatterplot(self.dataframe_dict, self.images_directory))
|
|
122
|
+
|
|
122
123
|
if self.rich:
|
|
123
124
|
add_image_to_result(self.quiet, images, time.time(), pgg.yield_plot(self.dataframe_1d, self.images_directory))
|
|
124
125
|
add_image_to_result(self.quiet, images, time.time(), pgg.read_quality_multiboxplot(self.dataframe_dict, self.images_directory))
|
|
125
126
|
add_image_to_result(self.quiet, images, time.time(), pgg.allphred_score_frequency(self.dataframe_dict, self.images_directory))
|
|
127
|
+
|
|
126
128
|
if self.rich:
|
|
127
129
|
add_image_to_result(self.quiet, images, time.time(), pgg.plot_performance(self.dataframe_1d, self.images_directory))
|
|
128
130
|
add_image_to_result(self.quiet, images, time.time(), pgg.twod_density(self.dataframe_dict, self.images_directory))
|
|
131
|
+
|
|
129
132
|
if self.rich:
|
|
130
133
|
add_image_to_result(self.quiet, images, time.time(), pgg.sequence_length_over_time(self.dataframe_dict, self.images_directory))
|
|
131
134
|
add_image_to_result(self.quiet, images, time.time(), pgg.phred_score_over_time(self.dataframe_dict, result_dict, self.images_directory))
|
|
132
|
-
if self.is_barcode:
|
|
133
|
-
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
|
|
134
|
-
self.barcode_selection,
|
|
135
|
-
self.images_directory))
|
|
136
135
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
136
|
+
if self.is_barcode:
|
|
137
|
+
if "barcode_alias" in self.config_dictionary:
|
|
138
|
+
barcode_alias = self.config_dictionary['barcode_alias']
|
|
139
|
+
else:
|
|
140
|
+
barcode_alias = None
|
|
141
|
+
|
|
142
|
+
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
|
|
143
|
+
self.barcode_selection,
|
|
144
|
+
self.images_directory,
|
|
145
|
+
barcode_alias))
|
|
146
|
+
|
|
147
|
+
read_fail = self.dataframe_dict["read.fail.barcoded"]
|
|
148
|
+
if not (len(read_fail) == 1 and read_fail["other barcodes"] == 0):
|
|
149
|
+
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_fail(self.dataframe_dict,
|
|
150
|
+
self.barcode_selection,
|
|
151
|
+
self.images_directory,
|
|
152
|
+
barcode_alias))
|
|
153
|
+
|
|
154
|
+
add_image_to_result(self.quiet, images, time.time(), pgg.barcode_length_boxplot(self.dataframe_dict,
|
|
155
|
+
self.images_directory,
|
|
156
|
+
barcode_alias))
|
|
157
|
+
|
|
158
|
+
add_image_to_result(self.quiet, images, time.time(), pgg.barcoded_phred_score_frequency(self.dataframe_dict,
|
|
159
|
+
self.images_directory,
|
|
160
|
+
barcode_alias))
|
|
148
161
|
return images
|
|
149
162
|
|
|
150
163
|
|
|
@@ -211,7 +224,7 @@ class fastqExtractor:
|
|
|
211
224
|
"pass.reads.sequence.length")
|
|
212
225
|
describe_dict(self, result_dict, self.dataframe_dict["fail.reads.sequence.length"],
|
|
213
226
|
"fail.reads.sequence.length")
|
|
214
|
-
if self.is_barcode:
|
|
227
|
+
if self.rich and self.is_barcode:
|
|
215
228
|
extract_barcode_info(self, result_dict,
|
|
216
229
|
self.barcode_selection,
|
|
217
230
|
self.dataframe_dict,
|
|
@@ -258,8 +271,9 @@ class fastqExtractor:
|
|
|
258
271
|
columns = ['sequence_length', 'mean_qscore', 'passes_filtering']
|
|
259
272
|
if self.rich:
|
|
260
273
|
columns.extend(['start_time', 'channel'])
|
|
261
|
-
|
|
262
|
-
|
|
274
|
+
|
|
275
|
+
if self.is_barcode:
|
|
276
|
+
columns.append('barcode_arrangement')
|
|
263
277
|
|
|
264
278
|
fq_df = pd.DataFrame(fq_df, columns=columns)
|
|
265
279
|
|
|
@@ -271,8 +285,10 @@ class fastqExtractor:
|
|
|
271
285
|
fq_df["start_time"] = fq_df["start_time"] - fq_df["start_time"].min()
|
|
272
286
|
fq_df['start_time'] = fq_df['start_time'].astype(np.float64)
|
|
273
287
|
fq_df['channel'] = fq_df['channel'].astype(np.int16)
|
|
274
|
-
|
|
275
|
-
|
|
288
|
+
|
|
289
|
+
if self.is_barcode:
|
|
290
|
+
fq_df['barcode_arrangement'] = fq_df['barcode_arrangement'].astype("category")
|
|
291
|
+
|
|
276
292
|
return fq_df
|
|
277
293
|
|
|
278
294
|
|
|
@@ -346,8 +362,11 @@ class fastqExtractor:
|
|
|
346
362
|
self.is_barcode = False
|
|
347
363
|
if 'model_version_id' not in metadata:
|
|
348
364
|
metadata['model_version_id'] = 'Unknow'
|
|
365
|
+
run_info = []
|
|
349
366
|
try:
|
|
350
|
-
|
|
367
|
+
sample_id = 'sample_id' if 'sample_id' in metadata else 'sampleid'
|
|
368
|
+
run_id = 'run_id' if 'run_id' in metadata else 'runid'
|
|
369
|
+
return metadata[run_id] , metadata[sample_id] , metadata['model_version_id']
|
|
351
370
|
except:
|
|
352
371
|
return None
|
|
353
372
|
|
|
@@ -356,7 +375,7 @@ class fastqExtractor:
|
|
|
356
375
|
"""
|
|
357
376
|
"""
|
|
358
377
|
metadata = dict(x.split("=") for x in name.split(" ")[1:])
|
|
359
|
-
start_time = timeISO_to_float(metadata['start_time'],
|
|
378
|
+
start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
|
|
360
379
|
if self.is_barcode:
|
|
361
380
|
return start_time, metadata['ch'], metadata['barcode']
|
|
362
381
|
return start_time, metadata['ch']
|
|
@@ -352,17 +352,25 @@ def main():
|
|
|
352
352
|
sys.exit("ERROR: dico_path is empty")
|
|
353
353
|
|
|
354
354
|
# Get barcode selection
|
|
355
|
+
allowed_patterns = r'(BC|RB|NB|BP|BARCODE)(\d{2})'
|
|
356
|
+
|
|
355
357
|
if config_dictionary['barcoding'].lower() == 'true':
|
|
356
358
|
config_dictionary['barcode_selection'] = []
|
|
357
359
|
|
|
358
|
-
if '
|
|
360
|
+
if 'samplesheet' in config_dictionary:
|
|
361
|
+
samplesheet = parse_samplesheet(config_dictionary['samplesheet'])
|
|
362
|
+
config_dictionary['barcodes'] = ",".join(list(samplesheet['barcode']))
|
|
363
|
+
config_dictionary['barcode_alias'] = pd.Series(samplesheet.alias.values,
|
|
364
|
+
index=samplesheet.barcode).to_dict()
|
|
365
|
+
|
|
366
|
+
if 'barcodes' in config_dictionary or 'samplesheet' in config_dictionary:
|
|
359
367
|
barcode_set = set()
|
|
360
368
|
if ":" in config_dictionary['barcodes']:
|
|
361
369
|
start, end = config_dictionary['barcodes'].strip().split(':')
|
|
362
|
-
pattern = re.search(
|
|
370
|
+
pattern = re.search(allowed_patterns, start.strip().upper())
|
|
363
371
|
if pattern:
|
|
364
372
|
start_number = int(pattern.group(2))
|
|
365
|
-
pattern = re.search(
|
|
373
|
+
pattern = re.search(allowed_patterns, end.strip().upper())
|
|
366
374
|
if pattern:
|
|
367
375
|
end_number = int(pattern.group(2))
|
|
368
376
|
for i in range(start_number, end_number + 1):
|
|
@@ -371,13 +379,15 @@ def main():
|
|
|
371
379
|
|
|
372
380
|
else:
|
|
373
381
|
for b in config_dictionary['barcodes'].strip().split(','):
|
|
374
|
-
pattern = re.search(
|
|
382
|
+
pattern = re.search(allowed_patterns, b.strip().upper())
|
|
375
383
|
if pattern:
|
|
376
384
|
barcode = 'barcode{}'.format(pattern.group(2))
|
|
377
385
|
barcode_set.add(barcode)
|
|
378
386
|
else:
|
|
379
387
|
sys.stderr.write("\033[93mWarning:\033[0m Barcode '{}' is non-standard custom arrangement.\n".format(b))
|
|
380
388
|
barcode_set.add(b)
|
|
389
|
+
if 'samplesheet' in config_dictionary:
|
|
390
|
+
config_dictionary['barcode_alias'][barcode] = config_dictionary['barcode_alias'].pop(b)
|
|
381
391
|
|
|
382
392
|
barcode_selection = sorted(barcode_set)
|
|
383
393
|
|
|
@@ -385,12 +395,6 @@ def main():
|
|
|
385
395
|
sys.exit("ERROR: No known barcode found in provided list of barcodes")
|
|
386
396
|
config_dictionary['barcode_selection'] = barcode_selection
|
|
387
397
|
|
|
388
|
-
elif 'samplesheet' in config_dictionary:
|
|
389
|
-
samplesheet = parse_samplesheet(config_dictionary['samplesheet'])
|
|
390
|
-
config_dictionary['barcode_selection'] = list(samplesheet['barcode'])
|
|
391
|
-
config_dictionary['barcode_alias'] = pd.Series(samplesheet.alias.values,
|
|
392
|
-
index=samplesheet.barcode).to_dict()
|
|
393
|
-
|
|
394
398
|
else:
|
|
395
399
|
config_dictionary['barcode_selection'] = ''
|
|
396
400
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.7.1'
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '2.7'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|