toulligqc 2.7__tar.gz → 2.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {toulligqc-2.7 → toulligqc-2.7.1}/PKG-INFO +1 -1
  2. {toulligqc-2.7 → toulligqc-2.7.1}/README.md +18 -5
  3. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/common_statistics.py +2 -2
  4. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/extractor_common.py +5 -1
  5. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fastq_extractor.py +41 -22
  6. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/toulligqc.py +14 -10
  7. toulligqc-2.7.1/toulligqc/version.py +1 -0
  8. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/PKG-INFO +1 -1
  9. toulligqc-2.7/toulligqc/version.py +0 -1
  10. {toulligqc-2.7 → toulligqc-2.7.1}/AUTHORS +0 -0
  11. {toulligqc-2.7 → toulligqc-2.7.1}/LICENSE-CeCILL.txt +0 -0
  12. {toulligqc-2.7 → toulligqc-2.7.1}/LICENSE.txt +0 -0
  13. {toulligqc-2.7 → toulligqc-2.7.1}/MANIFEST.in +0 -0
  14. {toulligqc-2.7 → toulligqc-2.7.1}/setup.cfg +0 -0
  15. {toulligqc-2.7 → toulligqc-2.7.1}/setup.py +0 -0
  16. {toulligqc-2.7 → toulligqc-2.7.1}/test/test_sequencing_summary_extractor.py +0 -0
  17. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/__init__.py +0 -0
  18. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/bam_extractor.py +0 -0
  19. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/common.py +0 -0
  20. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/configuration.py +0 -0
  21. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fast5_extractor.py +0 -0
  22. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/fastq_bam_common.py +0 -0
  23. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/html_report_generator.py +0 -0
  24. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_common.py +0 -0
  25. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_generator.py +0 -0
  26. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
  27. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/pod5_extractor.py +0 -0
  28. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/report_data_file_generator.py +0 -0
  29. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/plotly-latest.min.js +0 -0
  30. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/toulligqc.css +0 -0
  31. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/resources/toulligqc.png +0 -0
  32. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_summary_extractor.py +0 -0
  33. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
  34. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/sequencing_telemetry_extractor.py +0 -0
  35. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc/toulligqc_info_extractor.py +0 -0
  36. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/SOURCES.txt +0 -0
  37. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/dependency_links.txt +0 -0
  38. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/entry_points.txt +0 -0
  39. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/not-zip-safe +0 -0
  40. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/requires.txt +0 -0
  41. {toulligqc-2.7 → toulligqc-2.7.1}/toulligqc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.7
3
+ Version: 2.7.1
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomiqueENS/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -53,18 +53,31 @@ $ cd toulligqc && python3 setup.py build install
53
53
  ToulligQC is written with Python 3.
54
54
  To run ToulligQC without Docker, you need to install the following Python modules:
55
55
 
56
- * matplotlib
57
- * plotly
58
- * h5py
56
+ * matplotlib
57
+ * plotly
58
+ * h5py
59
59
  * pandas
60
60
  * numpy
61
61
  * scipy
62
62
  * scikit-learn
63
63
  * pysam
64
+ * tqdm
65
+ * pod5
64
66
 
67
+ <a name="Conda-environemnt"></a>
68
+ ### 1.2 Conda environment
69
+
70
+ You can use a conda environment to install the required packages:
71
+
72
+ ```
73
+ git clone https://github.com/GenomicParisCentre/toulligQC.git
74
+ cd toulligqc && python3 setup.py build install
75
+ conda env create -f environment.yml
76
+ conda activate toulliqc
77
+ ```
65
78
 
66
79
  <a name="pypi-installation"></a>
67
- ### 1.2 Using a PyPi package
80
+ ### 1.3 Using a PyPi package
68
81
 
69
82
  ToulligQC can be installed more easily with a pip package available on the PyPI repository. The following command line will install the latest version of ToulligQC:
70
83
  ```bash
@@ -72,7 +85,7 @@ $ pip3 install toulligqc
72
85
  ```
73
86
 
74
87
  <a name="docker"></a>
75
- ### 1.3 Using Docker
88
+ ### 1.4 Using Docker
76
89
  ToulligQC and its dependencies are available through a Docker image. To install docker on your system, go to the Docker website (<https://docs.docker.com/engine/installation/>).
77
90
  Even if Docker can run on Windows or macOS virtual machines, we recommend running ToulligQC on a Linux host.
78
91
  <a name="docker-image-recovery"></a>
@@ -18,7 +18,7 @@ def compute_LXX(dataframe_dict, x):
18
18
  cum_sum = 0
19
19
  count = 0
20
20
  for v in data:
21
- cum_sum += v
21
+ cum_sum += int(v)
22
22
  count += 1
23
23
  if cum_sum >= half_sum:
24
24
  return count
@@ -31,7 +31,7 @@ def compute_NXX(dataframe_dict, x):
31
31
  half_sum = data.sum() * x / 100
32
32
  cum_sum = 0
33
33
  for v in data:
34
- cum_sum += v
34
+ cum_sum += int(v)
35
35
  if cum_sum >= half_sum:
36
36
  return int(v)
37
37
 
@@ -432,7 +432,11 @@ def add_image_to_result(quiet, image_list, start_time, image):
432
432
  def timeISO_to_float(iso_datetime, format):
433
433
  """
434
434
  """
435
- dt = datetime.strptime(iso_datetime, format)
435
+ try:
436
+ dt = datetime.strptime(iso_datetime, format)
437
+ except:
438
+ format = '%Y-%m-%dT%H:%M:%SZ'
439
+ dt = datetime.strptime(iso_datetime, format)
436
440
  unix_timestamp = dt.timestamp()
437
441
  return unix_timestamp
438
442
 
@@ -119,32 +119,45 @@ class fastqExtractor:
119
119
 
120
120
  add_image_to_result(self.quiet, images, time.time(), pgg.read_count_histogram(result_dict, self.images_directory))
121
121
  add_image_to_result(self.quiet, images, time.time(), pgg.read_length_scatterplot(self.dataframe_dict, self.images_directory))
122
+
122
123
  if self.rich:
123
124
  add_image_to_result(self.quiet, images, time.time(), pgg.yield_plot(self.dataframe_1d, self.images_directory))
124
125
  add_image_to_result(self.quiet, images, time.time(), pgg.read_quality_multiboxplot(self.dataframe_dict, self.images_directory))
125
126
  add_image_to_result(self.quiet, images, time.time(), pgg.allphred_score_frequency(self.dataframe_dict, self.images_directory))
127
+
126
128
  if self.rich:
127
129
  add_image_to_result(self.quiet, images, time.time(), pgg.plot_performance(self.dataframe_1d, self.images_directory))
128
130
  add_image_to_result(self.quiet, images, time.time(), pgg.twod_density(self.dataframe_dict, self.images_directory))
131
+
129
132
  if self.rich:
130
133
  add_image_to_result(self.quiet, images, time.time(), pgg.sequence_length_over_time(self.dataframe_dict, self.images_directory))
131
134
  add_image_to_result(self.quiet, images, time.time(), pgg.phred_score_over_time(self.dataframe_dict, result_dict, self.images_directory))
132
- if self.is_barcode:
133
- add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
134
- self.barcode_selection,
135
- self.images_directory))
136
135
 
137
- read_fail = self.dataframe_dict["read.fail.barcoded"]
138
- if not (len(read_fail) == 1 and read_fail["other barcodes"] == 0):
139
- add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_fail(self.dataframe_dict,
140
- self.barcode_selection,
141
- self.images_directory))
142
-
143
- add_image_to_result(self.quiet, images, time.time(), pgg.barcode_length_boxplot(self.dataframe_dict,
144
- self.images_directory))
145
-
146
- add_image_to_result(self.quiet, images, time.time(), pgg.barcoded_phred_score_frequency(self.dataframe_dict,
147
- self.images_directory))
136
+ if self.is_barcode:
137
+ if "barcode_alias" in self.config_dictionary:
138
+ barcode_alias = self.config_dictionary['barcode_alias']
139
+ else:
140
+ barcode_alias = None
141
+
142
+ add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_pass(self.dataframe_dict,
143
+ self.barcode_selection,
144
+ self.images_directory,
145
+ barcode_alias))
146
+
147
+ read_fail = self.dataframe_dict["read.fail.barcoded"]
148
+ if not (len(read_fail) == 1 and read_fail["other barcodes"] == 0):
149
+ add_image_to_result(self.quiet, images, time.time(), pgg.barcode_percentage_pie_chart_fail(self.dataframe_dict,
150
+ self.barcode_selection,
151
+ self.images_directory,
152
+ barcode_alias))
153
+
154
+ add_image_to_result(self.quiet, images, time.time(), pgg.barcode_length_boxplot(self.dataframe_dict,
155
+ self.images_directory,
156
+ barcode_alias))
157
+
158
+ add_image_to_result(self.quiet, images, time.time(), pgg.barcoded_phred_score_frequency(self.dataframe_dict,
159
+ self.images_directory,
160
+ barcode_alias))
148
161
  return images
149
162
 
150
163
 
@@ -211,7 +224,7 @@ class fastqExtractor:
211
224
  "pass.reads.sequence.length")
212
225
  describe_dict(self, result_dict, self.dataframe_dict["fail.reads.sequence.length"],
213
226
  "fail.reads.sequence.length")
214
- if self.is_barcode:
227
+ if self.rich and self.is_barcode:
215
228
  extract_barcode_info(self, result_dict,
216
229
  self.barcode_selection,
217
230
  self.dataframe_dict,
@@ -258,8 +271,9 @@ class fastqExtractor:
258
271
  columns = ['sequence_length', 'mean_qscore', 'passes_filtering']
259
272
  if self.rich:
260
273
  columns.extend(['start_time', 'channel'])
261
- if self.is_barcode:
262
- columns.append('barcode_arrangement')
274
+
275
+ if self.is_barcode:
276
+ columns.append('barcode_arrangement')
263
277
 
264
278
  fq_df = pd.DataFrame(fq_df, columns=columns)
265
279
 
@@ -271,8 +285,10 @@ class fastqExtractor:
271
285
  fq_df["start_time"] = fq_df["start_time"] - fq_df["start_time"].min()
272
286
  fq_df['start_time'] = fq_df['start_time'].astype(np.float64)
273
287
  fq_df['channel'] = fq_df['channel'].astype(np.int16)
274
- if self.is_barcode:
275
- fq_df['barcode_arrangement'] = fq_df['barcode_arrangement'].astype("category")
288
+
289
+ if self.is_barcode:
290
+ fq_df['barcode_arrangement'] = fq_df['barcode_arrangement'].astype("category")
291
+
276
292
  return fq_df
277
293
 
278
294
 
@@ -346,8 +362,11 @@ class fastqExtractor:
346
362
  self.is_barcode = False
347
363
  if 'model_version_id' not in metadata:
348
364
  metadata['model_version_id'] = 'Unknow'
365
+ run_info = []
349
366
  try:
350
- return metadata['runid'] , metadata['sampleid'] , metadata['model_version_id']
367
+ sample_id = 'sample_id' if 'sample_id' in metadata else 'sampleid'
368
+ run_id = 'run_id' if 'run_id' in metadata else 'runid'
369
+ return metadata[run_id] , metadata[sample_id] , metadata['model_version_id']
351
370
  except:
352
371
  return None
353
372
 
@@ -356,7 +375,7 @@ class fastqExtractor:
356
375
  """
357
376
  """
358
377
  metadata = dict(x.split("=") for x in name.split(" ")[1:])
359
- start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%SZ')
378
+ start_time = timeISO_to_float(metadata['start_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
360
379
  if self.is_barcode:
361
380
  return start_time, metadata['ch'], metadata['barcode']
362
381
  return start_time, metadata['ch']
@@ -352,17 +352,25 @@ def main():
352
352
  sys.exit("ERROR: dico_path is empty")
353
353
 
354
354
  # Get barcode selection
355
+ allowed_patterns = r'(BC|RB|NB|BP|BARCODE)(\d{2})'
356
+
355
357
  if config_dictionary['barcoding'].lower() == 'true':
356
358
  config_dictionary['barcode_selection'] = []
357
359
 
358
- if 'barcodes' in config_dictionary:
360
+ if 'samplesheet' in config_dictionary:
361
+ samplesheet = parse_samplesheet(config_dictionary['samplesheet'])
362
+ config_dictionary['barcodes'] = ",".join(list(samplesheet['barcode']))
363
+ config_dictionary['barcode_alias'] = pd.Series(samplesheet.alias.values,
364
+ index=samplesheet.barcode).to_dict()
365
+
366
+ if 'barcodes' in config_dictionary or 'samplesheet' in config_dictionary:
359
367
  barcode_set = set()
360
368
  if ":" in config_dictionary['barcodes']:
361
369
  start, end = config_dictionary['barcodes'].strip().split(':')
362
- pattern = re.search(r'(BC|RB|NB|BP|BARCODE)(\d{2})', start.strip().upper())
370
+ pattern = re.search(allowed_patterns, start.strip().upper())
363
371
  if pattern:
364
372
  start_number = int(pattern.group(2))
365
- pattern = re.search(r'(BC|RB|NB|BP|BARCODE)(\d{2})', end.strip().upper())
373
+ pattern = re.search(allowed_patterns, end.strip().upper())
366
374
  if pattern:
367
375
  end_number = int(pattern.group(2))
368
376
  for i in range(start_number, end_number + 1):
@@ -371,13 +379,15 @@ def main():
371
379
 
372
380
  else:
373
381
  for b in config_dictionary['barcodes'].strip().split(','):
374
- pattern = re.search(r'(BC|RB|NB|BP|BARCODE)(\d{2})', b.strip().upper())
382
+ pattern = re.search(allowed_patterns, b.strip().upper())
375
383
  if pattern:
376
384
  barcode = 'barcode{}'.format(pattern.group(2))
377
385
  barcode_set.add(barcode)
378
386
  else:
379
387
  sys.stderr.write("\033[93mWarning:\033[0m Barcode '{}' is non-standard custom arrangement.\n".format(b))
380
388
  barcode_set.add(b)
389
+ if 'samplesheet' in config_dictionary:
390
+ config_dictionary['barcode_alias'][barcode] = config_dictionary['barcode_alias'].pop(b)
381
391
 
382
392
  barcode_selection = sorted(barcode_set)
383
393
 
@@ -385,12 +395,6 @@ def main():
385
395
  sys.exit("ERROR: No known barcode found in provided list of barcodes")
386
396
  config_dictionary['barcode_selection'] = barcode_selection
387
397
 
388
- elif 'samplesheet' in config_dictionary:
389
- samplesheet = parse_samplesheet(config_dictionary['samplesheet'])
390
- config_dictionary['barcode_selection'] = list(samplesheet['barcode'])
391
- config_dictionary['barcode_alias'] = pd.Series(samplesheet.alias.values,
392
- index=samplesheet.barcode).to_dict()
393
-
394
398
  else:
395
399
  config_dictionary['barcode_selection'] = ''
396
400
 
@@ -0,0 +1 @@
1
+ __version__ = '2.7.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toulligqc
3
- Version: 2.7
3
+ Version: 2.7.1
4
4
  Summary: A post sequencing QC tool for Oxford Nanopore sequencers
5
5
  Home-page: https://github.com/GenomiqueENS/toulligQC
6
6
  Author: Genomic Paris Centre team
@@ -1 +0,0 @@
1
- __version__ = '2.7'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes