toulligqc 2.7__tar.gz → 2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. toulligqc-2.8/.gitignore +238 -0
  2. toulligqc-2.8/CHANGES.md +215 -0
  3. toulligqc-2.8/PKG-INFO +454 -0
  4. {toulligqc-2.7 → toulligqc-2.8}/README.md +73 -22
  5. toulligqc-2.8/pyproject.toml +65 -0
  6. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/common_statistics.py +2 -2
  7. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/extractor_common.py +5 -1
  8. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/fastq_extractor.py +41 -22
  9. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/sequencing_summary_extractor.py +13 -3
  10. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/toulligqc.py +22 -12
  11. toulligqc-2.8/toulligqc/version.py +1 -0
  12. toulligqc-2.7/MANIFEST.in +0 -1
  13. toulligqc-2.7/PKG-INFO +0 -24
  14. toulligqc-2.7/setup.cfg +0 -10
  15. toulligqc-2.7/setup.py +0 -59
  16. toulligqc-2.7/test/test_sequencing_summary_extractor.py +0 -355
  17. toulligqc-2.7/toulligqc/version.py +0 -1
  18. toulligqc-2.7/toulligqc.egg-info/PKG-INFO +0 -24
  19. toulligqc-2.7/toulligqc.egg-info/SOURCES.txt +0 -39
  20. toulligqc-2.7/toulligqc.egg-info/dependency_links.txt +0 -1
  21. toulligqc-2.7/toulligqc.egg-info/entry_points.txt +0 -2
  22. toulligqc-2.7/toulligqc.egg-info/not-zip-safe +0 -1
  23. toulligqc-2.7/toulligqc.egg-info/requires.txt +0 -11
  24. toulligqc-2.7/toulligqc.egg-info/top_level.txt +0 -1
  25. {toulligqc-2.7 → toulligqc-2.8}/AUTHORS +0 -0
  26. {toulligqc-2.7 → toulligqc-2.8}/LICENSE-CeCILL.txt +0 -0
  27. {toulligqc-2.7 → toulligqc-2.8}/LICENSE.txt +0 -0
  28. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/__init__.py +0 -0
  29. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/bam_extractor.py +0 -0
  30. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/common.py +0 -0
  31. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/configuration.py +0 -0
  32. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/fast5_extractor.py +0 -0
  33. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/fastq_bam_common.py +0 -0
  34. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/html_report_generator.py +0 -0
  35. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/plotly_graph_common.py +0 -0
  36. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/plotly_graph_generator.py +0 -0
  37. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/plotly_graph_onedsquare_generator.py +0 -0
  38. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/pod5_extractor.py +0 -0
  39. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/report_data_file_generator.py +0 -0
  40. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/resources/plotly-latest.min.js +0 -0
  41. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/resources/toulligqc.css +0 -0
  42. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/resources/toulligqc.png +0 -0
  43. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/sequencing_summary_onedsquare_extractor.py +0 -0
  44. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/sequencing_telemetry_extractor.py +0 -0
  45. {toulligqc-2.7 → toulligqc-2.8}/toulligqc/toulligqc_info_extractor.py +0 -0
@@ -0,0 +1,238 @@
1
+
2
+ # Created by https://www.gitignore.io/api/git,python,pycharm,visualstudiocode
3
+ # Edit at https://www.gitignore.io/?templates=git,python,pycharm,visualstudiocode
4
+
5
+ ### Git ###
6
+ # Created by git for backups. To disable backups in Git:
7
+ # $ git config --global mergetool.keepBackup false
8
+ *.orig
9
+
10
+ # Created by git when using merge tools for conflicts
11
+ *.BACKUP.*
12
+ *.BASE.*
13
+ *.LOCAL.*
14
+ *.REMOTE.*
15
+ *_BACKUP_*.txt
16
+ *_BASE_*.txt
17
+ *_LOCAL_*.txt
18
+ *_REMOTE_*.txt
19
+
20
+ ### PyCharm ###
21
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
22
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
23
+
24
+ # User-specific stuff
25
+ .idea/**/workspace.xml
26
+ .idea/**/tasks.xml
27
+ .idea/**/usage.statistics.xml
28
+ .idea/**/dictionaries
29
+ .idea/**/shelf
30
+
31
+ # Generated files
32
+ .idea/**/contentModel.xml
33
+
34
+ # Sensitive or high-churn files
35
+ .idea/**/dataSources/
36
+ .idea/**/dataSources.ids
37
+ .idea/**/dataSources.local.xml
38
+ .idea/**/sqlDataSources.xml
39
+ .idea/**/dynamic.xml
40
+ .idea/**/uiDesigner.xml
41
+ .idea/**/dbnavigator.xml
42
+
43
+ # Gradle
44
+ .idea/**/gradle.xml
45
+ .idea/**/libraries
46
+
47
+ # Gradle and Maven with auto-import
48
+ # When using Gradle or Maven with auto-import, you should exclude module files,
49
+ # since they will be recreated, and may cause churn. Uncomment if using
50
+ # auto-import.
51
+ # .idea/modules.xml
52
+ # .idea/*.iml
53
+ # .idea/modules
54
+ # *.iml
55
+ # *.ipr
56
+
57
+ # CMake
58
+ cmake-build-*/
59
+
60
+ # Mongo Explorer plugin
61
+ .idea/**/mongoSettings.xml
62
+
63
+ # File-based project format
64
+ *.iws
65
+
66
+ # IntelliJ
67
+ out/
68
+
69
+ # mpeltonen/sbt-idea plugin
70
+ .idea_modules/
71
+
72
+ # JIRA plugin
73
+ atlassian-ide-plugin.xml
74
+
75
+ # Cursive Clojure plugin
76
+ .idea/replstate.xml
77
+
78
+ # Crashlytics plugin (for Android Studio and IntelliJ)
79
+ com_crashlytics_export_strings.xml
80
+ crashlytics.properties
81
+ crashlytics-build.properties
82
+ fabric.properties
83
+
84
+ # Editor-based Rest Client
85
+ .idea/httpRequests
86
+
87
+ # Android studio 3.1+ serialized cache file
88
+ .idea/caches/build_file_checksums.ser
89
+
90
+ ### PyCharm Patch ###
91
+ # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
92
+
93
+ # *.iml
94
+ # modules.xml
95
+ # .idea/misc.xml
96
+ # *.ipr
97
+
98
+ # Sonarlint plugin
99
+ .idea/**/sonarlint/
100
+
101
+ # SonarQube Plugin
102
+ .idea/**/sonarIssues.xml
103
+
104
+ # Markdown Navigator plugin
105
+ .idea/**/markdown-navigator.xml
106
+ .idea/**/markdown-navigator/
107
+
108
+ ### Python ###
109
+ # Byte-compiled / optimized / DLL files
110
+ __pycache__/
111
+ *.py[cod]
112
+ *$py.class
113
+ # for local use
114
+ __main__.py
115
+
116
+ # C extensions
117
+ *.so
118
+
119
+ # Distribution / packaging
120
+ .Python
121
+ build/
122
+ develop-eggs/
123
+ dist/
124
+ downloads/
125
+ eggs/
126
+ .eggs/
127
+ lib/
128
+ lib64/
129
+ parts/
130
+ sdist/
131
+ var/
132
+ wheels/
133
+ pip-wheel-metadata/
134
+ share/python-wheels/
135
+ *.egg-info/
136
+ .installed.cfg
137
+ *.egg
138
+ MANIFEST
139
+
140
+ # PyInstaller
141
+ # Usually these files are written by a python script from a template
142
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
143
+ *.manifest
144
+ *.spec
145
+
146
+ # Installer logs
147
+ pip-log.txt
148
+ pip-delete-this-directory.txt
149
+
150
+ # Unit test / coverage reports
151
+ htmlcov/
152
+ .tox/
153
+ .nox/
154
+ .coverage
155
+ .coverage.*
156
+ .cache
157
+ nosetests.xml
158
+ coverage.xml
159
+ *.cover
160
+ .hypothesis/
161
+ .pytest_cache/
162
+
163
+ # Translations
164
+ *.mo
165
+ *.pot
166
+
167
+ # Scrapy stuff:
168
+ .scrapy
169
+
170
+ # Sphinx documentation
171
+ docs/_build/
172
+
173
+ # PyBuilder
174
+ target/
175
+
176
+ # pyenv
177
+ # .python-version (commented out as uv uses this file)
178
+
179
+ # uv
180
+ uv.lock
181
+ .venv/
182
+
183
+ # pipenv
184
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
185
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
186
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
187
+ # install all needed dependencies.
188
+ #Pipfile.lock
189
+
190
+ # celery beat schedule file
191
+ celerybeat-schedule
192
+
193
+ # SageMath parsed files
194
+ *.sage.py
195
+
196
+ # Spyder project settings
197
+ .spyderproject
198
+ .spyproject
199
+
200
+ # Rope project settings
201
+ .ropeproject
202
+
203
+ # Mr Developer
204
+ .mr.developer.cfg
205
+ .project
206
+ .pydevproject
207
+
208
+ # mkdocs documentation
209
+ /site
210
+
211
+ # mypy
212
+ .mypy_cache/
213
+ .dmypy.json
214
+ dmypy.json
215
+
216
+ # Pyre type checker
217
+ .pyre/
218
+
219
+ ### VisualStudioCode ###
220
+ .vscode/*
221
+ .vscode/settings.json
222
+ !.vscode/tasks.json
223
+ .vscode/launch.json
224
+ !.vscode/extensions.json
225
+
226
+ ### VisualStudioCode Patch ###
227
+ # Ignore all local history of files
228
+ .history
229
+
230
+ ### Ignore test directory
231
+ test/
232
+
233
+ ### Ignore other files put to test_data/
234
+
235
+ /test_data
236
+
237
+
238
+ # End of https://www.gitignore.io/api/git,python,pycharm,visualstudiocode
@@ -0,0 +1,215 @@
1
+ # Changelog
2
+
3
+ ## 2.8 (2025-02-04)
4
+ * Now use uv to build the project.
5
+ * Demo scripts are now in the GitHub repository.
6
+ * Add a new command line option `--use-aliases-for-barcodes`, to use the "alias" column in sample sheet instead of the "barcode" column.
7
+ * Fix issue #44, the name of the first column of the sequencing summary file has changed with new Nanopore specification.
8
+
9
+ ## 2.7.1 (2024-08-23)
10
+ * Added Conda environment.yml.
11
+ * Fixed bugs:
12
+ * Corrected N50 and L50 computation (issue #28).
13
+ * Recognized the standard barcode arrangement (BC|RB|NB|BP) in the samplesheet (issue #27).
14
+ * Fixed barcoding for the Fastq_extractor (issue #30).
15
+ * Fixed time format for BAM and FASTQ extractors.
16
+
17
+
18
+ ## 2.7 (2024-06-21)
19
+ * Moved to Ubuntu 24.04.
20
+ * Added support for ONT samplesheet.
21
+ * ToulligQC now recognizes the barcode column name as 'barcode'.
22
+ * ToulligQC now supports irregular barcode naming; issue #22.
23
+ * Fixed bugs:
24
+ * Pandas inplace issue for FASTQ and onedsquare summary files.
25
+ * PlotlyJS inclusion for independent image exports.
26
+ * Corrected read/base count histogram.
27
+ * Handled empty lines in FASTQ file.
28
+ * Resolved compression issue in pod5 file.
29
+ * BAM extractor can now deal with any BAM, even if it has not been generated by Dorado.
30
+ * Handled barcode name when it's associated with kit_barcode.
31
+
32
+ ## 2.6 (2024-01-26)
33
+ * Support for the POD5 format.
34
+ * Handling of barcoding in BAM and FASTQ files.
35
+ * Support for Fast5 .tar files without compression.
36
+ * Fixed an issue with BAM tags extraction.
37
+ * Fixed the issue with the 'passes_filtering'-only sequencing summary.
38
+ * Fixed the bug related to numpy.bool with numpy version 1.24 or later.
39
+ * Improved compatibility with pandas 2.0.
40
+
41
+ ## 2.5 (2023-11-03)
42
+ * Fixed error when no failed reads were found (Issue #20).
43
+ * Fixed error when unclassified barcodes were missing.
44
+ * FASTQ and BAM files can now be used instead of the sequencing summary file.
45
+ * Added the ability to specify a barcode range (e.g., `--barcodes barcode01:barcode48`).
46
+
47
+ ## 2.4 (2023-04-26)
48
+ * In over time graphs (read length, PHRED score and translocation speed), now fill the gaps for the 75% and 25% to avoid filling glitch.
49
+ * Fix 2D density plot title style.
50
+ * Fix error when a summary file with barcode information was provided in addition to barcoding files (Issue #17). The barcode files are now skipped with a warning message when a summary file with barcode information is provided.
51
+ * Add the selected speed and sample frequency of the run in the "Run statistics" table of the ToulligQC report
52
+
53
+ ## 2.3 (2023-03-22)
54
+ * Numpy 1.24 is now supported (thanks to Sean Black).
55
+ * Scatter plot of read length vs PHRED score has been replaced by a 2D density plot.
56
+ * Add bases per barcode distribution graphs.
57
+
58
+ ## 2.2.3 (2022-09-29)
59
+ * Fix error when no Fast5 file is found in a directory provided as argument. Now throw an understandable error message.
60
+
61
+ ## 2.2.2 (2022-08-31)
62
+ * Fix when multiple sequencing summary barcode files were available, the type of the 'barcode_arrangement' column in the dataframe was not correct.
63
+
64
+ ## 2.2.1 (2022-01-05)
65
+ * Generated images were not included in the main HTML report file.
66
+
67
+ ## 2.2 (2022-01-03)
68
+ * Add some flexibility to barcode specification (Thanks to Hunter Cameron).
69
+ * The sequencing_summary.txt and sequencing_telemetry.js files can now be read compressed in gzip or bzip2.
70
+ * Add some log information on stdout for duration of the sequencing summary extractors.
71
+ * Change the format of the duration in log.
72
+ * Now logs memory used by dataframes.
73
+ * Fix Docker image build issue when updating setuptools.
74
+ * QScores and durations are now stored in 32 bits floats instead of 64 bits to reduce memory consumption (≈25% for 1D data).
75
+ * Barcode arrangements are now stored as categories instead of strings.
76
+
77
+ ## 2.1.1 (2021-08-18)
78
+ * Fix issue when barcode list argument contains non-existent barcode(s) in input data or when all existing barcodes are used.
79
+
80
+ ## 2.1 (2021-06-28)
81
+ * The channel occupancy of the flowcell graph code has been rewritten to use Plotly. Add all/pass/fail/fail ratio views. The flowcell graph can now also handle Flongle and PromethION flowcells in addition to the standard MinION flowcell
82
+ * Add "Sequencing kit" and "Barcode kits" entries in the run statistics table in html report
83
+ * In the distribution of read lengths graph, add buttons to show base count distribution in linear/log modes.
84
+ * Fix scatterplot graph where the default max x-axis value was always the max value for fail reads
85
+ * Fix help links in demo report
86
+ * Fix the name of the "Device and software" and "Run statistics" sections
87
+
88
+ ## 2.0.1 (2021-04-14)
89
+ * In setup.py, set the development status for ToulligQC as Production/Stable instead of Beta
90
+ * Add MANIFEST.in file to add resources files in PyPi package
91
+ * Fix error with latest versions of NumPy by adding a missing int cast
92
+
93
+ ## 2.0 (2021-04-09)
94
+ * Fix duration computation
95
+ * For PHRED score distribution boxplots, remove unnecessary interpolation before creating boxplot
96
+ * Remove duplicated code for 1D and 1D2 in PHRED score distribution graphs
97
+ * Remove duplicated code for 1D and 1D2 in Correlation between read length and PHRED score graphs
98
+ * In read length distribution graphs, add buttons to switch between linear and log scale for xaxis
99
+ * Add minimal qscore threshold in the "Device and software information" report table
100
+ * In 1D/1D2 sequencing summary extractors, now replace NA values for barcode assignment by "unclassified". Print a warning message on console
101
+ * Update unit tests
102
+ * For read count histogram tables, replace "frequency" by "percent"
103
+ * Rename y-axis for "Distribution of read lengths" graphs to "Read count"
104
+ * Add new command line options to finely define output file paths
105
+ * Update the sigma value for gaussian filters when smoothing plots
106
+ * In correlation scatterplots, now weight the number of pass/fail spots by the pass/fail ratio when using interpolation
107
+
108
+ ## 2.0b3 (2021-03-22)
109
+ * New CSS for HTML report
110
+ * Add new plots (Read length and PHRED over time, translocation speed...)
111
+ * Enhancement of existing graphs
112
+ * Big refactoring code for sequencing summary file parsing
113
+ * Big refactoring code for creating plots
114
+ * Reduce memory usage and execution time with barcodes
115
+ * Fix Plotly dependency version requirement
116
+ * Add L50 computation
117
+ * Sequence lengths of reads were stored in np.int16, which cannot handle >=32kb reads. Now use np.uint32
118
+ * Add ToulligQC logo in HTML report
119
+ * A telemetry file or Fast5 file is no longer required
120
+ * Add new fields in the two first tables of the report: Run ID, operating system and basecalling date
121
+ * Barcode distribution pie charts can now be visualised as histograms
122
+ * In table, float values have now comma separator for thousands
123
+ * Update the yield number format in run statistics table
124
+ * Update colors in the graphs
125
+ * Add an information link in all the graph titles
126
+
127
+ ## 2.0b2 (2020-11-20)
128
+ * Fix import bug
129
+ * Fix graph names partially hidden in HTML summary element
130
+ * Rewrite help and rename arguments for clarity
131
+ * Create required and optional argument groups
132
+ * Create default values for --report-name and --output command line arguments
133
+ * Update report.html example in Docs with the new version of ToulligQC
134
+ * Create new presentation image for README
135
+
136
+ ## 2.0b1 (2020-11-17)
137
+ * Refactoring of the sequencing_summary_extractor
138
+ * Refactoring of the 1dsqr_sequencing_summary_extractor
139
+ * Many performance improvements (reducing memory usage)
140
+ * Graphs are now made with Plotly
141
+ * Removal of unused options (Albacore log, FASTQ files, configuration file and samplesheet file)
142
+ * N50 information added to report.html
143
+ * Removal of Albacore support
144
+ * Now handle PromethION data
145
+ * Update of required dependencies versions
146
+ * Add unit tests
147
+ * Add new plots (throughput sequencing time)
148
+ * Update graph colors
149
+
150
+ ## 1.3 (2019-11-07)
151
+ * Add a --barcodes option that makes it possible to avoid creating a samplesheet file
152
+ * The size of the graphs are now set to 1000x600px
153
+ * Many small fixes in graph generation (remove titles, fix grids and layouts...)
154
+ * In the HTML report, replace the tooltip icon by a Unicode character
155
+
156
+ ## 1.2 (2019-07-25)
157
+ * MultiFast5 file can now be used to retrieve run information
158
+ * Reporting other barcodes in the "other" category
159
+ * Gathering information from Telemetry files in the HTML report
160
+
161
+ ## 1.1 (2019-03-21)
162
+ * Add Guppy support for 1D and 1D2
163
+ * Telemetry files generated by Albacore or Guppy can now be used to retrieve run information instead of reading a FAST5 file and the pipeline.log file.
164
+ * Refactoring of the code of the extractors
165
+
166
+ ## 1.0 (2018-10-23)
167
+ * Report.data log file reviewed
168
+
169
+ ## 0.10 (2018-07-18)
170
+ * Add pipeline.log parsing option
171
+
172
+ ## 0.9 (2018-03-21)
173
+ * Fix out of memory error when parsing big FASTQ files. The parsing of FASTQ files is now faster
174
+
175
+
176
+ ## 0.8 (2018-03-14)
177
+ * Fix nonexistent import in toulligqc.py
178
+ * Fix the non-working "--quiet" option
179
+
180
+
181
+ ## 0.7 (2018-03-14)
182
+ * Fix Dockerfile that used Ubuntu 17.04 (the Ubuntu 17.04 package repository is no longer available)
183
+
184
+
185
+ ## 0.6 (2018-03-12)
186
+ * Update html.report for 1D and 1Dsquare data
187
+ * Fix issue when processing fast5 files directory
188
+ * Add pass/fail filter
189
+ * Add extractor and graphs for 1dsquare analysis
190
+
191
+
192
+ ## 0.5 (2017-11-28)
193
+ * Fix exception when toulligqc was launched with no arguments
194
+ * Remove pypandoc dependency in setup.py
195
+ * Fix issue when checking if directory paths end with a '/'
196
+ * Fix issue when checking missing arguments
197
+
198
+
199
+ ## 0.4 (2017-11-27)
200
+ * Fix issue with the --version option of ToulligQC
201
+
202
+
203
+ ## 0.3 (2017-11-27)
204
+ * Fix issue with setup.py and pip install
205
+
206
+
207
+ ## 0.2 (2017-11-27)
208
+ * ToulligQC can now handle Albacore 2.0 output
209
+ * The run date is now extracted from a FAST5 file
210
+ * Update setup.cfg for PyPi package submission
211
+ * Update ToulligQC documentation
212
+
213
+
214
+ ## 0.1 (2017-08-30)
215
+ * Initial version