virtool-workflow 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. virtool_workflow-0.0.0/LICENSE +21 -0
  2. virtool_workflow-0.0.0/PKG-INFO +71 -0
  3. virtool_workflow-0.0.0/README.md +48 -0
  4. virtool_workflow-0.0.0/pyproject.toml +87 -0
  5. virtool_workflow-0.0.0/virtool_workflow/__init__.py +13 -0
  6. virtool_workflow-0.0.0/virtool_workflow/analysis/__init__.py +1 -0
  7. virtool_workflow-0.0.0/virtool_workflow/analysis/fastqc.py +467 -0
  8. virtool_workflow-0.0.0/virtool_workflow/analysis/skewer.py +265 -0
  9. virtool_workflow-0.0.0/virtool_workflow/analysis/trimming.py +56 -0
  10. virtool_workflow-0.0.0/virtool_workflow/analysis/utils.py +27 -0
  11. virtool_workflow-0.0.0/virtool_workflow/api/__init__.py +0 -0
  12. virtool_workflow-0.0.0/virtool_workflow/api/acquire.py +66 -0
  13. virtool_workflow-0.0.0/virtool_workflow/api/client.py +132 -0
  14. virtool_workflow-0.0.0/virtool_workflow/api/utils.py +109 -0
  15. virtool_workflow-0.0.0/virtool_workflow/cli.py +66 -0
  16. virtool_workflow-0.0.0/virtool_workflow/data/__init__.py +22 -0
  17. virtool_workflow-0.0.0/virtool_workflow/data/analyses.py +106 -0
  18. virtool_workflow-0.0.0/virtool_workflow/data/hmms.py +109 -0
  19. virtool_workflow-0.0.0/virtool_workflow/data/indexes.py +319 -0
  20. virtool_workflow-0.0.0/virtool_workflow/data/jobs.py +62 -0
  21. virtool_workflow-0.0.0/virtool_workflow/data/ml.py +82 -0
  22. virtool_workflow-0.0.0/virtool_workflow/data/samples.py +190 -0
  23. virtool_workflow-0.0.0/virtool_workflow/data/subtractions.py +244 -0
  24. virtool_workflow-0.0.0/virtool_workflow/data/uploads.py +35 -0
  25. virtool_workflow-0.0.0/virtool_workflow/decorators.py +47 -0
  26. virtool_workflow-0.0.0/virtool_workflow/errors.py +62 -0
  27. virtool_workflow-0.0.0/virtool_workflow/files.py +40 -0
  28. virtool_workflow-0.0.0/virtool_workflow/hooks.py +140 -0
  29. virtool_workflow-0.0.0/virtool_workflow/pytest_plugin/__init__.py +35 -0
  30. virtool_workflow-0.0.0/virtool_workflow/pytest_plugin/data.py +197 -0
  31. virtool_workflow-0.0.0/virtool_workflow/pytest_plugin/utils.py +9 -0
  32. virtool_workflow-0.0.0/virtool_workflow/runtime/__init__.py +0 -0
  33. virtool_workflow-0.0.0/virtool_workflow/runtime/config.py +21 -0
  34. virtool_workflow-0.0.0/virtool_workflow/runtime/discover.py +95 -0
  35. virtool_workflow-0.0.0/virtool_workflow/runtime/events.py +7 -0
  36. virtool_workflow-0.0.0/virtool_workflow/runtime/hook.py +129 -0
  37. virtool_workflow-0.0.0/virtool_workflow/runtime/path.py +19 -0
  38. virtool_workflow-0.0.0/virtool_workflow/runtime/ping.py +54 -0
  39. virtool_workflow-0.0.0/virtool_workflow/runtime/redis.py +65 -0
  40. virtool_workflow-0.0.0/virtool_workflow/runtime/run.py +276 -0
  41. virtool_workflow-0.0.0/virtool_workflow/runtime/run_subprocess.py +168 -0
  42. virtool_workflow-0.0.0/virtool_workflow/runtime/sentry.py +28 -0
  43. virtool_workflow-0.0.0/virtool_workflow/utils.py +90 -0
  44. virtool_workflow-0.0.0/virtool_workflow/workflow.py +90 -0
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Canadian Food Inspection Agency
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.3
2
+ Name: virtool-workflow
3
+ Version: 0.0.0
4
+ Summary: A framework for developing bioinformatics workflows for Virtool.
5
+ License: MIT
6
+ Author: Ian Boyes
7
+ Maintainer: Ian Boyes
8
+ Requires-Python: >=3.12.3,<3.13.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Requires-Dist: aiofiles (>=0.7.0,<0.8.0)
12
+ Requires-Dist: aiohttp (>=3.8.1,<4.0.0)
13
+ Requires-Dist: biopython (>=1.81,<2.0)
14
+ Requires-Dist: click (>=8.1.7,<9.0.0)
15
+ Requires-Dist: orjson (>=3.9.9,<4.0.0)
16
+ Requires-Dist: pydantic-factories (>=1.17.3,<2.0.0)
17
+ Requires-Dist: pyfixtures (>=1.0.0,<2.0.0)
18
+ Requires-Dist: sentry-sdk (>=2.3.1,<3.0.0)
19
+ Requires-Dist: structlog-sentry (>=2.2.1,<3.0.0)
20
+ Requires-Dist: virtool (>=31.1.3,<32.0.0)
21
+ Description-Content-Type: text/markdown
22
+
23
+ # Virtool Workflow
24
+
25
+ ![Tests](https://github.com/virtool/virtool-workflow/workflows/ci/badge.svg?branch=main)
26
+ [![PyPI version](https://badge.fury.io/py/virtool-workflow.svg)](https://badge.fury.io/py/virtool-workflow)
27
+
28
+ A framework for developing bioinformatics workflows in Python.
29
+
30
+ ```python
31
+ from virtool_workflow import step
32
+
33
+
34
+ @step
35
+ def step_function():
36
+ ...
37
+
38
+
39
+ @step
40
+ def step_function_2():
41
+ ...
42
+ ```
43
+
44
+ ## Contributing
45
+
46
+ ### Commits
47
+
48
+ We require specific commit formatting. Any commit that does not follow the guidelines
49
+ will be squashed at our discretion.
50
+
51
+ Read our [commit and release](https://dev.virtool.ca/en/latest/commits_releases.html)
52
+ documentation for more information.
53
+
54
+ ### Tests
55
+
56
+ Run tests with:
57
+
58
+ ```shell
59
+ # Bring up Redis and the test container.
60
+ docker compose up -d
61
+
62
+ # Run tests in the test container.
63
+ docker compose exec test poetry run pytest
64
+ ```
65
+
66
+ Run specific tests like:
67
+
68
+ ```shell
69
+ docker compose exec test poetry run pytest tests/test_status.py
70
+ ```
71
+
@@ -0,0 +1,48 @@
1
+ # Virtool Workflow
2
+
3
+ ![Tests](https://github.com/virtool/virtool-workflow/workflows/ci/badge.svg?branch=main)
4
+ [![PyPI version](https://badge.fury.io/py/virtool-workflow.svg)](https://badge.fury.io/py/virtool-workflow)
5
+
6
+ A framework for developing bioinformatics workflows in Python.
7
+
8
+ ```python
9
+ from virtool_workflow import step
10
+
11
+
12
+ @step
13
+ def step_function():
14
+ ...
15
+
16
+
17
+ @step
18
+ def step_function_2():
19
+ ...
20
+ ```
21
+
22
+ ## Contributing
23
+
24
+ ### Commits
25
+
26
+ We require specific commit formatting. Any commit that does not follow the guidelines
27
+ will be squashed at our discretion.
28
+
29
+ Read our [commit and release](https://dev.virtool.ca/en/latest/commits_releases.html)
30
+ documentation for more information.
31
+
32
+ ### Tests
33
+
34
+ Run tests with:
35
+
36
+ ```shell
37
+ # Bring up Redis and the test container.
38
+ docker compose up -d
39
+
40
+ # Run tests in the test container.
41
+ docker compose exec test poetry run pytest
42
+ ```
43
+
44
+ Run specific tests like:
45
+
46
+ ```shell
47
+ docker compose exec test poetry run pytest tests/test_status.py
48
+ ```
@@ -0,0 +1,87 @@
1
+ [project]
2
+ name = "virtool-workflow"
3
+ description = "A framework for developing bioinformatics workflows for Virtool."
4
+ version = "0.0.0"
5
+ authors = [
6
+ {name = "Ian Boyes"},
7
+ {name = "Reece Hoffman"},
8
+ {name = "Blake Smith"},
9
+ {name = "Ryan Fang"},
10
+ {name = "Matt Curtis"},
11
+ {name = "Aman Monga"},
12
+ {name = "Bryce Davidson"},
13
+ {name = "Christine Wong Chong"},
14
+ {name = "Lilly Roberts"},
15
+ {name = "Markus Swoveland"},
16
+ {name = "Ryan Fang"},
17
+ ]
18
+ dynamic = ["dependencies"]
19
+ license = "MIT"
20
+ maintainers = [
21
+ {name = "Ian Boyes"},
22
+ {name = "Reece Hoffman"},
23
+ ]
24
+ readme = "README.md"
25
+ repository = "https://github.com/virtool/virtool-workflow"
26
+ requires-python = ">=3.12.3,<3.13.0"
27
+
28
+ [project.scripts]
29
+ run-workflow = "virtool_workflow.cli:cli_main"
30
+
31
+ [tool.poetry]
32
+ packages = [
33
+ { include = "virtool_workflow" },
34
+ ]
35
+
36
+ [tool.poetry.dependencies]
37
+ aiofiles = "^0.7.0"
38
+ aiohttp = "^3.8.1"
39
+ biopython = "^1.81"
40
+ click = "^8.1.7"
41
+ orjson = "^3.9.9"
42
+ pydantic-factories = "^1.17.3"
43
+ pyfixtures = "^1.0.0"
44
+ sentry-sdk = "^2.3.1"
45
+ structlog-sentry = "^2.2.1"
46
+ virtool = "^31.1.3"
47
+
48
+ [tool.poetry.group.dev.dependencies]
49
+ piccolo-theme = "^0.24.0"
50
+ pytest = "^7.4.2"
51
+ pytest-asyncio = "^0.21.0"
52
+ pytest-mock = "^3.10.0"
53
+ pytest-structlog = "^1.0"
54
+ ruff = "^0.4.6"
55
+ sphinx = "^4.0.2"
56
+ sphinx-autobuild = "^2021.3.14"
57
+ sphinx-nameko-theme = "^0.0.3"
58
+ sphinx-toolbox = "^3.5.0"
59
+ syrupy = "^3.0.5"
60
+ pytest-aiohttp = "^1.1.0"
61
+
62
+ [tool.pytest.ini_options]
63
+ asyncio_mode = "auto"
64
+
65
+ [tool.ruff]
66
+ exclude = [
67
+ "docs",
68
+ ".eggs",
69
+ ".git",
70
+ ".github",
71
+ ".mypy_cache",
72
+ ".pytest-cache",
73
+ ".ruff_cache",
74
+ "__pypackages__",
75
+ ]
76
+
77
+ [tool.ruff.lint]
78
+ ignore = [
79
+ "ANN101",
80
+ "D203",
81
+ "D213"
82
+ ]
83
+ select = ["ALL"]
84
+
85
+ [build-system]
86
+ requires = ["poetry-core>=1.0.0"]
87
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,13 @@
1
+ """
2
+ A framework for defining Virtool workflows.
3
+ """
4
+ from virtool_workflow.decorators import step
5
+ from virtool_workflow.runtime.run_subprocess import RunSubprocess
6
+ from virtool_workflow.workflow import Workflow, WorkflowStep
7
+
8
+ __all__ = [
9
+ "step",
10
+ "RunSubprocess",
11
+ "Workflow",
12
+ "WorkflowStep",
13
+ ]
@@ -0,0 +1 @@
1
+ """Tools for workflows relating to sequence analysis."""
@@ -0,0 +1,467 @@
1
+ """Utilities and fixtures for running FastQC."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import shutil
6
+ import statistics
7
+ import tempfile
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import IO, Protocol, TextIO
11
+
12
+ from pyfixtures import fixture
13
+
14
+ from virtool_workflow import RunSubprocess
15
+ from virtool_workflow.analysis.utils import ReadPaths
16
+
17
+
18
@dataclass
class NucleotidePoint:
    """Nucleotide composition values for a single read position.

    The field order is significant: instances are constructed positionally
    as ``NucleotidePoint(g, a, t, c)`` by the composition parser.
    """

    # Value reported for guanine at this position.
    g: float
    # Value reported for adenine at this position.
    a: float
    # Value reported for thymine at this position.
    t: float
    # Value reported for cytosine at this position.
    c: float
24
+
25
+
26
@dataclass
class QualityPoint:
    """Summary statistics of base quality at a single read position."""

    # Mean quality at this position.
    mean: float
    # Median quality at this position.
    median: float
    # 25th percentile of quality at this position.
    lower_quartile: float
    # 75th percentile of quality at this position.
    upper_quartile: float
    # 10th percentile of quality at this position.
    tenth_percentile: float
    # 90th percentile of quality at this position.
    ninetieth_percentile: float
34
+
35
+
36
class BaseQualityParser:
    """Parse the section of FastQC output containing per-base quality data.

    After :meth:`handle` has consumed the section, ``data`` holds one
    :class:`QualityPoint` per read position, in position order.
    """

    # Marker that identifies the header line of this section in the data file.
    pattern = ">>Per base sequence quality"

    def __init__(self):
        self.data: list[QualityPoint] = []

    def composite(self, parser: BaseQualityParser):
        """Return a new parser whose points average this parser and ``parser``.

        Points are paired by position. If the two parsers hold a different
        number of points, the result is truncated to the shorter one.

        :param parser: the parser for the other side of a paired read set
        :return: a new parser containing the position-wise averaged points
        """
        p = BaseQualityParser()

        p.data = [
            QualityPoint(
                mean=statistics.mean([this.mean, other.mean]),
                median=statistics.mean([this.median, other.median]),
                lower_quartile=statistics.mean(
                    [this.lower_quartile, other.lower_quartile],
                ),
                upper_quartile=statistics.mean(
                    [this.upper_quartile, other.upper_quartile],
                ),
                tenth_percentile=statistics.mean(
                    [this.tenth_percentile, other.tenth_percentile],
                ),
                ninetieth_percentile=statistics.mean(
                    [this.ninetieth_percentile, other.ninetieth_percentile],
                ),
            )
            for this, other in zip(self.data, parser.data)
        ]

        return p

    def handle(self, f: TextIO):
        """Consume the per-base quality section from ``f``.

        Reading starts at the current position of ``f`` and stops after the
        ``>>END_MODULE`` line, or at end-of-file if the section is truncated.

        :param f: a text stream positioned just after the section header
        :raises ValueError: if a value cannot be parsed, a row does not have
            exactly six value columns, or positions are not contiguous
        """
        max_index = -1

        while True:
            raw_line = f.readline()

            # readline() returns "" only at end-of-file. Without this check a
            # file that lacks the ">>END_MODULE" terminator would spin forever
            # because the empty string is also treated as a blank line below.
            if not raw_line:
                break

            line = raw_line.rstrip()

            if line == ">>END_MODULE":
                break

            if not line or line[0] == "#":
                continue

            split = line.split()

            # Convert all fields except the first (the position label) to
            # floats.
            try:
                values = [float(value) for value in split[1:]]
            except ValueError as err:
                # NOTE: float() accepts the token "NaN", so this fallback is
                # only reached for other unparseable tokens whose error
                # message happens to contain "NaN".
                if "NaN" not in str(err):
                    raise

                values = _handle_base_quality_nan(split)

            (
                mean,
                median,
                lower_quartile,
                upper_quartile,
                tenth_percentile,
                ninetieth_percentile,
            ) = values

            # A label such as "10-14" covers several positions; every covered
            # position receives the same statistics.
            indexes = _calculate_index_range(split[0])

            for i in indexes:
                self.data.append(
                    QualityPoint(
                        mean=mean,
                        median=median,
                        lower_quartile=lower_quartile,
                        upper_quartile=upper_quartile,
                        tenth_percentile=tenth_percentile,
                        ninetieth_percentile=ninetieth_percentile,
                    ),
                )

                if i - max_index != 1:
                    raise ValueError("Non-contiguous index")

                max_index = i
119
+
120
+
121
class BasicStatisticsParser:
    """Parse the section of FastQC output containing basic statistics.

    After :meth:`handle` has run, the parser exposes ``count``, ``encoding``,
    ``gc`` and ``length`` (a ``[min, max]`` pair).
    """

    # Marker that identifies the header line of this section in the data file.
    pattern = ">>Basic Statistics"

    def __init__(self):
        self.count = 0
        self.encoding = None
        self.gc = None
        self.length = None

        self._populated = False

    def composite(self, parser: BasicStatisticsParser):
        """Return a new parser that combines this parser with ``parser``.

        Counts are summed, GC values are averaged, and the length range is
        widened to cover both inputs. The encoding is taken from ``self``.

        :param parser: the parser for the other side of a paired read set
        :return: a new parser holding the combined statistics
        """
        p = BasicStatisticsParser()

        p.count = self.count + parser.count
        p.encoding = self.encoding
        p.gc = (self.gc + parser.gc) / 2
        p.length = [
            min(self.length + parser.length),
            max(self.length + parser.length),
        ]

        return p

    def handle(self, f: IO):
        """Consume the basic statistics section from ``f``.

        Reading starts at the current position of ``f`` and stops after the
        ``>>END_MODULE`` line, or at end-of-file if the section is truncated.

        :param f: a text stream positioned just after the section header
        :raises ValueError: if a recognised row holds a non-numeric value
        :raises IndexError: if a recognised row has no tab-separated value
        """
        while True:
            raw_line = f.readline()

            # readline() returns "" only at end-of-file. Without this check a
            # file that lacks the ">>END_MODULE" terminator would spin forever.
            if not raw_line:
                break

            line = raw_line.rstrip()

            if line.startswith("#"):
                continue

            if line == ">>END_MODULE":
                break

            fields = line.split("\t")

            # Match keywords against the label column only, so that a value
            # (for example a file name containing "%GC") cannot be mistaken
            # for a statistic.
            label = fields[0]

            if "Total Sequences" in label:
                self.count = int(fields[1])

            elif "Encoding" in label:
                self.encoding = fields[1]

            elif "Sequence length" in label:
                # The value is either a single length or a "min-max" range.
                length_range = [int(s) for s in fields[1].split("-")]
                self.length = [min(length_range), max(length_range)]

            # GC-content
            elif "%GC" in label:
                self.gc = float(fields[1])
170
+
171
+
172
class NucleotideCompositionParser:
    """Parse the section of FastQC output containing per-base nucleotide composition.

    After :meth:`handle` has consumed the section, ``data`` holds one
    :class:`NucleotidePoint` per read position, in position order.
    """

    # Marker that identifies the header line of this section in the data file.
    pattern = ">>Per base sequence content"

    def __init__(self):
        self.data: list[NucleotidePoint] = []

    def composite(self, parser: NucleotideCompositionParser):
        """Make a composite dataset given another :class:`.NucleotideCompositionParser`.

        Points are paired by position and each nucleotide value is averaged.
        If the parsers hold a different number of points, the result is
        truncated to the shorter one.

        :param parser: the parser for the other side of a paired read set
        :return: a new parser containing the position-wise averaged points
        """
        p = NucleotideCompositionParser()

        p.data = [
            NucleotidePoint(
                g=(this.g + other.g) / 2,
                a=(this.a + other.a) / 2,
                t=(this.t + other.t) / 2,
                c=(this.c + other.c) / 2,
            )
            for this, other in zip(self.data, parser.data)
        ]

        return p

    def handle(self, f: TextIO):
        """Consume the per-base sequence content section from ``f``.

        Reading starts at the current position of ``f`` and stops after the
        ``>>END_MODULE`` line, or at end-of-file if the section is truncated.

        :param f: a text stream positioned just after the section header
        :raises ValueError: if a value cannot be parsed, a row does not have
            exactly four value columns, or positions are not contiguous
        """
        max_index = -1

        while True:
            raw_line = f.readline()

            # readline() returns "" only at end-of-file. Without this check a
            # file that lacks the ">>END_MODULE" terminator would spin forever
            # because the empty string is also treated as a blank line below.
            if not raw_line:
                break

            line = raw_line.rstrip()

            if line == ">>END_MODULE":
                break

            if not line or line[0] == "#":
                continue

            split = line.split()

            # Value columns are read in the order G, A, T, C.
            try:
                g, a, t, c = (float(value) for value in split[1:])
            except ValueError as err:
                if "NaN" not in str(err):
                    raise

                g, a, t, c = _handle_base_quality_nan(split)

            # A label such as "10-14" covers several positions; every covered
            # position receives the same composition.
            indexes = _calculate_index_range(split[0])

            for i in indexes:
                self.data.append(NucleotidePoint(g, a, t, c))

                if i - max_index != 1:
                    raise ValueError("Non-contiguous index")

                max_index = i
227
+
228
+
229
class SequenceQualityParser:
    """Parse the section of FastQC output containing per-sequence quality data.

    ``data`` is a fixed-length list of 50 counts indexed by quality score.
    """

    # Marker that identifies the header line of this section in the data file.
    pattern = ">>Per sequence quality scores"

    def __init__(self):
        self.data = [0] * 50

    def composite(self, parser: SequenceQualityParser):
        """Return a new parser whose counts are the element-wise sum of both.

        :param parser: the parser for the other side of a paired read set
        :return: a new parser containing the summed counts
        """
        p = SequenceQualityParser()
        p.data = [sum(both) for both in zip(self.data, parser.data)]

        return p

    def handle(self, f: TextIO):
        """Consume the per-sequence quality section from ``f``.

        Reading starts at the current position of ``f`` and stops after the
        ``>>END_MODULE`` line, or at end-of-file if the section is truncated.

        :param f: a text stream positioned just after the section header
        :raises ValueError: if a row holds non-numeric fields
        :raises IndexError: if a row has fewer than two fields or its quality
            score does not fit in ``data``
        """
        while True:
            raw_line = f.readline()

            # readline() returns "" only at end-of-file. Without this check a
            # file that lacks the ">>END_MODULE" terminator would spin forever
            # because the empty string is also treated as a blank line below.
            if not raw_line:
                break

            line = raw_line.rstrip()

            if not line or line.startswith("#"):
                continue

            if line == ">>END_MODULE":
                break

            fields = line.split()

            quality = int(fields[0])
            # Counts are written with a decimal point (e.g. "10.0").
            count = int(float(fields[1]))

            self.data[quality] = count
259
+
260
+
261
@dataclass
class FastQCSide:
    """The four section parsers populated from one FastQC data file."""

    # Parser for the ">>Per base sequence quality" section.
    base_quality: BaseQualityParser
    # Parser for the ">>Basic Statistics" section.
    basic_statistics: BasicStatisticsParser
    # Parser for the ">>Per base sequence content" section.
    nucleotide_composition: NucleotideCompositionParser
    # Parser for the ">>Per sequence quality scores" section.
    sequence_quality: SequenceQualityParser
267
+
268
+
269
def _calculate_index_range(base: str) -> range:
    """Convert a FastQC position label into a range of zero-based indexes.

    A label is either a single one-based position (``"7"``) or an inclusive
    one-based span (``"10-14"``).

    :param base: the position label from the first column of a data row
    :return: the zero-based indexes covered by the label
    """
    first, *rest = (int(part) for part in base.split("-"))

    # A single position is a span that starts and ends on the same base.
    last = rest[0] if rest else first

    return range(first - 1, last)
276
+
277
+
278
def _handle_base_quality_nan(split_line: list) -> list:
    """Build replacement values for a FastQC data row containing NaN values.

    The first value in the row that parses as a finite number is used as the
    replacement for every value column. If every value is ``NaN``, zeroes are
    returned instead.

    :param split_line: the data row split into a list, position label first
    :return: replacement values, one per value column
    :raises ValueError: if the row holds neither a finite number nor only
        ``NaN`` values
    """
    from math import isfinite

    values = split_line[1:]

    for value in values:
        try:
            number = float(value)
        except ValueError:
            continue

        # float() happily parses "NaN" (and "inf"), so non-finite results must
        # be skipped explicitly or NaN would be returned as the replacement.
        if isfinite(number):
            return [number] * len(values)

    # Return all zeroes if none of the quality values are numbers. One zero is
    # produced per value column so callers can unpack rows of any width.
    if set(values) == {"NaN"}:
        return [0] * len(values)

    joined = ",".join(split_line)

    raise ValueError(f"Could not parse base quality values '{joined}'")
300
+
301
+
302
def _format_base_quality(points: list[QualityPoint]) -> list[list[float]]:
    """Serialize quality points as rows of six values rounded to 3 decimals."""
    return [
        [
            round(n, 3)
            for n in (
                point.mean,
                point.median,
                point.lower_quartile,
                point.upper_quartile,
                point.tenth_percentile,
                point.ninetieth_percentile,
            )
        ]
        for point in points
    ]


def _format_composition(points: list[NucleotidePoint]) -> list[list[float]]:
    """Serialize composition points as ``[g, a, t, c]`` rows rounded to 1 decimal."""
    return [
        [round(n, 1) for n in (point.g, point.a, point.t, point.c)]
        for point in points
    ]


def _parse_fastqc(fastqc_path: Path, output_path: Path) -> dict:
    """Parse the FastQC results at `fastqc_path`.

    Every sub-directory of ``fastqc_path`` that contains a ``fastqc_data.txt``
    file is treated as the output for one read file. The data file is moved to
    ``output_path / "<directory name>.txt"`` and then parsed. Nothing else in
    ``fastqc_path`` is modified by this function.

    When two data files are found, their statistics are combined into a single
    result.

    :param fastqc_path: the FastQC output data path
    :param output_path: the FastQC text output files will be moved here
    :return: a dict containing a representation of the parsed FastQC data
    :raises ValueError: if the number of data files found is not one or two

    """
    output_path.mkdir(exist_ok=True, parents=True)

    sides = []

    # Sort so that the left/right assignment is deterministic instead of
    # depending on the order in which the filesystem lists directories.
    for path in sorted(fastqc_path.iterdir()):
        if not path.is_dir():
            continue

        data_path = path / "fastqc_data.txt"

        if not data_path.exists():
            continue

        new_path = output_path / f"{path.name}.txt"

        shutil.move(data_path, new_path)

        side = FastQCSide(
            base_quality=BaseQualityParser(),
            basic_statistics=BasicStatisticsParser(),
            nucleotide_composition=NucleotideCompositionParser(),
            sequence_quality=SequenceQualityParser(),
        )

        parsers = (
            side.basic_statistics,
            side.base_quality,
            side.nucleotide_composition,
            side.sequence_quality,
        )

        with open(new_path) as f:
            while line := f.readline():
                # Each parser consumes its own section from the shared file
                # handle, leaving it positioned after ">>END_MODULE".
                for parser in parsers:
                    if parser.pattern in line:
                        parser.handle(f)
                        break

        sides.append(side)

    if len(sides) not in (1, 2):
        raise ValueError(
            f"Expected FastQC data for 1 or 2 read files but found {len(sides)}",
        )

    if len(sides) == 1:
        left = sides[0]

        return {
            "bases": _format_base_quality(left.base_quality.data),
            "composition": _format_composition(left.nucleotide_composition.data),
            "count": left.basic_statistics.count,
            "encoding": left.basic_statistics.encoding,
            "gc": left.basic_statistics.gc,
            "length": left.basic_statistics.length,
            "sequences": left.sequence_quality.data,
        }

    left, right = sides

    basic = left.basic_statistics.composite(right.basic_statistics)

    return {
        "bases": _format_base_quality(
            left.base_quality.composite(right.base_quality).data,
        ),
        "composition": _format_composition(
            left.nucleotide_composition.composite(
                right.nucleotide_composition,
            ).data,
        ),
        "count": basic.count,
        "length": basic.length,
        "encoding": left.basic_statistics.encoding,
        "gc": basic.gc,
        "sequences": left.sequence_quality.composite(right.sequence_quality).data,
    }
422
+
423
+
424
class FastQCRunner(Protocol):
    """Structural type for async callables that run FastQC on read files."""

    async def __call__(self, paths: ReadPaths, output_path: Path) -> dict:
        """Run FastQC on ``paths`` and return the parsed result.

        :param paths: the FASTQ read file paths to analyse
        :param output_path: where the FastQC text output should be placed
        :return: a dict representation of the parsed FastQC data
        """
        ...
429
+
430
+
431
@fixture
async def fastqc(run_subprocess: RunSubprocess):
    """Provide an asynchronous function that runs FastQC as a subprocess.

    The returned function takes one or two paths to FASTQ read files
    (:class:`.ReadPaths`) and the path where the FastQC text output should be
    placed. It returns the parsed FastQC data as a dict.

    FastQC writes its raw output to a temporary directory that is created once
    when the fixture is set up.

    Example:
    -------
    .. code-block:: python

        @step
        async def step_one(fastqc: FastQCRunner, work_path: Path):
            fastqc_result = await fastqc(
                (work_path / "reads_1.fq", work_path / "reads_2.fq"),
                work_path / "fastqc",
            )

    """
    # mkdtemp touches the filesystem, so it is run in a worker thread.
    temp_path = Path(await asyncio.to_thread(tempfile.mkdtemp))

    async def func(paths: ReadPaths, output_path: Path) -> dict:
        read_arguments = [str(path) for path in paths]

        await run_subprocess(
            ["fastqc", "-f", "fastq", "-o", str(temp_path), "--extract"]
            + read_arguments,
        )

        return _parse_fastqc(temp_path, output_path)

    return func