virtool-workflow 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. virtool_workflow/__init__.py +13 -0
  2. virtool_workflow/analysis/__init__.py +1 -0
  3. virtool_workflow/analysis/fastqc.py +467 -0
  4. virtool_workflow/analysis/skewer.py +265 -0
  5. virtool_workflow/analysis/trimming.py +56 -0
  6. virtool_workflow/analysis/utils.py +27 -0
  7. virtool_workflow/api/__init__.py +0 -0
  8. virtool_workflow/api/acquire.py +66 -0
  9. virtool_workflow/api/client.py +132 -0
  10. virtool_workflow/api/utils.py +109 -0
  11. virtool_workflow/cli.py +66 -0
  12. virtool_workflow/data/__init__.py +22 -0
  13. virtool_workflow/data/analyses.py +106 -0
  14. virtool_workflow/data/hmms.py +109 -0
  15. virtool_workflow/data/indexes.py +319 -0
  16. virtool_workflow/data/jobs.py +62 -0
  17. virtool_workflow/data/ml.py +82 -0
  18. virtool_workflow/data/samples.py +190 -0
  19. virtool_workflow/data/subtractions.py +244 -0
  20. virtool_workflow/data/uploads.py +35 -0
  21. virtool_workflow/decorators.py +47 -0
  22. virtool_workflow/errors.py +62 -0
  23. virtool_workflow/files.py +40 -0
  24. virtool_workflow/hooks.py +140 -0
  25. virtool_workflow/pytest_plugin/__init__.py +35 -0
  26. virtool_workflow/pytest_plugin/data.py +197 -0
  27. virtool_workflow/pytest_plugin/utils.py +9 -0
  28. virtool_workflow/runtime/__init__.py +0 -0
  29. virtool_workflow/runtime/config.py +21 -0
  30. virtool_workflow/runtime/discover.py +95 -0
  31. virtool_workflow/runtime/events.py +7 -0
  32. virtool_workflow/runtime/hook.py +129 -0
  33. virtool_workflow/runtime/path.py +19 -0
  34. virtool_workflow/runtime/ping.py +54 -0
  35. virtool_workflow/runtime/redis.py +65 -0
  36. virtool_workflow/runtime/run.py +276 -0
  37. virtool_workflow/runtime/run_subprocess.py +168 -0
  38. virtool_workflow/runtime/sentry.py +28 -0
  39. virtool_workflow/utils.py +90 -0
  40. virtool_workflow/workflow.py +90 -0
  41. virtool_workflow-0.0.0.dist-info/LICENSE +21 -0
  42. virtool_workflow-0.0.0.dist-info/METADATA +71 -0
  43. virtool_workflow-0.0.0.dist-info/RECORD +45 -0
  44. virtool_workflow-0.0.0.dist-info/WHEEL +4 -0
  45. virtool_workflow-0.0.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,140 @@
1
+ """Hooks do things when events happen during the workflow lifecycle."""
2
+
3
+ from virtool_workflow.runtime.hook import Hook
4
+
5
# Built-in lifecycle hooks. Each is a module-level :class:`Hook` instance that
# workflow authors use as a decorator to register a callback.

on_result = Hook("on_result")
"""
Triggered when a workflow has completed and a result is available.

.. code-block:: python

    @on_result
    async def use_result(results: Dict[str, Any]):
        ...
"""

on_step_start = Hook("on_step_start")
"""
Triggered before each workflow step is executed.

.. code-block:: python

    @on_step_start
    async def use_step():
        ...
"""

# NOTE(review): this hook is exported as ``on_step_finish`` but its internal
# name (used when the hook logs that it was triggered) is "on_step_end".
# Confirm whether the mismatch is intentional before renaming either side.
on_step_finish = Hook("on_step_end")
"""
Triggered after each workflow step is executed.

.. code-block:: python

    @on_step_finish
    async def handle_step_finish():
        ...
"""

on_workflow_start = Hook("on_workflow_start")
"""
Triggered at the start of the workflow, before any steps are executed.

.. code-block:: python

    @on_workflow_start
    async def handle_workflow_start():
        ...
"""

on_success = Hook("on_success")
"""
Triggered when a job completes successfully.

.. code-block:: python

    @on_success
    async def perform_on_success():
        ...
"""

on_cancelled = Hook("on_cancelled")
"""
Triggered when a job is cancelled.

.. code-block:: python

    @on_cancelled
    async def handle_cancellation():
        ...
"""

on_error = Hook("on_error")
"""
Triggered when a job encounters an exception while running.

.. code-block:: python

    @on_error
    async def handle_error():
        ...
"""

on_terminated = Hook("on_terminated")
"""
Triggered when the workflow process receives a SIGTERM.

.. code-block:: python

    @on_terminated
    def handle_termination():
        ...
"""

on_failure = Hook("on_failure")
"""
Triggered when a job fails to complete.

Failure to complete can be caused by: user cancellation, termination by the host, or
an error during workflow execution.

.. code-block:: python

    @on_failure
    async def handle_failure():
        ...
"""

on_finish = Hook("on_finish")
"""
Triggered when a job completes, success or failure.

.. code-block:: python

    @on_finish
    async def do_something_on_finish():
        ...
"""

# The public API of this module: every built-in hook, in alphabetical order.
__all__ = [
    "on_cancelled",
    "on_error",
    "on_failure",
    "on_finish",
    "on_result",
    "on_step_finish",
    "on_step_start",
    "on_success",
    "on_terminated",
    "on_workflow_start",
]
124
+
125
+
126
def cleanup_builtin_status_hooks() -> None:
    """Remove every callback registered on the built-in status hooks.

    Tests call this so that callbacks registered during one workflow run do not
    leak into the next one. Production runs are unaffected because a workflow
    process exits after a single run.

    Only the six status hooks listed below are cleared; the other built-in
    hooks keep their callbacks.

    TODO: Find a better way to isolate hooks to workflow runs.

    """
    status_hooks = (
        on_step_start,
        on_failure,
        on_cancelled,
        on_success,
        on_error,
        on_terminated,
    )

    for status_hook in status_hooks:
        status_hook.clear()
@@ -0,0 +1,35 @@
1
+ from pathlib import Path
2
+
3
+ import arrow
4
+ import pytest
5
+
6
+ import virtool_workflow.runtime.run_subprocess
7
+ from virtool_workflow.pytest_plugin.data import (
8
+ Data,
9
+ data,
10
+ )
11
+
12
+
13
@pytest.fixture()
def run_subprocess() -> virtool_workflow.runtime.run_subprocess.RunSubprocess:
    """Provide the object returned by the runtime's ``run_subprocess()`` factory."""
    runtime_module = virtool_workflow.runtime.run_subprocess
    return runtime_module.run_subprocess()
16
+
17
+
18
@pytest.fixture()
def static_datetime():
    """Provide a fixed, naive datetime (2020-01-01 01:01:01) for deterministic tests."""
    fixed_moment = arrow.get(2020, 1, 1, 1, 1, 1)
    return fixed_moment.naive
21
+
22
+
23
@pytest.fixture()
def virtool_workflow_example_path() -> Path:
    """Provide the path to the example data files for virtool-workflow.

    The path is the ``example`` directory three levels above this file.
    """
    plugin_dir = Path(__file__).parent
    root_dir = plugin_dir.parent.parent
    return root_dir / "example"
27
+
28
+
29
# Names exported by the pytest plugin: the ``data`` fixture and its ``Data``
# container (both imported from ``virtool_workflow.pytest_plugin.data``) plus
# the fixtures defined in this module.
__all__ = [
    "data",
    "Data",
    "run_subprocess",
    "static_datetime",
    "virtool_workflow_example_path",
]
@@ -0,0 +1,197 @@
1
+ import datetime
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+ from pydantic_factories import ModelFactory, Use
7
+ from virtool.analyses.models import Analysis, AnalysisSample
8
+ from virtool.indexes.models import Index, IndexNested
9
+ from virtool.jobs.models import JobAcquired, JobMinimal, JobPing
10
+ from virtool.ml.models import MLModelRelease
11
+ from virtool.references.models import Reference, ReferenceNested
12
+ from virtool.samples.models import Sample
13
+ from virtool.samples.models_base import SampleNested
14
+ from virtool.subtractions.models import Subtraction, SubtractionFile, SubtractionNested
15
+
16
+ from virtool_workflow.pytest_plugin.utils import SUBTRACTION_FILENAMES
17
+
18
+
19
@dataclass
class Data:
    """A bundle of fake, mutually consistent resources built by the ``data`` fixture.

    Each field holds one model instance; the string following each field
    describes its role in tests.
    """

    analysis: Analysis
    """An analysis being populated in the active workflow."""

    index: Index
    """A finalized index to be used for testing analyses."""

    new_index: Index
    """An un-finalized index for testing index creation workflows."""

    job: JobAcquired
    """A fake job."""

    ml: MLModelRelease | None
    """An ML model release used in the active analysis."""

    reference: Reference
    """A reference to be used for testing analyses and index creation workflows."""

    sample: Sample
    """A finalized sample to be used for testing analyses."""

    new_sample: Sample
    """An un-finalized sample for testing sample creation workflows."""

    subtraction: Subtraction
    """A finalized subtraction to be used for testing analyses."""

    new_subtraction: Subtraction
    """An un-finalized subtraction for testing subtraction creation workflows."""
50
+
51
+
52
@pytest.fixture()
def data(
    virtool_workflow_example_path: Path,
    static_datetime: datetime.datetime,
) -> Data:
    """Build a :class:`Data` bundle of fake resources for workflow tests.

    Model instances are generated with ``pydantic_factories`` and then adjusted
    so they reference one another (for example, the analysis points at the
    generated sample, index, reference and subtraction).

    :param virtool_workflow_example_path: directory holding example files; the
        sizes of the subtraction files are read from its ``subtraction``
        subdirectory.
    :param static_datetime: the timestamp used for every generated date field.
    :return: the populated :class:`Data` instance.
    """
    # NOTE(review): the order of the factory definitions, ``seed_random`` calls
    # and ``build`` calls below is preserved exactly as written. Generated
    # values presumably depend on that order — confirm before reordering.

    class AnalysisFactory(ModelFactory):
        __model__ = Analysis

        created_at = Use(lambda: static_datetime)
        updated_at = Use(lambda: static_datetime)

    class IndexFactory(ModelFactory[Index]):
        __model__ = Index

        created_at = Use(lambda: static_datetime)

    IndexFactory.seed_random(12)

    class JobFactory(ModelFactory):
        __model__ = JobAcquired

        created_at = Use(lambda: static_datetime)
        timestamp = Use(lambda: static_datetime)

    JobFactory.seed_random(55)

    class MLFactory(ModelFactory):
        __model__ = MLModelRelease

        created_at = Use(lambda: static_datetime)
        published_at = Use(lambda: static_datetime)

    class ReferenceFactory(ModelFactory):
        __model__ = Reference

        created_at = Use(lambda: static_datetime)

    ReferenceFactory.seed_random(22)

    class SampleFactory(ModelFactory):
        __model__ = Sample

        created_at = Use(lambda: static_datetime)
        removed_at = None
        uploaded_at = Use(lambda: static_datetime)

    SampleFactory.seed_random(5)

    class SubtractionFactory(ModelFactory):
        __model__ = Subtraction

        created_at = Use(lambda: static_datetime)

    # The fake job that the test workflow is running.
    job: JobAcquired = JobFactory.build()

    job.args = {
        "item_id": 1211,
        "resource_id": "foo",
        "test": True,
    }
    job.ping = JobPing(pinged_at=static_datetime)

    # A finalized sample to be used for testing analyses.
    sample = SampleFactory.build()
    sample.job = JobMinimal.parse_obj(job)
    sample.artifacts = []

    # A new sample with the fake job configured as the creation job for the sample.
    new_sample = SampleFactory.build()

    new_sample_job = JobFactory.build()
    new_sample_job.args["files"] = [
        {
            "id": 1,
            "name": "reads_1.fq.gz",
            "size": 100,
        },
        {
            "id": 2,
            "name": "reads_2.fq.gz",
            "size": 100,
        },
    ]
    new_sample_job.args["sample_id"] = new_sample.id

    new_sample.artifacts = []
    new_sample.job = JobMinimal.parse_obj(new_sample_job)
    new_sample.quality = None
    new_sample.reads = []
    new_sample.ready = False

    reference = ReferenceFactory.build()
    reference.targets = []

    # A finalized index that belongs to the generated reference.
    index = IndexFactory.build()
    index.reference = ReferenceNested.parse_obj(reference)
    index.ready = True

    # An un-finalized index: no files yet and not ready.
    new_index: Index = IndexFactory.build()
    new_index.reference = ReferenceNested.parse_obj(reference)
    new_index.files = []
    new_index.ready = False

    ml: MLModelRelease = MLFactory.build()
    ml.ready = True
    ml.model.id = 5
    ml.id = 231

    # One file record per expected subtraction filename; sizes come from the
    # example files on disk.
    subtraction = SubtractionFactory.build()
    subtraction.files = [
        SubtractionFile(
            download_url=f"/subtractions/{subtraction.id}/files/{filename}",
            id=(i + 1),
            name=filename,
            size=(virtool_workflow_example_path / "subtraction" / filename)
            .stat()
            .st_size,
            subtraction=subtraction.id,
            type="bowtie2",
        )
        for i, filename in enumerate(SUBTRACTION_FILENAMES)
    ]

    new_subtraction = SubtractionFactory.build()
    new_subtraction.files = []
    new_subtraction.ready = False

    # Wire the analysis to the sample, index, reference and subtraction above.
    analysis: Analysis = AnalysisFactory.build()
    analysis.sample = AnalysisSample.parse_obj(sample)
    analysis.workflow = "pathoscope_bowtie"
    analysis.index = IndexNested.parse_obj(index)
    analysis.reference = ReferenceNested.parse_obj(reference)
    analysis.subtractions = [SubtractionNested.parse_obj(subtraction)]

    return Data(
        analysis=analysis,
        index=index,
        new_index=new_index,
        job=job,
        ml=ml,
        reference=reference,
        sample=sample,
        new_sample=new_sample,
        subtraction=subtraction,
        new_subtraction=new_subtraction,
    )
@@ -0,0 +1,9 @@
1
# Names of the files that make up a subtraction in test data. The ``data``
# fixture creates one ``SubtractionFile`` record (type "bowtie2") per entry.
SUBTRACTION_FILENAMES = (
    "subtraction.fa.gz",
    "subtraction.1.bt2",
    "subtraction.2.bt2",
    "subtraction.3.bt2",
    "subtraction.4.bt2",
    "subtraction.rev.1.bt2",
    "subtraction.rev.2.bt2",
)
File without changes
@@ -0,0 +1,21 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+
5
@dataclass
class RunConfig:
    """The configuration for a workflow run."""

    dev: bool
    """Whether the workflow should run in development mode."""

    jobs_api_connection_string: str
    """The connection string for the jobs API."""

    # NOTE(review): the unit of ``mem`` is not stated anywhere in this module —
    # confirm against the code that consumes it.
    mem: int
    """The memory limit for the workflow run."""
    proc: int
    """The number of processors available to the workflow run."""

    work_path: Path
    """The path to a directory where the workflow can store temporary files."""
@@ -0,0 +1,95 @@
1
+ import sys
2
+ from importlib import import_module
3
+ from importlib.util import module_from_spec, spec_from_file_location
4
+ from pathlib import Path
5
+ from types import ModuleType
6
+
7
+ from structlog import get_logger
8
+
9
+ from virtool_workflow import Workflow
10
+ from virtool_workflow.decorators import collect
11
+
12
+ logger = get_logger("runtime")
13
+
14
+
15
def discover_workflow(path: Path) -> Workflow:
    """Find a :class:`.Workflow` in the Python module located at the given path.

    The module is imported under the file's stem (its name without the final
    suffix). The first module-level attribute that is an instance of
    :class:`.Workflow` is returned. If there is none, the result of
    ``collect(module)`` is returned instead.

    :param path: The path to a Python module.
    :return: The first :class:`.Workflow` instance in the module, or the
        workflow produced by ``collect``.
    """
    # ``Path.stem`` removes exactly one suffix. ``str.rstrip`` would instead
    # strip any trailing run of the suffix's characters, turning ``copy.py``
    # into ``co``.
    module = import_module_from_file(path.stem, path)

    for attr in module.__dict__.values():
        if isinstance(attr, Workflow):
            return attr

    return collect(module)
31
+
32
+
33
def load_builtin_fixtures():
    """Import the modules that provide the built-in fixtures.

    Intended to run before the fixtures from a workflow's ``fixtures.py`` are
    loaded, so the built-in fixtures a workflow needs are already available.

    """
    builtin_fixture_modules = (
        "virtool_workflow.data",
        "virtool_workflow.analysis.fastqc",
        "virtool_workflow.analysis.skewer",
        "virtool_workflow.runtime.run_subprocess",
    )

    for module_name in builtin_fixture_modules:
        import_module(module_name)
45
+
46
+
47
def load_custom_fixtures():
    """Load fixtures defined by the workflow author in ``fixtures.py``.

    The file is looked up in the current working directory. A missing file is
    not an error; it is logged and the function returns normally.
    """
    logger.info("importing fixtures.py")

    fixtures_path = Path("./fixtures.py")

    try:
        # ``stem`` drops the ``.py`` suffix. ``rstrip(".py")`` strips a set of
        # characters and only happened to give the right result for this name.
        import_module_from_file(fixtures_path.stem, fixtures_path)
    except FileNotFoundError:
        logger.info("could not find fixtures.py")
57
+
58
+
59
def load_workflow_from_file() -> Workflow:
    """Load the workflow defined in ``./workflow.py``.

    If the file cannot be found, a critical message is logged and the process
    exits with status 1; ``FileNotFoundError`` does not reach the caller.

    :return: The workflow.
    """
    logger.info("importing workflow.py")

    workflow_path = Path("./workflow.py")

    try:
        workflow = discover_workflow(workflow_path)
    except FileNotFoundError:
        logger.critical("could not find workflow.py")
        sys.exit(1)

    return workflow
72
+
73
+
74
def import_module_from_file(module_name: str, path: Path) -> ModuleType:
    """Import a module from a file.

    The parent directory of `path` is added to `sys.path` while the module is
    executed so that modules and packages defined in that directory can be
    imported by it. The entry is removed again afterwards, even when the
    import fails.

    The module is registered in `sys.modules` under `module_name`. If executing
    it raises, the previous `sys.modules` entry (if any) is restored.

    :param module_name: The module's name.
    :param path: The module's path.
    :returns: The loaded module.
    :raises ImportError: If no import spec or loader can be created for `path`.
    :raises FileNotFoundError: If `path` does not exist.
    """
    spec = spec_from_file_location(module_name, path)

    if spec is None or spec.loader is None:
        raise ImportError(f"could not import {path}")

    module = module_from_spec(spec)
    module_parent = str(path.parent)

    # Sentinel distinguishes "no previous module" from a module that is None.
    missing = object()
    previous = sys.modules.get(module_name, missing)

    sys.path.append(module_parent)
    sys.modules[module_name] = module

    try:
        # ``exec_module`` replaces the deprecated ``Loader.load_module``.
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable.
        if previous is missing:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise
    finally:
        # The imported code may have edited ``sys.path`` itself.
        try:
            sys.path.remove(module_parent)
        except ValueError:
            pass

    return module
@@ -0,0 +1,7 @@
1
+ import asyncio
2
+
3
+
4
class Events:
    """Holds two :class:`asyncio.Event` objects: ``cancelled`` and ``terminated``."""

    # NOTE(review): presumably these are set by the runtime when the job is
    # cancelled or the process is asked to terminate — confirm against callers.
    def __init__(self) -> None:
        self.cancelled = asyncio.Event()
        self.terminated = asyncio.Event()
@@ -0,0 +1,129 @@
1
+ """The :class:`Hook` class is used to hook into the workflow lifecycle."""
2
+ from __future__ import annotations
3
+
4
+ from asyncio import gather
5
+ from typing import Any, Callable
6
+
7
+ from pyfixtures import FixtureScope
8
+ from structlog import get_logger
9
+
10
+ from virtool_workflow.utils import coerce_to_coroutine_function
11
+
12
+ logger = get_logger("hooks")
13
+
14
+
15
class Hook:
    """Used to hook into the workflow lifecycle.

    A hook holds a list of callbacks. Using the hook instance as a decorator
    registers a callback, and :meth:`trigger` binds fixtures to every registered
    callback and runs them concurrently.
    """

    def __init__(self, hook_name: str):
        """A set of functions to be called as a group upon a particular event.

        :param hook_name: The name of this hook. It is used in log messages.
        """
        self.name = hook_name

        self.callbacks = []

        # Expose ``hook.clear()`` as a shortcut for removing every callback.
        self.clear = self.callbacks.clear

    def __call__(self, callback_: Callable = None, until=None, once=False):
        """Add a callback function to this Hook that will be called when the hook is
        triggered.

        Can be used as a bare decorator (``@hook``) or with arguments
        (``@hook(until=other_hook)``, ``@hook(once=True)``). When called without
        a callback, a decorator is returned.

        :param callback_: The callback function to register.
        :param until: Don't call the callback after the passed hook has been triggered.
        :param once: Only execute the callback the next time this hook is triggered.
        :return: The passed callback function as a coroutine function, or a
            decorator when no callback was passed.
        """
        if once:
            # "Once" means "until this hook itself has been triggered".
            until = self

        if callback_ and not until:
            cb = self._callback(callback_)
        elif callback_ and until:
            cb = self._callback_until(until)(callback_)
        elif until:
            cb = self._callback_until(until)
        else:
            cb = self._callback

        return cb

    def _callback(self, callback_: Callable):
        """Register a callback function, skipping parameter validation."""
        callback_ = coerce_to_coroutine_function(callback_)
        self.callbacks.append(callback_)
        return callback_

    def _callback_until(self, hook_: "Hook"):
        """Add a callback to this hook and remove it when :func:`hook_` is triggered."""

        def _temporary_callback(callback_):
            callback_ = self._callback(callback_)

            @hook_._callback
            def remove_callback():
                # Unregister the temporary callback and then this remover itself.
                self.callbacks.remove(callback_)
                hook_.callbacks.remove(remove_callback)

            return callback_

        return _temporary_callback

    async def trigger(
        self, scope: "FixtureScope", suppress=False, **kwargs
    ) -> list[Any]:
        """Trigger the hook.

        Bind fixtures from `scope` to each callback function and invoke them.
        Keyword arguments passed to this function are forwarded to
        ``scope.bind`` for every callback.

        :param scope: the :class:`FixtureScope` to use to bind fixtures
        :param suppress: suppress and log exceptions raised in callbacks. A
            callback that cannot be bound (``KeyError``) is logged and
            replaced by a no-op that yields ``None``.
        :return: the values returned by the callbacks, in registration order
        """
        logger.info("triggering hook", hook=self.name)

        # Make the scope itself available to callbacks as a fixture.
        if "scope" not in scope:
            scope["scope"] = scope

        async def _skipped():
            """Awaitable stand-in for a callback that could not be bound."""
            return None

        async def _bind(callback_: Callable):
            try:
                return await scope.bind(callback_, **kwargs)
            except KeyError as error:
                if suppress:
                    logger.exception(error)
                    # Must be awaitable when called: ``_trigger`` awaits the
                    # result of calling every callback.
                    return _skipped

                raise

        _callbacks = [await _bind(callback) for callback in self.callbacks]

        return await self._trigger(_callbacks, suppress_errors=suppress)

    @staticmethod
    async def _trigger(callbacks, *args, suppress_errors=False, **kwargs):
        """Run all `callbacks` concurrently and return their results.

        :param callbacks: coroutine functions to call with ``*args`` and ``**kwargs``
        :param suppress_errors: log exceptions raised by callbacks instead of
            propagating them; a suppressed callback contributes ``None``
        :return: the list of callback results, in the order of `callbacks`
        :raises Exception: the first exception found in the results when errors
            are not suppressed
        """

        async def call_callback(callback):
            if not suppress_errors:
                return await callback(*args, **kwargs)

            try:
                return await callback(*args, **kwargs)
            except Exception:
                # Bound callbacks are not guaranteed to carry ``__name__`` or
                # a ``hook`` attribute. Logging must not raise, or the error
                # would escape the suppression.
                owner = getattr(callback, "hook", None)
                logger.exception(
                    "encountered exception in hook callback",
                    callback=getattr(callback, "__name__", repr(callback)),
                    hook=getattr(owner, "name", None),
                )
                return None

        results = await gather(
            *[call_callback(callback) for callback in callbacks],
            return_exceptions=True,
        )

        # ``gather`` collected exceptions as results; re-raise the first one.
        for error in results:
            if isinstance(error, Exception):
                raise error

        return results
@@ -0,0 +1,19 @@
1
+ import asyncio
2
+ from contextlib import asynccontextmanager
3
+ from pathlib import Path
4
+ from shutil import rmtree
5
+
6
+ from virtool_workflow.runtime.config import RunConfig
7
+
8
+
9
@asynccontextmanager
async def create_work_path(config: "RunConfig") -> Path:
    """A temporary working directory where all workflow files should be written.

    Used as ``async with create_work_path(config) as path``. Any existing
    directory at ``config.work_path`` is removed and recreated empty before
    the absolute path is yielded. The directory is removed again when the
    context exits, including when the managed block raises.

    Filesystem calls run in a worker thread via :func:`asyncio.to_thread`.

    :param config: the run configuration; only ``work_path`` is read.
    """
    path = Path(config.work_path).absolute()

    # Start from a clean slate; a missing directory is not an error here.
    await asyncio.to_thread(rmtree, path, ignore_errors=True)
    await asyncio.to_thread(path.mkdir, exist_ok=True, parents=True)

    try:
        yield path
    finally:
        # Clean up even if the workflow failed, so no files are left behind.
        await asyncio.to_thread(rmtree, path)