PyPI - virtool-workflow - Versions diffs - 0.0.0__py3-none-any.whl - Mend

virtool-workflow 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

virtool_workflow/__init__.py +13 -0
virtool_workflow/analysis/__init__.py +1 -0
virtool_workflow/analysis/fastqc.py +467 -0
virtool_workflow/analysis/skewer.py +265 -0
virtool_workflow/analysis/trimming.py +56 -0
virtool_workflow/analysis/utils.py +27 -0
virtool_workflow/api/__init__.py +0 -0
virtool_workflow/api/acquire.py +66 -0
virtool_workflow/api/client.py +132 -0
virtool_workflow/api/utils.py +109 -0
virtool_workflow/cli.py +66 -0
virtool_workflow/data/__init__.py +22 -0
virtool_workflow/data/analyses.py +106 -0
virtool_workflow/data/hmms.py +109 -0
virtool_workflow/data/indexes.py +319 -0
virtool_workflow/data/jobs.py +62 -0
virtool_workflow/data/ml.py +82 -0
virtool_workflow/data/samples.py +190 -0
virtool_workflow/data/subtractions.py +244 -0
virtool_workflow/data/uploads.py +35 -0
virtool_workflow/decorators.py +47 -0
virtool_workflow/errors.py +62 -0
virtool_workflow/files.py +40 -0
virtool_workflow/hooks.py +140 -0
virtool_workflow/pytest_plugin/__init__.py +35 -0
virtool_workflow/pytest_plugin/data.py +197 -0
virtool_workflow/pytest_plugin/utils.py +9 -0
virtool_workflow/runtime/__init__.py +0 -0
virtool_workflow/runtime/config.py +21 -0
virtool_workflow/runtime/discover.py +95 -0
virtool_workflow/runtime/events.py +7 -0
virtool_workflow/runtime/hook.py +129 -0
virtool_workflow/runtime/path.py +19 -0
virtool_workflow/runtime/ping.py +54 -0
virtool_workflow/runtime/redis.py +65 -0
virtool_workflow/runtime/run.py +276 -0
virtool_workflow/runtime/run_subprocess.py +168 -0
virtool_workflow/runtime/sentry.py +28 -0
virtool_workflow/utils.py +90 -0
virtool_workflow/workflow.py +90 -0
virtool_workflow-0.0.0.dist-info/LICENSE +21 -0
virtool_workflow-0.0.0.dist-info/METADATA +71 -0
virtool_workflow-0.0.0.dist-info/RECORD +45 -0
virtool_workflow-0.0.0.dist-info/WHEEL +4 -0
virtool_workflow-0.0.0.dist-info/entry_points.txt +3 -0

virtool_workflow/hooks.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""Hooks do things when events happen during the workflow lifecycle."""
+from virtool_workflow.runtime.hook import Hook
+# Each module-level hook below is followed by a bare string that documents it.
+# Every hook is constructed with the same name as the variable it is bound to, so
+# that the ``hook=...`` field in log entries can be searched for by the public name.
+on_result = Hook("on_result")
+"""
+Triggered when a workflow has completed and a result is available.
+.. code-block:: python
+    @on_result
+    async def use_result(results: Dict[str, Any]):
+        ...
+"""
+on_step_start = Hook("on_step_start")
+"""
+Triggered before each workflow step is executed.
+.. code-block:: python
+    @on_step_start
+    async def use_step():
+        ...
+"""
+on_step_finish = Hook("on_step_finish")
+"""
+Triggered after each workflow step is executed.
+.. code-block:: python
+    @on_step_finish
+    async def handle_step_finish():
+        ...
+"""
+on_workflow_start = Hook("on_workflow_start")
+"""
+Triggered at the start of the workflow, before any steps are executed.
+.. code-block:: python
+    @on_workflow_start
+    async def handle_workflow_start():
+        ...
+"""
+on_success = Hook("on_success")
+"""
+Triggered when a job completes successfully.
+.. code-block:: python
+    @on_success
+    async def perform_on_success():
+        ...
+"""
+on_cancelled = Hook("on_cancelled")
+"""
+Triggered when a job is cancelled.
+.. code-block:: python
+    @on_cancelled
+    async def handle_cancellation():
+        ...
+"""
+on_error = Hook("on_error")
+"""
+Triggered when a job encounters an exception while running.
+.. code-block:: python
+    @on_error
+    async def handle_error():
+        ...
+"""
+on_terminated = Hook("on_terminated")
+"""
+Triggered when the workflow process receives a SIGTERM.
+.. code-block:: python
+    @on_terminated
+    def handle_termination():
+        ...
+"""
+on_failure = Hook("on_failure")
+"""
+Triggered when a job fails to complete.
+Failure to complete can be caused by: user cancellation, termination by the host, or
+an error during workflow execution.
+.. code-block:: python
+    @on_failure
+    async def handle_failure():
+        ...
+"""
+on_finish = Hook("on_finish")
+"""
+Triggered when a job completes, success or failure.
+.. code-block:: python
+    @on_finish
+    async def do_something_on_finish():
+        ...
+"""
+__all__ = [
+    "on_cancelled",
+    "on_error",
+    "on_failure",
+    "on_finish",
+    "on_result",
+    "on_step_finish",
+    "on_step_start",
+    "on_success",
+    "on_terminated",
+    "on_workflow_start",
+]
+def cleanup_builtin_status_hooks() -> None:
+    """Remove every callback registered on the built-in status hooks.
+    Without this, callbacks registered during one test would still be attached
+    during the next. Production is unaffected because a workflow process exits
+    after a single run.
+    TODO: Find a better way to isolate hooks to workflow runs.
+    """
+    status_hooks = (
+        on_step_start,
+        on_failure,
+        on_cancelled,
+        on_success,
+        on_error,
+        on_terminated,
+    )
+    for status_hook in status_hooks:
+        status_hook.clear()

virtool_workflow/pytest_plugin/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+from pathlib import Path
+import arrow
+import pytest
+import virtool_workflow.runtime.run_subprocess
+from virtool_workflow.pytest_plugin.data import (
+    Data,
+    data,
+)
+@pytest.fixture()
+def run_subprocess() -> virtool_workflow.runtime.run_subprocess.RunSubprocess:
+    """Provide the value returned by the runtime module's ``run_subprocess()``."""
+    runtime_module = virtool_workflow.runtime.run_subprocess
+    return runtime_module.run_subprocess()
+@pytest.fixture()
+def static_datetime():
+    """Provide a fixed timestamp (2020-01-01 01:01:01) via Arrow's ``naive`` attribute."""
+    fixed_moment = arrow.get(2020, 1, 1, 1, 1, 1)
+    return fixed_moment.naive
+@pytest.fixture()
+def virtool_workflow_example_path() -> Path:
+    """Provide the path to the example data files for virtool-workflow.
+    The path is the ``example`` directory located three levels above this file.
+    """
+    two_levels_up = Path(__file__).parent.parent
+    return two_levels_up.parent / "example"
+# Public names of the plugin package: the two names imported from
+# ``virtool_workflow.pytest_plugin.data`` plus the fixtures defined in this module.
+__all__ = [
+    "data",
+    "Data",
+    "run_subprocess",
+    "static_datetime",
+    "virtool_workflow_example_path",
+]

virtool_workflow/pytest_plugin/data.py ADDED Viewed

@@ -0,0 +1,197 @@
+import datetime
+from dataclasses import dataclass
+from pathlib import Path
+import pytest
+from pydantic_factories import ModelFactory, Use
+from virtool.analyses.models import Analysis, AnalysisSample
+from virtool.indexes.models import Index, IndexNested
+from virtool.jobs.models import JobAcquired, JobMinimal, JobPing
+from virtool.ml.models import MLModelRelease
+from virtool.references.models import Reference, ReferenceNested
+from virtool.samples.models import Sample
+from virtool.samples.models_base import SampleNested
+from virtool.subtractions.models import Subtraction, SubtractionFile, SubtractionNested
+from virtool_workflow.pytest_plugin.utils import SUBTRACTION_FILENAMES
+@dataclass
+class Data:
+    """A bundle of fake Virtool resources for workflow tests.
+    Each field is documented by the string that follows it.
+    """
+    analysis: Analysis
+    """An analysis being populated in the active workflow."""
+    index: Index
+    """A finalized index to be used for testing analyses."""
+    new_index: Index
+    """An un-finalized index for testing index creation workflows."""
+    job: JobAcquired
+    """A fake job."""
+    ml: MLModelRelease | None
+    """An ML model release used in the active analysis."""
+    reference: Reference
+    """A reference to be used for testing analyses and index creation workflows."""
+    sample: Sample
+    """A finalized sample to be used for testing analyses."""
+    new_sample: Sample
+    """An un-finalized sample for testing sample creation workflows."""
+    subtraction: Subtraction
+    """A finalized subtraction to be used for testing analyses."""
+    new_subtraction: Subtraction
+    """An un-finalized subtraction for testing subtraction creation workflows."""
+@pytest.fixture()
+def data(
+    virtool_workflow_example_path: Path,
+    static_datetime: datetime.datetime,
+) -> Data:
+    """Build a :class:`Data` bundle of fake, cross-referenced Virtool resources.
+    Model instances are generated by factories. Selected fields are then overwritten
+    so that the resources refer to each other: the analysis points at the sample,
+    index, reference and subtraction built here.
+    :param virtool_workflow_example_path: directory holding the example files; only
+        its ``subtraction`` subdirectory is read, to obtain file sizes.
+    :param static_datetime: timestamp used for the date fields overridden in the
+        factories and for the job ping.
+    :return: the populated :class:`Data` instance.
+    """
+    # NOTE(review): AnalysisFactory, MLFactory and SubtractionFactory are never passed
+    # to ``seed_random`` while the other factories are -- confirm this is intentional.
+    class AnalysisFactory(ModelFactory):
+        __model__ = Analysis
+        created_at = Use(lambda: static_datetime)
+        updated_at = Use(lambda: static_datetime)
+    class IndexFactory(ModelFactory[Index]):
+        __model__ = Index
+        created_at = Use(lambda: static_datetime)
+    # Presumably fixes the generated values so they are reproducible -- TODO confirm
+    # against the pydantic_factories documentation.
+    IndexFactory.seed_random(12)
+    class JobFactory(ModelFactory):
+        __model__ = JobAcquired
+        created_at = Use(lambda: static_datetime)
+        timestamp = Use(lambda: static_datetime)
+    JobFactory.seed_random(55)
+    class MLFactory(ModelFactory):
+        __model__ = MLModelRelease
+        created_at = Use(lambda: static_datetime)
+        published_at = Use(lambda: static_datetime)
+    class ReferenceFactory(ModelFactory):
+        __model__ = Reference
+        created_at = Use(lambda: static_datetime)
+    ReferenceFactory.seed_random(22)
+    class SampleFactory(ModelFactory):
+        __model__ = Sample
+        created_at = Use(lambda: static_datetime)
+        removed_at = None
+        uploaded_at = Use(lambda: static_datetime)
+    SampleFactory.seed_random(5)
+    class SubtractionFactory(ModelFactory):
+        __model__ = Subtraction
+        created_at = Use(lambda: static_datetime)
+    # The fake job: its generated arguments are replaced with a fixed dictionary.
+    job: JobAcquired = JobFactory.build()
+    job.args = {
+        "item_id": 1211,
+        "resource_id": "foo",
+        "test": True,
+    }
+    job.ping = JobPing(pinged_at=static_datetime)
+    # NOTE(review): the string on the next line is a bare expression statement, not a
+    # docstring or comment; it has no effect at runtime.
+    """A finalized sample to be used for testing analyses."""
+    sample = SampleFactory.build()
+    sample.job = JobMinimal.parse_obj(job)
+    sample.artifacts = []
+    # A new sample with the fake job configured as the creation job for the sample.
+    new_sample = SampleFactory.build()
+    new_sample_job = JobFactory.build()
+    new_sample_job.args["files"] = [
+        {
+            "id": 1,
+            "name": "reads_1.fq.gz",
+            "size": 100,
+        },
+        {
+            "id": 2,
+            "name": "reads_2.fq.gz",
+            "size": 100,
+        },
+    ]
+    new_sample_job.args["sample_id"] = new_sample.id
+    # Reset the fields that mark the new sample as not yet finalized.
+    new_sample.artifacts = []
+    new_sample.job = JobMinimal.parse_obj(new_sample_job)
+    new_sample.quality = None
+    new_sample.reads = []
+    new_sample.ready = False
+    reference = ReferenceFactory.build()
+    reference.targets = []
+    # Both indexes belong to the same reference; only ``index`` is marked ready.
+    index = IndexFactory.build()
+    index.reference = ReferenceNested.parse_obj(reference)
+    index.ready = True
+    new_index: Index = IndexFactory.build()
+    new_index.reference = ReferenceNested.parse_obj(reference)
+    new_index.files = []
+    new_index.ready = False
+    ml: MLModelRelease = MLFactory.build()
+    ml.ready = True
+    ml.model.id = 5
+    ml.id = 231
+    subtraction = SubtractionFactory.build()
+    # One file record per example file name; ids start at 1 and each size is read
+    # from the matching file in the example ``subtraction`` directory.
+    subtraction.files = [
+        SubtractionFile(
+            download_url=f"/subtractions/{subtraction.id}/files/{filename}",
+            id=(i + 1),
+            name=filename,
+            size=(virtool_workflow_example_path / "subtraction" / filename)
+            .stat()
+            .st_size,
+            subtraction=subtraction.id,
+            type="bowtie2",
+        )
+        for i, filename in enumerate(SUBTRACTION_FILENAMES)
+    ]
+    new_subtraction = SubtractionFactory.build()
+    new_subtraction.files = []
+    new_subtraction.ready = False
+    # Tie the analysis to the finalized sample, index, reference and subtraction.
+    analysis: Analysis = AnalysisFactory.build()
+    analysis.sample = AnalysisSample.parse_obj(sample)
+    analysis.workflow = "pathoscope_bowtie"
+    analysis.index = IndexNested.parse_obj(index)
+    analysis.reference = ReferenceNested.parse_obj(reference)
+    analysis.subtractions = [SubtractionNested.parse_obj(subtraction)]
+    return Data(
+        analysis=analysis,
+        index=index,
+        new_index=new_index,
+        job=job,
+        ml=ml,
+        reference=reference,
+        sample=sample,
+        new_sample=new_sample,
+        subtraction=subtraction,
+        new_subtraction=new_subtraction,
+    )

virtool_workflow/pytest_plugin/utils.py ADDED Viewed

@@ -0,0 +1,9 @@
+# File names of the example subtraction. The ``data`` fixture in
+# ``pytest_plugin/data.py`` creates one ``SubtractionFile`` per entry and reads each
+# file's size from the example ``subtraction`` directory.
+# Presumably a gzipped FASTA file plus its Bowtie2 index files -- TODO confirm.
+SUBTRACTION_FILENAMES = (
+    "subtraction.fa.gz",
+    "subtraction.1.bt2",
+    "subtraction.2.bt2",
+    "subtraction.3.bt2",
+    "subtraction.4.bt2",
+    "subtraction.rev.1.bt2",
+    "subtraction.rev.2.bt2",
+)

virtool_workflow/runtime/__init__.py ADDED Viewed

File without changes

virtool_workflow/runtime/config.py ADDED Viewed

@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from pathlib import Path
+@dataclass
+class RunConfig:
+    """The configuration for a workflow run."""
+    dev: bool
+    """Whether the workflow should run in development mode."""
+    jobs_api_connection_string: str
+    """The connection string for the jobs API."""
+    # NOTE(review): the unit of the memory limit is not stated in this module --
+    # confirm it against the CLI that builds this object and document it here.
+    mem: int
+    """The memory limit for the workflow run."""
+    proc: int
+    """The number of processors available to the workflow run."""
+    # ``create_work_path`` in ``runtime/path.py`` deletes and recreates this directory
+    # before the run and deletes it again afterwards.
+    work_path: Path
+    """The path to a directory where the workflow can store temporary files."""

virtool_workflow/runtime/discover.py ADDED Viewed

@@ -0,0 +1,95 @@
+import sys
+from importlib import import_module
+from importlib.util import module_from_spec, spec_from_file_location
+from pathlib import Path
+from types import ModuleType
+from structlog import get_logger
+from virtool_workflow import Workflow
+from virtool_workflow.decorators import collect
+logger = get_logger("runtime")
+def discover_workflow(path: Path) -> Workflow:
+    """Find the workflow defined by the Python module located at ``path``.
+    The module is imported under the name given by the file's stem. The first module
+    attribute that is an instance of :class:`.Workflow` is returned. When the module
+    has no such attribute, the module is passed to :func:`collect` and its result is
+    returned instead.
+    :param path: The path to a Python module.
+    :return: The first :class:`.Workflow` instance in the module, or the result of
+        :func:`collect` when there is none.
+    """
+    # ``Path.stem`` drops exactly the final suffix. ``str.rstrip`` strips a *set of
+    # characters*, so ``"happy.py".rstrip(".py")`` would wrongly produce ``"ha"``.
+    module = import_module_from_file(path.stem, path)
+    for attr in module.__dict__.values():
+        if isinstance(attr, Workflow):
+            return attr
+    # No ready-made instance was found; any exception raised by ``collect`` propagates.
+    return collect(module)
+def load_builtin_fixtures():
+    """Import the modules that provide the built-in fixtures.
+    This is meant to be called before the fixtures in a workflow's ``fixtures.py``
+    are loaded, so that the built-in fixtures a workflow requires are available.
+    """
+    builtin_modules = (
+        "virtool_workflow.data",
+        "virtool_workflow.analysis.fastqc",
+        "virtool_workflow.analysis.skewer",
+        "virtool_workflow.runtime.run_subprocess",
+    )
+    for module_name in builtin_modules:
+        import_module(module_name)
+def load_custom_fixtures():
+    """Import the workflow author's ``fixtures.py`` from the current directory.
+    A missing file is not an error; it is only logged.
+    """
+    logger.info("importing fixtures.py")
+    custom_path = Path("./fixtures.py")
+    module_name = custom_path.stem
+    try:
+        import_module_from_file(module_name, custom_path)
+    except FileNotFoundError:
+        logger.info("could not find fixtures.py")
+def load_workflow_from_file() -> Workflow:
+    """Load the workflow defined in ``./workflow.py``.
+    When the file cannot be found, a critical message is logged and the process
+    exits with status 1.
+    :return: The workflow.
+    """
+    logger.info("importing workflow.py")
+    workflow_path = Path("./workflow.py")
+    try:
+        workflow = discover_workflow(workflow_path)
+    except FileNotFoundError:
+        logger.critical("could not find workflow.py")
+        sys.exit(1)
+    return workflow
+def import_module_from_file(module_name: str, path: Path) -> ModuleType:
+    """Import a module from a file.
+    The parent directory of `path` is on `sys.path` while the module is executed, so
+    that modules and packages defined in that directory can be imported by it. The
+    `sys.path` change is always undone, even when the import fails.
+    The module is registered in `sys.modules` under `module_name`. If executing it
+    fails, the previous `sys.modules` entry (if any) is restored.
+    :param module_name: The module's name.
+    :param path: The module's path.
+    :returns: The loaded module.
+    :raises ImportError: If no import spec or loader can be created for `path`.
+    :raises FileNotFoundError: If `path` does not exist.
+    """
+    module_parent = str(path.parent)
+    # Only add (and later remove) the directory when it is not already listed, so a
+    # pre-existing entry is never removed by the cleanup below.
+    added_to_path = module_parent not in sys.path
+    if added_to_path:
+        sys.path.append(module_parent)
+    try:
+        spec = spec_from_file_location(module_name, path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"could not import {path}")
+        module = module_from_spec(spec)
+        # ``Loader.load_module`` is deprecated. Its replacement, ``exec_module``, does
+        # not touch ``sys.modules``, so register the module explicitly first.
+        previous = sys.modules.get(module_name)
+        sys.modules[module_name] = module
+        try:
+            spec.loader.exec_module(module)
+        except BaseException:
+            # Do not leave a half-initialised module behind.
+            if previous is None:
+                sys.modules.pop(module_name, None)
+            else:
+                sys.modules[module_name] = previous
+            raise
+        return module
+    finally:
+        if added_to_path and module_parent in sys.path:
+            sys.path.remove(module_parent)

virtool_workflow/runtime/events.py ADDED Viewed

@@ -0,0 +1,7 @@
+import asyncio
+class Events:
+    """Holds two :class:`asyncio.Event` flags: ``cancelled`` and ``terminated``."""
+    def __init__(self) -> None:
+        # Presumably set when the job is cancelled or the process is asked to
+        # terminate; the code that sets and awaits them is not in this module.
+        self.cancelled = asyncio.Event()
+        self.terminated = asyncio.Event()

virtool_workflow/runtime/hook.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""The :class:`Hook` class is used to hook into the workflow lifecycle."""
+from __future__ import annotations
+from asyncio import gather
+from typing import Any, Callable
+from pyfixtures import FixtureScope
+from structlog import get_logger
+from virtool_workflow.utils import coerce_to_coroutine_function
+logger = get_logger("hooks")
+class Hook:
+    """Used to hook into the workflow lifecycle.
+    A hook keeps a list of callbacks. Calling the hook (usually as a decorator)
+    registers a callback; :meth:`trigger` binds fixtures to every registered callback
+    and runs them.
+    """
+    def __init__(self, hook_name: str):
+        """A set of functions to be called as a group upon a particular event.
+        :param hook_name: The name of this hook. It is only used in log entries.
+        """
+        self.name = hook_name
+        self.callbacks = []
+        # ``hook.clear()`` empties the callback list in place.
+        self.clear = self.callbacks.clear
+    def __call__(self, callback_: Callable | None = None, until=None, once=False):
+        """Add a callback function to this Hook that will be called when the hook is
+        triggered.
+        Can be used bare (``@hook``) or with arguments (``@hook(once=True)``). In the
+        second form a decorator is returned instead of the registered callback.
+        :param callback_: The callback function to register.
+        :param until: Don't call the callback after the passed hook has been triggered.
+        :param once: Only execute the callback the next time this hook is triggered.
+        :return: The passed callback function as a coroutine function, or a decorator
+            that registers one when no callback was passed.
+        """
+        if once:
+            # "Once" is "until this same hook has been triggered".
+            until = self
+        register = self._callback_until(until) if until else self._callback
+        if callback_:
+            return register(callback_)
+        return register
+    def _callback(self, callback_: Callable):
+        """Register a callback function, skipping parameter validation"""
+        callback_ = coerce_to_coroutine_function(callback_)
+        self.callbacks.append(callback_)
+        return callback_
+    def _callback_until(self, hook_: Hook):
+        """Add a callback to this hook and remove it when :func:`hook_` is triggered."""
+        def _temporary_callback(callback_):
+            callback_ = self._callback(callback_)
+            @hook_._callback
+            def remove_callback():
+                # Either list may already have been emptied with ``clear()``; an
+                # unguarded ``list.remove`` would raise ``ValueError`` in that case.
+                if callback_ in self.callbacks:
+                    self.callbacks.remove(callback_)
+                if remove_callback in hook_.callbacks:
+                    hook_.callbacks.remove(remove_callback)
+            return callback_
+        return _temporary_callback
+    async def trigger(self, scope: FixtureScope, suppress=False, **kwargs) -> list[Any]:
+        """Trigger the hook.
+        Bind fixtures from `scope` to each registered callback and invoke them. The
+        keyword arguments passed to this function are forwarded to ``scope.bind``.
+        :param scope: the :class:`FixtureScope` to use to bind fixtures
+        :param suppress: suppress and log exceptions raised in callbacks
+        :return: the return values of the callbacks, in registration order
+        """
+        logger.info("triggering hook", hook=self.name)
+        # Make the scope itself available under the name "scope".
+        if "scope" not in scope:
+            scope["scope"] = scope
+        async def _noop():
+            """Awaitable stand-in for a callback that could not be bound."""
+            return None
+        async def _bind(callback_: Callable):
+            try:
+                return await scope.bind(callback_, **kwargs)
+            except KeyError as error:
+                if suppress:
+                    logger.exception(error)
+                    # The stand-in must be a coroutine function because every bound
+                    # callback is awaited; a plain ``lambda: None`` is not awaitable.
+                    return _noop
+                raise
+        _callbacks = [await _bind(callback) for callback in self.callbacks]
+        return await self._trigger(
+            _callbacks, suppress_errors=suppress, hook_name=self.name,
+        )
+    @staticmethod
+    async def _trigger(
+        callbacks, *args, suppress_errors=False, hook_name=None, **kwargs,
+    ):
+        """Run `callbacks` concurrently and return their results in order.
+        :param callbacks: awaitable callables to invoke with `args` and `kwargs`
+        :param suppress_errors: log exceptions raised by callbacks instead of raising
+        :param hook_name: name of the triggering hook, used only in log entries
+        :raises Exception: the first exception raised by a callback, once all
+            callbacks have finished, unless `suppress_errors` is set
+        """
+        async def call_callback(callback):
+            if suppress_errors:
+                try:
+                    return await callback(*args, **kwargs)
+                except Exception:
+                    # Look attributes up defensively: nothing in this class sets
+                    # ``hook`` on a callback, and a bound callback is not guaranteed
+                    # to have ``__name__``. An error here would defeat suppression.
+                    logger.exception(
+                        "encountered exception in hook callback",
+                        callback=getattr(callback, "__name__", repr(callback)),
+                        hook=hook_name
+                        or getattr(getattr(callback, "hook", None), "name", None),
+                    )
+            else:
+                return await callback(*args, **kwargs)
+        # ``return_exceptions=True`` lets every callback run to completion before the
+        # first failure is re-raised below.
+        results = await gather(
+            *[call_callback(callback) for callback in callbacks], return_exceptions=True,
+        )
+        for error in results:
+            if isinstance(error, Exception):
+                raise error
+        return results

virtool_workflow/runtime/path.py ADDED Viewed

@@ -0,0 +1,19 @@
+import asyncio
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from pathlib import Path
+from shutil import rmtree
+from virtool_workflow.runtime.config import RunConfig
+@asynccontextmanager
+async def create_work_path(config: RunConfig) -> AsyncIterator[Path]:
+    """A temporary working directory where all workflow files should be written.
+    Any existing directory at ``config.work_path`` is deleted and an empty one is
+    created. The directory is deleted again when the ``async with`` block exits,
+    whether it exits normally or with an exception.
+    :param config: the run configuration; only ``work_path`` is read.
+    :return: an async context manager that yields the absolute working path.
+    """
+    path = Path(config.work_path).absolute()
+    # Start from a clean slate; a directory left over from an earlier run is removed.
+    await asyncio.to_thread(rmtree, path, ignore_errors=True)
+    await asyncio.to_thread(path.mkdir, exist_ok=True, parents=True)
+    try:
+        yield path
+    finally:
+        # Without ``finally`` the directory would be left behind whenever the body
+        # of the ``async with`` block raised.
+        await asyncio.to_thread(rmtree, path)