virtool-workflow 7.1.5__py3-none-any.whl → 7.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
virtool_workflow/api/client.py CHANGED
@@ -3,15 +3,18 @@ from pathlib import Path
 
 import aiofiles
 from aiohttp import BasicAuth, ClientSession
+from structlog import get_logger
 
 from virtool_workflow.api.utils import (
+    API_CHUNK_SIZE,
     decode_json_response,
     raise_exception_by_status_code,
+    retry,
 )
 from virtool_workflow.errors import JobsAPIError
 from virtool_workflow.files import VirtoolFileFormat
 
-CHUNK_SIZE = 1024 * 1024 * 2
+logger = get_logger("http")
 
 
 class APIClient:
@@ -19,26 +22,29 @@ class APIClient:
         self.http = http
         self.jobs_api_connection_string = jobs_api_connection_string
 
+    @retry
     async def get_json(self, path: str) -> dict:
         """Get the JSON response from the provided API ``path``."""
         async with self.http.get(f"{self.jobs_api_connection_string}{path}") as resp:
             await raise_exception_by_status_code(resp)
             return await decode_json_response(resp)
 
+    @retry
     async def get_file(self, path: str, target_path: Path):
-        """Download the file at URL ``path`` to the local filesystem path ``target_path``.
-        """
+        """Download the file at URL ``path`` to the local ``target_path``."""
         async with self.http.get(f"{self.jobs_api_connection_string}{path}") as resp:
             if resp.status != 200:
                 raise JobsAPIError(
                     f"Encountered {resp.status} while downloading '{path}'",
                 )
+
             async with aiofiles.open(target_path, "wb") as f:
-                async for chunk in resp.content.iter_chunked(CHUNK_SIZE):
+                async for chunk in resp.content.iter_chunked(API_CHUNK_SIZE):
                     await f.write(chunk)
 
         return target_path
 
+    @retry
     async def patch_json(self, path: str, data: dict) -> dict:
         """Make a patch request against the provided API ``path`` and return the response
         as a dictionary of decoded JSON.
@@ -48,11 +54,13 @@ class APIClient:
         :return: the response as a dictionary of decoded JSON
         """
         async with self.http.patch(
-            f"{self.jobs_api_connection_string}{path}", json=data,
+            f"{self.jobs_api_connection_string}{path}",
+            json=data,
         ) as resp:
             await raise_exception_by_status_code(resp)
             return await decode_json_response(resp)
 
+    @retry
     async def post_file(
         self,
         path: str,
@@ -73,13 +81,16 @@ class APIClient:
         ) as response:
             await raise_exception_by_status_code(response)
 
+    @retry
     async def post_json(self, path: str, data: dict) -> dict:
         async with self.http.post(
-            f"{self.jobs_api_connection_string}{path}", json=data,
+            f"{self.jobs_api_connection_string}{path}",
+            json=data,
         ) as resp:
             await raise_exception_by_status_code(resp)
             return await decode_json_response(resp)
 
+    @retry
     async def put_file(
         self,
         path: str,
@@ -100,13 +111,16 @@ class APIClient:
         ) as response:
             await raise_exception_by_status_code(response)
 
+    @retry
     async def put_json(self, path: str, data: dict) -> dict:
         async with self.http.put(
-            f"{self.jobs_api_connection_string}{path}", json=data,
+            f"{self.jobs_api_connection_string}{path}",
+            json=data,
         ) as resp:
             await raise_exception_by_status_code(resp)
             return await decode_json_response(resp)
 
+    @retry
     async def delete(self, path: str) -> dict | None:
         """Make a delete request against the provided API ``path``."""
         async with self.http.delete(f"{self.jobs_api_connection_string}{path}") as resp:
@@ -124,8 +138,7 @@ async def api_client(
     job_id: str,
     key: str,
 ):
-    """An authenticated :class:``APIClient`` to make requests against the jobs API.
-    """
+    """An authenticated :class:``APIClient`` to make requests against the jobs API."""
     async with ClientSession(
         auth=BasicAuth(login=f"job-{job_id}", password=key),
     ) as http:
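Every request method on APIClient is now wrapped with the retry decorator from virtool_workflow.api.utils, and downloads stream in API_CHUNK_SIZE chunks. The sketch below is illustrative only: it assumes the constructor takes the session and connection string positionally (as the assignments in the hunk suggest) and uses placeholder credentials, URL, and request paths.

import asyncio
from pathlib import Path

from aiohttp import BasicAuth, ClientSession

from virtool_workflow.api.client import APIClient


async def main():
    # Placeholder job credentials and jobs API URL, mirroring the api_client fixture.
    async with ClientSession(
        auth=BasicAuth(login="job-example", password="secret"),
    ) as http:
        # Assumed constructor order: session, then connection string.
        api = APIClient(http, "http://example.invalid/api")

        # Both calls are retried on connection errors by the @retry decorator.
        job = await api.get_json("/jobs/example")
        await api.get_file("/hmms/files/profiles.hmm", Path("profiles.hmm"))


asyncio.run(main())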
virtool_workflow/api/utils.py CHANGED
@@ -1,11 +1,10 @@
 import asyncio
-import functools
+from functools import wraps
 
 from aiohttp import (
-    ClientConnectorError,
+    ClientError,
     ClientResponse,
     ContentTypeError,
-    ServerDisconnectedError,
 )
 from structlog import get_logger
 
@@ -19,40 +18,76 @@ from virtool_workflow.errors import (
 
 logger = get_logger("api")
 
+API_CHUNK_SIZE = 1024 * 1024 * 2
+"""The size of chunks to use when downloading files from the API in bytes."""
 
-def retry(func):
-    """Retry an API call five times when encountering the following exceptions:
-    * ``ConnectionRefusedError``.
-    * ``ClientConnectorError``.
-    * ``ServerDisconnectedError``.
+API_MAX_RETRIES = 5
+"""The maximum number of retries for API requests."""
 
-    These are probably due to transient issues in the cluster network.
+API_RETRY_BASE_DELAY = 5.0
+"""The base delay in seconds between retries for API requests."""
 
+
+def retry(
+    func=None,
+    *,
+    max_retries: int = API_MAX_RETRIES,
+    base_delay: float = API_RETRY_BASE_DELAY,
+):
+    """Retry the decorated function on connection errors.
+
+    :param func: The function to decorate (when used without parentheses)
+    :param max_retries: Maximum number of retry attempts before giving up (default: 5)
+    :param base_delay: Base delay in seconds between retries (default: 5.0)
     """
 
-    @functools.wraps(func)
-    async def wrapped(*args, **kwargs):
-        attempts = 0
+    def decorator(f):
+        @wraps(f)
+        async def wrapper(*args, **kwargs):
+            last_exception = None
+
+            log = logger.bind(func_name=f.__name__, max_retries=max_retries)
+
+            for attempt in range(max_retries + 1):
+                try:
+                    return await f(*args, **kwargs)
+                except (
+                    ClientError,
+                    ConnectionError,
+                ) as e:
+                    last_exception = e
+
+                    if attempt == max_retries:
+                        log.warning(
+                            "max retries reached for function",
+                            exception=str(e),
+                        )
+                        raise
 
-        try:
-            return await func(*args, **kwargs)
-        except (
-            ConnectionRefusedError,
-            ClientConnectorError,
-            ServerDisconnectedError,
-        ) as err:
-            if attempts == 5:
-                raise
+                    # Use exponential backoff if base_delay != 5.0, otherwise use
+                    # fixed delay.
+                    if base_delay == API_RETRY_BASE_DELAY:
+                        delay = base_delay
+                    else:
+                        delay = base_delay * (2**attempt)
 
-            attempts += 1
-            get_logger("runtime").info(
-                f"Encountered {type(err).__name__}. Retrying in 5 seconds.",
-            )
-            await asyncio.sleep(5)
+                    log.info(
+                        "retrying after connection error",
+                        exception=str(e),
+                        retries=attempt,
+                        retrying_in=delay,
+                    )
 
-        return await func(*args, **kwargs)
+                    await asyncio.sleep(delay)
 
-    return wrapped
+            raise last_exception
+
+        return wrapper
+
+    if func is None:
+        return decorator
+
+    return decorator(func)
 
 
 async def decode_json_response(resp: ClientResponse) -> dict | list | None:
@@ -103,7 +138,6 @@ async def raise_exception_by_status_code(resp: ClientResponse):
 
     if resp.status in status_exception_map:
         raise status_exception_map[resp.status](message)
-    else:
-        raise ValueError(
-            f"Status code {resp.status} not handled for response\n {resp}",
-        )
+    raise ValueError(
+        f"Status code {resp.status} not handled for response\n {resp}",
+    )
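The rewritten retry decorator supports both bare and parameterized use. With the default base_delay (5.0 s) the wait between attempts is fixed; passing a non-default base_delay switches to exponential backoff (base_delay * 2**attempt). A small sketch with hypothetical coroutines that are not part of the package:

from virtool_workflow.api.utils import retry


@retry
async def fetch_default():
    """Retried up to API_MAX_RETRIES (5) times with a fixed 5.0 s delay."""


@retry(max_retries=3, base_delay=1.0)
async def fetch_with_backoff():
    """Retried up to 3 times with exponential backoff: 1.0 s, 2.0 s, then 4.0 s."""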
virtool_workflow/data/hmms.py CHANGED
@@ -7,7 +7,6 @@ from functools import cached_property
 from pathlib import Path
 from shutil import which
 
-import aiofiles
 from pyfixtures import fixture
 from virtool.hmm.models import HMM
 from virtool.utils import decompress_file
@@ -84,25 +83,22 @@ async def hmms(
     hmms_path = work_path / "hmms"
     await asyncio.to_thread(hmms_path.mkdir, parents=True, exist_ok=True)
 
+    annotations_path = hmms_path / "annotations.json"
     compressed_annotations_path = hmms_path / "annotations.json.gz"
     await _api.get_file("/hmms/files/annotations.json.gz", compressed_annotations_path)
-
-    annotations_path = hmms_path / "annotations.json"
     await asyncio.to_thread(
         decompress_file,
         compressed_annotations_path,
         annotations_path,
         proc,
     )
+    annotations = await asyncio.to_thread(
+        lambda: [HMM(**hmm) for hmm in json.loads(annotations_path.read_text())],
+    )
 
     profiles_path = hmms_path / "profiles.hmm"
     await _api.get_file("/hmms/files/profiles.hmm", profiles_path)
-
-    async with aiofiles.open(annotations_path) as f:
-        annotations = [HMM(**hmm) for hmm in json.loads(await f.read())]
-
     p = await run_subprocess(["hmmpress", str(profiles_path)])
-
     if p.returncode != 0:
         raise RuntimeError("hmmpress command failed")
 
virtool_workflow/data/indexes.py CHANGED
@@ -3,7 +3,6 @@ import json
 from dataclasses import dataclass
 from pathlib import Path
 
-import aiofiles
 from pyfixtures import fixture
 from structlog import get_logger
 from virtool.analyses.models import Analysis
@@ -245,8 +244,7 @@ async def index(
 
     log.info("decompressed reference otus json")
 
-    async with aiofiles.open(json_path) as f:
-        data = json.loads(await f.read())
+    data = await asyncio.to_thread(lambda: json.loads(json_path.read_text()))
 
     sequence_lengths = {}
     sequence_otu_map = {}
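Both the hmms and index fixtures now read and parse JSON with asyncio.to_thread instead of aiofiles, so the blocking read and json.loads run in a worker thread rather than on the event loop. The general pattern, shown with a hypothetical helper and placeholder path:

import asyncio
import json
from pathlib import Path


async def load_json(path: Path) -> dict:
    # The blocking read and parse happen in a worker thread, not on the event loop.
    return await asyncio.to_thread(lambda: json.loads(path.read_text()))


# Example call with a placeholder path:
# data = asyncio.run(load_json(Path("reference.json")))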
virtool_workflow/runtime/discover.py CHANGED
@@ -88,7 +88,11 @@ def import_module_from_file(module_name: str, path: Path) -> ModuleType:
     spec = spec_from_file_location(module_name, path)
     if spec is None:
         raise ImportError(f"could not import {path}")
-    module = spec.loader.load_module(module_from_spec(spec).__name__)
+
+    module = module_from_spec(spec)
+    if spec.loader is None:
+        raise ImportError(f"could not load {path}")
+    spec.loader.exec_module(module)
 
     sys.path.remove(module_parent)
 
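import_module_from_file now uses the module_from_spec/exec_module pattern instead of the deprecated Loader.load_module, and fails loudly when the spec has no loader. The same pattern in isolation, with a placeholder module name and path:

from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path

# "my_workflow" and "workflow.py" are placeholders.
spec = spec_from_file_location("my_workflow", Path("workflow.py"))
if spec is None or spec.loader is None:
    raise ImportError("could not load workflow.py")

module = module_from_spec(spec)
spec.loader.exec_module(module)  # executes the module body and populates the module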
virtool_workflow/runtime/run.py CHANGED
@@ -38,7 +38,7 @@ from virtool_workflow.runtime.redis import (
     get_next_job_with_timeout,
     wait_for_cancellation,
 )
-from virtool_workflow.runtime.sentry import configure_sentry
+from virtool_workflow.runtime.sentry import configure_sentry, set_workflow_context
 from virtool_workflow.utils import configure_logs, get_virtool_workflow_version
 from virtool_workflow.workflow import Workflow
 
@@ -182,6 +182,9 @@ async def run_workflow(
     scope["proc"] = config.proc
     scope["results"] = {}
 
+    # Set Sentry context with workflow metadata
+    set_workflow_context(job.workflow, job.id)
+
     async with create_work_path(config) as work_path:
         scope["work_path"] = work_path
 
virtool_workflow/runtime/sentry.py CHANGED
@@ -9,7 +9,7 @@ from virtool_workflow.utils import get_virtool_workflow_version
 logger = get_logger("runtime")
 
 
-def configure_sentry(dsn: str):
+def configure_sentry(dsn: str | None) -> None:
     """Initialize Sentry for log aggregation."""
     if dsn:
         logger.info("initializing sentry", dsn=f"{dsn[:15]}...")
@@ -26,3 +26,25 @@ def configure_sentry(dsn: str):
         )
     else:
         logger.info("sentry disabled because no dsn was provided")
+
+
+def set_workflow_context(
+    workflow_name: str,
+    job_id: str,
+):
+    """Set workflow context for Sentry reporting."""
+    try:
+        with open("VERSION") as f:
+            workflow_version = f.read().strip()
+    except FileNotFoundError:
+        workflow_version = "UNKNOWN"
+
+    sentry_sdk.set_context(
+        "workflow",
+        {
+            "workflow_name": workflow_name,
+            "workflow_version": workflow_version,
+            "virtool_workflow_version": get_virtool_workflow_version(),
+            "job_id": job_id,
+        },
+    )
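The new set_workflow_context helper attaches workflow metadata to subsequent Sentry events via sentry_sdk.set_context and is called from run_workflow once the job is known. An illustrative direct call with placeholder arguments; the workflow version is read from a VERSION file in the working directory, falling back to "UNKNOWN":

from virtool_workflow.runtime.sentry import set_workflow_context

# Placeholder workflow name and job id.
set_workflow_context("example-workflow", "example-job-id")
# Later Sentry events carry a "workflow" context containing workflow_name,
# workflow_version, virtool_workflow_version, and job_id.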
virtool_workflow/utils.py CHANGED
@@ -82,7 +82,7 @@ async def make_directory(path: Path):
 
 def untar(path: Path, target_path: Path):
     with tarfile.open(path, "r:gz") as tar:
-        tar.extractall(target_path)
+        tar.extractall(target_path, filter="data")
 
 
 def move_all_model_files(source_path: Path, target_path: Path):
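Passing filter="data" opts in to the standard library's data extraction filter (PEP 706), which rejects absolute paths, members that escape the target directory, and special files, and avoids the DeprecationWarning that Python 3.12 and later emit when no filter is given. A minimal sketch with placeholder archive and target names:

import tarfile
from pathlib import Path

# "model.tar.gz" and "models" are placeholders.
with tarfile.open("model.tar.gz", "r:gz") as tar:
    # The "data" filter blocks absolute paths, path traversal outside the target
    # directory, and special members such as device files.
    tar.extractall(Path("models"), filter="data")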
virtool_workflow-7.2.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: virtool-workflow
-Version: 7.1.5
+Version: 7.2.1
 Summary: A framework for developing bioinformatics workflows for Virtool.
 License: MIT
 Author: Ian Boyes
@@ -8,10 +8,10 @@ Maintainer: Ian Boyes
 Requires-Python: >=3.12.3,<3.13.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Requires-Dist: aiofiles (>=0.7.0,<0.8.0)
+Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
 Requires-Dist: aiohttp (>=3.8.1,<4.0.0)
 Requires-Dist: biopython (>=1.81,<2.0)
-Requires-Dist: click (>=8.1.7,<9.0.0)
+Requires-Dist: click (>=8.2.1,<9.0.0)
 Requires-Dist: orjson (>=3.9.9,<4.0.0)
 Requires-Dist: pydantic-factories (>=1.17.3,<2.0.0)
 Requires-Dist: pyfixtures (>=1.0.0,<2.0.0)
virtool_workflow-7.2.1.dist-info/RECORD CHANGED
@@ -6,13 +6,13 @@ virtool_workflow/analysis/trimming.py,sha256=3Dk0J322ZhBzhuplaVgxZv4l7MLu7ZhqNQH
 virtool_workflow/analysis/utils.py,sha256=YU1_yInZzTNl9nKQTebUz47kUEqZ__d0k-RMLX8DWOA,1108
 virtool_workflow/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virtool_workflow/api/acquire.py,sha256=M8Wf6ck6YOPQU_0dalAYxuQbukj6c4Uat1OPhV7Gj2w,2157
-virtool_workflow/api/client.py,sha256=wOAAX0Na1IrULWUTGq0wzbjquZqmZqCnc0gb2A2JvLE,4604
-virtool_workflow/api/utils.py,sha256=UrOsrGMMJ6PbWcl84A4tKD6Lq7C8hVQIZ_TpQR75KHU,3123
+virtool_workflow/api/client.py,sha256=fXCriMv0s2L5O78fn8uIBWWPtxtPz0tUk1XHb1L7RGU,4766
+virtool_workflow/api/utils.py,sha256=Wzf1YseJKUBnUk0Ge8DMwgigWMS7Dn-VFNLlBFo6SW4,4341
 virtool_workflow/cli.py,sha256=yEl1LziABKbjc5MCOoUGLy-iuqehOou37x9ox4LA92M,1441
 virtool_workflow/data/__init__.py,sha256=L2bQ5rMpnCMuLWDMwel4-wPchfQCTHH3EEwxDqwGOsY,610
 virtool_workflow/data/analyses.py,sha256=tGUU3gLyk_4vn1Xb7FccvBc_HQzYW_AoU7Dh1OkWhAA,3105
-virtool_workflow/data/hmms.py,sha256=pntKYyWeipApMNUGYh90ReyTbJgbCPlE-cm66Eo6teA,3225
-virtool_workflow/data/indexes.py,sha256=hxqxJBTt2VnclR8YjqAAEtk_Ig7OEPBx0sjxf4Puy9Y,9007
+virtool_workflow/data/hmms.py,sha256=qHtkQO0bF6V7GGVoGDpdhXXR9RxaetWiTL3of6Pd88Y,3210
+virtool_workflow/data/indexes.py,sha256=vmrqZjndrTV9tUfVjudIzPa0clJHy1kWrxzRQH81Ess,8981
 virtool_workflow/data/jobs.py,sha256=YYWxWoiVtHIBKkpOTpZl-Ute5rg-RAnMGAzBoixDNQo,1663
 virtool_workflow/data/ml.py,sha256=haYHZfDbYOk0ftg6MCXbQpoDFl1VbdnQ_TuNvI9pmw0,2186
 virtool_workflow/data/samples.py,sha256=fgyuQSavZiqwrqEH7Y_CoAR5gJIcO1ujrIyO5tDrXeI,4971
@@ -27,19 +27,19 @@ virtool_workflow/pytest_plugin/data.py,sha256=objP7cn_4u2MtsDaf0amG0EtBv3-jthj9l
 virtool_workflow/pytest_plugin/utils.py,sha256=lxyWqHKWXGxQXBIbcYUCC6PvjSeruhSDnD-dPX1mL5Y,211
 virtool_workflow/runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virtool_workflow/runtime/config.py,sha256=fHkprWxxqWeEWKOobxpVvSOS5Njk5Mq32sH-p5o_d8g,546
-virtool_workflow/runtime/discover.py,sha256=DsXII4f1cO0JT-csardzN-PfcuJ09SAFBgfDydZikFo,2933
+virtool_workflow/runtime/discover.py,sha256=-ki36Z4tV0uH_HDP4iD94KT0-JIZ8kHGciuH6wxYDdg,3016
 virtool_workflow/runtime/events.py,sha256=ZIX3veBfC_2yNTdClHJaYMPn4KAF9JZqpQ3qDq4ox_E,138
 virtool_workflow/runtime/hook.py,sha256=ZfmvDHoSE7Qwk4Rpcjyd-pYuwP55mzEddec2ngRZpsg,4328
 virtool_workflow/runtime/path.py,sha256=J8CsNMTg4XgDtib0gVSsLNvv17q793M-ydsxN6pkHrI,562
 virtool_workflow/runtime/ping.py,sha256=Xm4udRCcldHfSV1Rjpiqr-6vE7G6kaVaZgDlzNSxdao,1434
 virtool_workflow/runtime/redis.py,sha256=m-Dtdpbho-Qa9W5IYJCeEEZ7vv04hYu5yALsxOJr0FY,1813
-virtool_workflow/runtime/run.py,sha256=l0HguLLdYRVKvYTnIjoUB-9h6oDxffK-xzZYqO9lSIM,7599
+virtool_workflow/runtime/run.py,sha256=pkfNyjaDBsVHrEGqmz5Ah5FkHJN4SkCHKkEmfDO1QRU,7725
 virtool_workflow/runtime/run_subprocess.py,sha256=OFHVQ2ao16X8I9nl6Nm_f3IL7rHnmLH8ixtlAFKlOLk,4790
-virtool_workflow/runtime/sentry.py,sha256=y4mh1sT-hoijRtKr4meE7yn1E-alxRKDylzGxmN6ndU,755
-virtool_workflow/utils.py,sha256=jLsOtwh3RP7BCdgcr_rQr3DqzK1nLP2NILFXxbgAyGk,2530
+virtool_workflow/runtime/sentry.py,sha256=WJ9CI2fshJD5As6s3umUtJrUq4Z-hQRrSxAgbjceXrk,1326
+virtool_workflow/utils.py,sha256=9rHSlcvZsV2qE1Wo-cUDqESIATswr1zR_P8V-wP6jdk,2545
 virtool_workflow/workflow.py,sha256=W8IEFzd28wjdNNlqRrPDKyX9LeQpR6Vxy7zKeYEMQEc,2655
-virtool_workflow-7.1.5.dist-info/LICENSE,sha256=nkoVQw9W4aoQM9zgtNzHDmztap5TuXZ1L2-87vNr3w8,1097
-virtool_workflow-7.1.5.dist-info/METADATA,sha256=2noOApATUCmR9G0i7FCgHWjzATEaC1KkV_90p9Aso_A,1757
-virtool_workflow-7.1.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-virtool_workflow-7.1.5.dist-info/entry_points.txt,sha256=d4MA8ZDTJOU0jKZ3ymtHZbfLVoRPgItEIN5U4uIqay8,62
-virtool_workflow-7.1.5.dist-info/RECORD,,
+virtool_workflow-7.2.1.dist-info/LICENSE,sha256=nkoVQw9W4aoQM9zgtNzHDmztap5TuXZ1L2-87vNr3w8,1097
+virtool_workflow-7.2.1.dist-info/METADATA,sha256=NoJWREvu-JpIz3LcAUZscYbXA-srOf--v9X94A3EygI,1759
+virtool_workflow-7.2.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+virtool_workflow-7.2.1.dist-info/entry_points.txt,sha256=d4MA8ZDTJOU0jKZ3ymtHZbfLVoRPgItEIN5U4uIqay8,62
+virtool_workflow-7.2.1.dist-info/RECORD,,