tonik 0.1.20__tar.gz → 0.1.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {tonik-0.1.20 → tonik-0.1.22}/.devcontainer/devcontainer.json +1 -1
  2. {tonik-0.1.20 → tonik-0.1.22}/PKG-INFO +22 -22
  3. tonik-0.1.20/pyproject.toml~ → tonik-0.1.22/pyproject.toml +30 -26
  4. tonik-0.1.22/src/tonik/ingest.py +166 -0
  5. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/storage.py +83 -67
  6. tonik-0.1.22/src/tonik/xarray2zarr.py +321 -0
  7. {tonik-0.1.20 → tonik-0.1.22}/tests/conftest.py +11 -8
  8. tonik-0.1.22/tests/test_netcdf.py +191 -0
  9. {tonik-0.1.20 → tonik-0.1.22}/tests/test_storage.py +95 -7
  10. tonik-0.1.20/tests/test_save.py → tonik-0.1.22/tests/test_zarr.py +99 -185
  11. tonik-0.1.20/.gitattributes +0 -2
  12. tonik-0.1.20/pixi.lock +0 -4050
  13. tonik-0.1.20/pyproject.toml +0 -95
  14. tonik-0.1.20/src/tonik/xarray2zarr.py +0 -310
  15. {tonik-0.1.20 → tonik-0.1.22}/.gitignore +0 -0
  16. {tonik-0.1.20 → tonik-0.1.22}/HOW_TO_RELEASE.md +0 -0
  17. {tonik-0.1.20 → tonik-0.1.22}/LICENSE +0 -0
  18. {tonik-0.1.20 → tonik-0.1.22}/README.md +0 -0
  19. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/Dockerfile_api +0 -0
  20. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/Dockerfile_grafana +0 -0
  21. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/dashboards/demo_dashboard.json +0 -0
  22. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/docker-compose.yml +0 -0
  23. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/grafana.ini +0 -0
  24. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/provisioning/dashboards/default.yaml +0 -0
  25. {tonik-0.1.20 → tonik-0.1.22}/grafana_example/provisioning/datasources/default.yaml +0 -0
  26. {tonik-0.1.20 → tonik-0.1.22}/mkdocs.yml +0 -0
  27. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/__init__.py +0 -0
  28. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/api.py +0 -0
  29. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/grafana_annotations.py +0 -0
  30. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/package_data/index.html +0 -0
  31. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/package_data/whakaari_labels.json +0 -0
  32. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/utils.py +0 -0
  33. {tonik-0.1.20 → tonik-0.1.22}/src/tonik/xarray2netcdf.py +0 -0
  34. {tonik-0.1.20 → tonik-0.1.22}/tests/backend_speed_test.py +0 -0
  35. {tonik-0.1.20 → tonik-0.1.22}/tests/test_api.py +0 -0
  36. {tonik-0.1.20 → tonik-0.1.22}/tests/test_utils.py +0 -0

{tonik-0.1.20 → tonik-0.1.22}/.devcontainer/devcontainer.json
@@ -14,7 +14,7 @@
  // "appPort": ["8003:8003"],

  // Use 'postCreateCommand' to run commands after the container is created.
- "postCreateCommand": "pip3 install -e . && pip3 install httpx pytest ipykernel hatch",
+ "postCreateCommand": "pip3 install -e '.[dev]'",

  // Configure tool-specific properties.
  "customizations": {

{tonik-0.1.20 → tonik-0.1.22}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tonik
- Version: 0.1.20
+ Version: 0.1.22
  Summary: Store time series data as HDF5 files and access them through an API.
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -9,29 +9,29 @@ License-File: LICENSE
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
- Requires-Python: >=3.9
- Requires-Dist: datashader>=0.14
- Requires-Dist: fastapi>=0.112
- Requires-Dist: h5netcdf>=1.1
- Requires-Dist: h5py>=3.8
- Requires-Dist: matplotlib
- Requires-Dist: netcdf4>=1.6
- Requires-Dist: pandas>=2.0
- Requires-Dist: python-json-logger>=2.0
- Requires-Dist: s3fs
- Requires-Dist: uvicorn[standard]>=0.22
- Requires-Dist: xarray[accel,io,parallel]
- Requires-Dist: zarr
+ Requires-Python: >=3.10
+ Requires-Dist: datashader<0.19,>=0.18.2
+ Requires-Dist: fastapi<0.129,>=0.128.0
+ Requires-Dist: h5netcdf<2,>=1.7.3
+ Requires-Dist: h5py<4,>=3.15.1
+ Requires-Dist: matplotlib<4,>=3.10.8
+ Requires-Dist: pandas<3,>=2.3.3
+ Requires-Dist: s3fs<2026,>=2025.12.0
+ Requires-Dist: uvicorn[standard]<0.41,>=0.40.0
+ Requires-Dist: xarray[accel,io,parallel]<2026,>=2025.6.1
+ Requires-Dist: zarr<4,>=3.1.5
  Provides-Extra: dev
- Requires-Dist: build; extra == 'dev'
- Requires-Dist: httpx; extra == 'dev'
- Requires-Dist: ipykernel; extra == 'dev'
- Requires-Dist: mkdocs; extra == 'dev'
- Requires-Dist: mkdocs-jupyter; extra == 'dev'
- Requires-Dist: mkdocstrings[python]; extra == 'dev'
+ Requires-Dist: build<2,>=1.4.0; extra == 'dev'
+ Requires-Dist: hatch<2,>=1.16.2; extra == 'dev'
+ Requires-Dist: httpx<0.29,>=0.28.1; extra == 'dev'
+ Requires-Dist: ipykernel<7,>=6.31.0; extra == 'dev'
+ Requires-Dist: mkdocs-jupyter<0.26,>=0.25.1; extra == 'dev'
+ Requires-Dist: mkdocs<2,>=1.6.1; extra == 'dev'
+ Requires-Dist: mkdocstrings[python]<2,>=1.0.0; extra == 'dev'
+ Requires-Dist: moto[s3]<6,>=5.1.19; extra == 'dev'
  Requires-Dist: pytest; extra == 'dev'
- Requires-Dist: twine; extra == 'dev'
- Requires-Dist: zarr[remote-tests]; extra == 'dev'
+ Requires-Dist: twine<7,>=6.2.0; extra == 'dev'
+ Requires-Dist: zarr[remote-tests]<4,>=3.1.5; extra == 'dev'
  Description-Content-Type: text/markdown

  # Tonik

tonik-0.1.20/pyproject.toml~ → tonik-0.1.22/pyproject.toml
@@ -12,7 +12,7 @@ exclude = [

  [project]
  name = "tonik"
- version = "0.1.16"
+ version = "0.1.22"
  authors = [
      { name="Yannik Behr", email="y.behr@gns.cri.nz" },
      { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -20,35 +20,24 @@ authors = [

  description = "Store time series data as HDF5 files and access them through an API."
  readme = "README.md"
- requires-python = ">=3.9"
+ requires-python = ">=3.10"
  classifiers = [
      "Programming Language :: Python :: 3",
      "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
      "Operating System :: OS Independent",
  ]
- dependencies = [
-     "h5py>=3.8",
-     "datashader>=0.14",
-     "xarray[io,accel,parallel]",
-     "pandas>=2.0",
-     "netcdf4>=1.6",
-     "h5netcdf>=1.1",
-     "python-json-logger>=2.0",
-     "uvicorn[standard]>=0.22",
-     "fastapi>=0.112",
-     "matplotlib",
-     "zarr[remote_tests]>=3.0.3; python_version >= '3.11'",
-     "zarr[remote_tests]<3; python_version < '3.11'",
-     "s3fs"
- ]

- [project.optional-dependencies]
- dev = ["pytest",
-        "httpx",
-        "ipykernel",
-        "mkdocs",
-        "mkdocstrings[python]",
-        "mkdocs-jupyter"]
+ dependencies = [
+     "xarray[io,accel,parallel]>=2025.6.1,<2026",
+     "datashader>=0.18.2,<0.19",
+     "h5py>=3.15.1,<4",
+     "pandas>=2.3.3,<3",
+     "h5netcdf>=1.7.3,<2",
+     "uvicorn[standard]>=0.40.0,<0.41",
+     "fastapi>=0.128.0,<0.129",
+     "matplotlib>=3.10.8,<4",
+     "zarr>=3.1.5,<4",
+     "s3fs>=2025.12.0,<2026"]

  [project.urls]
  Homepage = "https://tsc-tools.github.io/tonik"
@@ -59,8 +48,23 @@ tonik_api = "tonik.api:main"
  test_data = "tonik.utils:main"
  grafana_annotations = "tonik.grafana_annotations:main"

+ [project.optional-dependencies]
+ dev = ["pytest",
+        "httpx>=0.28.1,<0.29",
+        "ipykernel>=6.31.0,<7",
+        "build>=1.4.0,<2",
+        "twine>=6.2.0,<7",
+        "mkdocs>=1.6.1,<2",
+        "mkdocstrings[python]>=1.0.0,<2",
+        "mkdocs-jupyter>=0.25.1,<0.26",
+        "zarr[remote-tests]>=3.1.5,<4",
+        "moto[s3]>=5.1.19,<6",
+        "hatch>=1.16.2,<2"
+ ]
+
  [tool.pytest.ini_options]
  log_cli = true
+ addopts = "-s"

  [tool.hatch.envs.test]
  dependencies = [
@@ -70,7 +74,7 @@ dependencies = [
  ]

  [[tool.hatch.envs.test.matrix]]
- python = ["3.11", "3.9"]
+ python = ["3.10", "3.11", "3.12", "3.13"]

  [tool.hatch.envs.test.scripts]
- run-pytest = "pytest tests"
+ run-pytest = "pytest tests"

tonik-0.1.22/src/tonik/ingest.py
@@ -0,0 +1,166 @@
+ # src/tonik/ingest.py
+ import json
+ import logging
+ import os
+ import pickle
+ import threading
+ import uuid
+ from datetime import datetime, timezone
+ from typing import Optional
+
+ import xarray as xr
+
+ from .xarray2netcdf import xarray2netcdf
+ from .xarray2zarr import xarray2zarr
+
+ logger = logging.getLogger(__name__)
+
+ __all__ = ["enqueue_dataset", "IngestWorker"]
+
+
+ def _norm_timeseries(xds: xr.Dataset, timedim: str) -> xr.Dataset:
+     xds = xds.sortby(timedim)
+     xds = xds.drop_duplicates(timedim, keep='last')
+     xds[timedim] = xds[timedim].astype('datetime64[ns]')
+     return xds
+
+
+ def enqueue_dataset(data: xr.Dataset, target_path: str, *, backend: str,
+                     ingest_config: dict, save_kwargs: Optional[dict] = None) -> dict:
+     """
+     Enqueue a dataset for ingestion.
+     Parameters
+     ----------
+     data : xr.Dataset
+         The dataset to enqueue.
+     target_path : str
+         The target path where the dataset should be saved.
+     backend : str
+         The backend to use for saving the dataset ('zarr' or 'netcdf').
+     ingest_config : dict
+         Configuration for the ingest queue, must include 'queue_path'.
+     save_kwargs : Optional[dict], optional
+         Additional keyword arguments to pass to the save function, by default None.
+     Returns
+     -------
+     dict
+         A message dictionary representing the enqueued dataset.
+     """
+
+     queue_path = ingest_config.get("queue_path")
+     if not queue_path:
+         raise ValueError("ingest_config must provide a 'queue_path'.")
+     queue_path = os.path.abspath(queue_path)
+     payload_dir = os.path.join(queue_path, "payloads")
+     message_dir = os.path.join(queue_path, "messages")
+     os.makedirs(payload_dir, exist_ok=True)
+     os.makedirs(message_dir, exist_ok=True)
+     timedim = save_kwargs.get(
+         "timedim", "datetime") if save_kwargs else "datetime"
+
+     if isinstance(data, xr.DataArray):
+         name = data.name or "data"
+         data = data.to_dataset(name=name)
+
+     dataset = _norm_timeseries(data, timedim=timedim)
+     entry_id = uuid.uuid4().hex
+     payload_path = os.path.join(payload_dir, f"{entry_id}.nc")
+     kwargs_path = os.path.join(payload_dir, f"{entry_id}.pkl")
+
+     dataset.to_netcdf(payload_path, engine="h5netcdf")
+     with open(kwargs_path, "wb") as handle:
+         pickle.dump(save_kwargs or {}, handle)
+
+     message = {
+         "id": entry_id,
+         "target_path": os.path.abspath(target_path),
+         "backend": backend,
+         "payload_path": payload_path,
+         "kwargs_path": kwargs_path,
+         "created_at": datetime.now(tz=timezone.utc).isoformat(),
+     }
+     tmp_path = os.path.join(message_dir, f"{entry_id}.json.tmp")
+     final_path = os.path.join(message_dir, f"{entry_id}.json")
+     with open(tmp_path, "w", encoding="utf-8") as handle:
+         json.dump(message, handle)
+     os.replace(tmp_path, final_path)
+     logger.debug("Queued dataset %s for %s backend at %s",
+                  entry_id, backend, target_path)
+     return message
+
+
+ class IngestWorker:
+     def __init__(self, queue_path: str, poll_interval: float = 10.0,
+                  target_prefix: Optional[str] = None):
+         self.queue_path = os.path.abspath(queue_path)
+         self.messages_dir = os.path.join(self.queue_path, "messages")
+         self.payloads_dir = os.path.join(self.queue_path, "payloads")
+         os.makedirs(self.messages_dir, exist_ok=True)
+         os.makedirs(self.payloads_dir, exist_ok=True)
+         self.poll_interval = poll_interval
+         self.target_prefix = os.path.abspath(
+             target_prefix) if target_prefix else None
+
+     def _iter_messages(self):
+         for name in sorted(os.listdir(self.messages_dir)):
+             if not name.endswith(".json"):
+                 continue
+             msg_path = os.path.join(self.messages_dir, name)
+             with open(msg_path, "r", encoding="utf-8") as handle:
+                 message = json.load(handle)
+             target = os.path.abspath(message.get("target_path", ""))
+             if self.target_prefix and not target.startswith(self.target_prefix):
+                 continue
+             yield msg_path, message
+
+     def run_once(self) -> int:
+         processed = 0
+         for msg_path, message in self._iter_messages():
+             payload_path = message.get("payload_path")
+             kwargs_path = message.get("kwargs_path")
+             if not payload_path or not os.path.exists(payload_path):
+                 logger.warning(
+                     "Missing payload for %s, dropping message", msg_path)
+                 os.remove(msg_path)
+                 if kwargs_path and os.path.exists(kwargs_path):
+                     os.remove(kwargs_path)
+                 continue
+
+             dataset = None
+             try:
+                 with xr.open_dataset(payload_path, engine='h5netcdf') as ds_on_disk:
+                     dataset = ds_on_disk.load()
+
+                 kwargs = {}
+                 if kwargs_path and os.path.exists(kwargs_path):
+                     with open(kwargs_path, "rb") as handle:
+                         kwargs = pickle.load(handle)
+
+                 backend = message.get("backend", "zarr")
+                 if backend == "zarr":
+                     xarray2zarr(dataset, message["target_path"], **kwargs)
+                 elif backend == "netcdf":
+                     xarray2netcdf(dataset, message["target_path"], **kwargs)
+                 else:
+                     raise ValueError(f"Unsupported backend '{backend}'")
+             except Exception as exc:
+                 logger.error("Failed to ingest %s: %s",
+                              msg_path, exc, exc_info=True)
+                 continue
+             finally:
+                 if dataset is not None:
+                     dataset.close()
+
+             os.remove(payload_path)
+             if kwargs_path and os.path.exists(kwargs_path):
+                 os.remove(kwargs_path)
+             os.remove(msg_path)
+             processed += 1
+         return processed
+
+     def run_forever(self, stop_event: Optional[threading.Event] = None) -> None:
+         stop_event = stop_event or threading.Event()
+         while not stop_event.is_set():
+             processed = self.run_once()
+             if processed == 0:
+                 stop_event.wait(self.poll_interval)

{tonik-0.1.20 → tonik-0.1.22}/src/tonik/storage.py
@@ -1,80 +1,27 @@
+ from datetime import datetime
  import json
  import logging
- import logging.config
  import os
+ import threading
+ from typing import Optional

  import xarray as xr

+ from .ingest import IngestWorker, enqueue_dataset
  from .xarray2netcdf import xarray2netcdf
  from .xarray2zarr import xarray2zarr

- LOGGING_CONFIG = {
-     "version": 1,
-     "disable_existing_loggers": False,
-     "formatters": {
-         "default": {  # The formatter name, it can be anything that I wish
-             # What to add in the message
-             "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
-             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
-         },
-         "json": {  # The formatter name
-             "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
-             # Json is more complex, but easier to read, display all attributes!
-             "format": """
-                 asctime: %(asctime)s
-                 created: %(created)f
-                 filename: %(filename)s
-                 funcName: %(funcName)s
-                 levelname: %(levelname)s
-                 levelno: %(levelno)s
-                 lineno: %(lineno)d
-                 message: %(message)s
-                 module: %(module)s
-                 msec: %(msecs)d
-                 name: %(name)s
-                 pathname: %(pathname)s
-                 process: %(process)d
-                 processName: %(processName)s
-                 relativeCreated: %(relativeCreated)d
-                 thread: %(thread)d
-                 threadName: %(threadName)s
-                 exc_info: %(exc_info)s
-             """,
-             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
-         },
-     },
-     "handlers": {
-         "simple": {  # The handler name
-             "formatter": "default",  # Refer to the formatter defined above
-             "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
-             "stream": "ext://sys.stdout",
-         },
-     },
-     "loggers": {
-         "storage": {  # The name of the logger, this SHOULD match your module!
-             "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
-             "handlers": [
-                 "simple",  # Refer the handler defined above
-             ],
-         },
-     },
-     "root": {
-         "level": "INFO",  # FILTER: only INFO logs onwards
-         "handlers": [
-             "simple",  # Refer the handler defined above
-         ]
-     },
- }
-
- logging.config.dictConfig(LOGGING_CONFIG)
- logger = logging.getLogger("__name__")
+
+ logger = logging.getLogger(__name__)


  class Path(object):
-     def __init__(self, name, parentdir, create=True, backend='zarr'):
+     def __init__(self, name, parentdir, create=True, backend='zarr',
+                  archive_starttime=datetime(2000, 1, 1), ingest_config=None):
          self.name = name
          self.create = create
          self.backend = backend
+         self.archive_starttime = archive_starttime
          self.engine = 'h5netcdf' if self.backend == 'netcdf' else self.backend
          self.path = os.path.join(parentdir, name)
          if create:
@@ -86,6 +33,7 @@ class Path(object):
          if not os.path.exists(self.path):
              raise FileNotFoundError(f"Path {self.path} not found")
          self.children = {}
+         self.ingest_config = ingest_config.copy() if ingest_config else None

      def __str__(self):
          return self.path
@@ -97,7 +45,8 @@
              return self.children[key]
          except KeyError:
              self.children[key] = Path(
-                 key, self.path, self.create, self.backend)
+                 key, self.path, self.create, self.backend, self.archive_starttime,
+                 ingest_config=self.ingest_config)
              return self.children[key]

      def feature_path(self, feature):
@@ -149,10 +98,24 @@
          """
          Save a feature to disk
          """
+         if self.ingest_config and self.ingest_config.get('queue_path'):
+             enqueue_dataset(
+                 data,
+                 target_path=self.path,
+                 backend=self.backend,
+                 ingest_config=self.ingest_config,
+                 save_kwargs=kwargs,
+             )
+             logger.debug("Queued data for %s backend at %s",
+                          self.backend, self.path)
+             return
+
          if self.backend == 'netcdf':
-             xarray2netcdf(data, self.path, **kwargs)
+             xarray2netcdf(data, self.path,
+                           archive_starttime=self.archive_starttime, **kwargs)
          elif self.backend == 'zarr':
-             xarray2zarr(data, self.path, **kwargs)
+             xarray2zarr(data, self.path,
+                         archive_starttime=self.archive_starttime, **kwargs)

      def shape(self, feature):
          """
@@ -208,11 +171,17 @@ class Storage(Path):
      >>> rsam = c("rsam")
      """

-     def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf'):
+     def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf',
+                  ingest_config=None, archive_starttime=datetime(2000, 1, 1)):
          self.stores = set()
          self.starttime = starttime
          self.endtime = endtime
-         super().__init__(name, rootdir, create, backend)
+         self.archive_starttime = archive_starttime
+         self._ingest_worker: Optional[IngestWorker] = None
+         self._ingest_thread: Optional[threading.Thread] = None
+         self._ingest_stop_event: Optional[threading.Event] = None
+         super().__init__(name, rootdir, create, backend, archive_starttime,
+                          ingest_config=ingest_config)

      def print_tree(self, site, indent=0, output=''):
          output += ' ' * indent + site.path + '\n'
@@ -317,3 +286,50 @@

      starttime = property(get_starttime, set_starttime)
      endtime = property(get_endtime, set_endtime)
+
+     def _ensure_ingest_worker(self, poll_interval=None) -> IngestWorker:
+         if not (self.ingest_config and self.ingest_config.get('queue_path')):
+             raise RuntimeError(
+                 "Ingestion queue is not configured for this Storage instance.")
+
+         if self._ingest_worker is None:
+             queue_path = self.ingest_config['queue_path']
+             poll = poll_interval or self.ingest_config.get(
+                 'poll_interval', 10.0)
+             self._ingest_worker = IngestWorker(
+                 queue_path=queue_path,
+                 poll_interval=poll
+             )
+         elif poll_interval:
+             self._ingest_worker.poll_interval = poll_interval
+         return self._ingest_worker
+
+     def run_ingest_once(self, poll_interval=None) -> int:
+         worker = self._ensure_ingest_worker(poll_interval)
+         return worker.run_once()
+
+     def start_ingest_worker(self, *, background=True, poll_interval=None):
+         worker = self._ensure_ingest_worker(poll_interval)
+         if not background:
+             return worker.run_once()
+         if self._ingest_thread and self._ingest_thread.is_alive():
+             return self._ingest_thread
+         stop_event = threading.Event()
+         thread = threading.Thread(
+             target=worker.run_forever,
+             kwargs={'stop_event': stop_event},
+             daemon=True,
+             name=f"tonik-ingest-{self.name}",
+         )
+         thread.start()
+         self._ingest_thread = thread
+         self._ingest_stop_event = stop_event
+         return thread
+
+     def stop_ingest_worker(self, timeout=None):
+         if self._ingest_thread and self._ingest_thread.is_alive():
+             if self._ingest_stop_event:
+                 self._ingest_stop_event.set()
+             self._ingest_thread.join(timeout=timeout)
+         self._ingest_thread = None
+         self._ingest_stop_event = None
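
Taken together, the storage.py changes mean that a Path constructed with an ingest_config containing a 'queue_path' no longer writes in save(); it enqueues and returns, and the actual write happens when a worker drains the queue. A minimal sketch of that round trip, assuming the child-access and save() patterns implied by the hunks above — directory paths are illustrative, and ds is the dataset from the previous sketch:

    from tonik.storage import Storage

    # With ingest_config set, save() enqueues instead of writing directly.
    store = Storage("volcano_obs", "/tmp/archive", backend="zarr",
                    ingest_config={"queue_path": "/tmp/ingest_queue",
                                   "poll_interval": 5.0})
    channel = store["MDR"]["HHZ"]   # child Paths inherit the ingest_config
    channel.save(ds)                # queued, not yet on disk

    # Either drain the queue synchronously ...
    store.run_ingest_once()
    # ... or run a daemon thread that polls every 5 s, and stop it later.
    store.start_ingest_worker()
    store.stop_ingest_worker(timeout=10)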