tonik 0.1.20__tar.gz → 0.1.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tonik-0.1.20 → tonik-0.1.22}/.devcontainer/devcontainer.json +1 -1
- {tonik-0.1.20 → tonik-0.1.22}/PKG-INFO +22 -22
- tonik-0.1.20/pyproject.toml~ → tonik-0.1.22/pyproject.toml +30 -26
- tonik-0.1.22/src/tonik/ingest.py +166 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/storage.py +83 -67
- tonik-0.1.22/src/tonik/xarray2zarr.py +321 -0
- {tonik-0.1.20 → tonik-0.1.22}/tests/conftest.py +11 -8
- tonik-0.1.22/tests/test_netcdf.py +191 -0
- {tonik-0.1.20 → tonik-0.1.22}/tests/test_storage.py +95 -7
- tonik-0.1.20/tests/test_save.py → tonik-0.1.22/tests/test_zarr.py +99 -185
- tonik-0.1.20/.gitattributes +0 -2
- tonik-0.1.20/pixi.lock +0 -4050
- tonik-0.1.20/pyproject.toml +0 -95
- tonik-0.1.20/src/tonik/xarray2zarr.py +0 -310
- {tonik-0.1.20 → tonik-0.1.22}/.gitignore +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/HOW_TO_RELEASE.md +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/LICENSE +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/README.md +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/Dockerfile_api +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/Dockerfile_grafana +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/dashboards/demo_dashboard.json +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/docker-compose.yml +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/grafana.ini +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/provisioning/dashboards/default.yaml +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/grafana_example/provisioning/datasources/default.yaml +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/mkdocs.yml +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/__init__.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/api.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/grafana_annotations.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/package_data/index.html +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/package_data/whakaari_labels.json +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/utils.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/src/tonik/xarray2netcdf.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/tests/backend_speed_test.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/tests/test_api.py +0 -0
- {tonik-0.1.20 → tonik-0.1.22}/tests/test_utils.py +0 -0

{tonik-0.1.20 → tonik-0.1.22}/.devcontainer/devcontainer.json

@@ -14,7 +14,7 @@
 // "appPort": ["8003:8003"],
 
 // Use 'postCreateCommand' to run commands after the container is created.
-"postCreateCommand": "pip3 install -e .
+"postCreateCommand": "pip3 install -e '.[dev]'",
 
 // Configure tool-specific properties.
 "customizations": {

{tonik-0.1.20 → tonik-0.1.22}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tonik
-Version: 0.1.20
+Version: 0.1.22
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -9,29 +9,29 @@ License-File: LICENSE
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.
-Requires-Dist: datashader
-Requires-Dist: fastapi
-Requires-Dist: h5netcdf
-Requires-Dist: h5py
-Requires-Dist: matplotlib
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: xarray[accel,io,parallel]
-Requires-Dist: zarr
+Requires-Python: >=3.10
+Requires-Dist: datashader<0.19,>=0.18.2
+Requires-Dist: fastapi<0.129,>=0.128.0
+Requires-Dist: h5netcdf<2,>=1.7.3
+Requires-Dist: h5py<4,>=3.15.1
+Requires-Dist: matplotlib<4,>=3.10.8
+Requires-Dist: pandas<3,>=2.3.3
+Requires-Dist: s3fs<2026,>=2025.12.0
+Requires-Dist: uvicorn[standard]<0.41,>=0.40.0
+Requires-Dist: xarray[accel,io,parallel]<2026,>=2025.6.1
+Requires-Dist: zarr<4,>=3.1.5
 Provides-Extra: dev
-Requires-Dist: build; extra == 'dev'
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: mkdocs-jupyter; extra == 'dev'
-Requires-Dist:
+Requires-Dist: build<2,>=1.4.0; extra == 'dev'
+Requires-Dist: hatch<2,>=1.16.2; extra == 'dev'
+Requires-Dist: httpx<0.29,>=0.28.1; extra == 'dev'
+Requires-Dist: ipykernel<7,>=6.31.0; extra == 'dev'
+Requires-Dist: mkdocs-jupyter<0.26,>=0.25.1; extra == 'dev'
+Requires-Dist: mkdocs<2,>=1.6.1; extra == 'dev'
+Requires-Dist: mkdocstrings[python]<2,>=1.0.0; extra == 'dev'
+Requires-Dist: moto[s3]<6,>=5.1.19; extra == 'dev'
 Requires-Dist: pytest; extra == 'dev'
-Requires-Dist: twine; extra == 'dev'
-Requires-Dist: zarr[remote-tests]; extra == 'dev'
+Requires-Dist: twine<7,>=6.2.0; extra == 'dev'
+Requires-Dist: zarr[remote-tests]<4,>=3.1.5; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # Tonik

tonik-0.1.20/pyproject.toml~ → tonik-0.1.22/pyproject.toml

@@ -12,7 +12,7 @@ exclude = [
 
 [project]
 name = "tonik"
-version = "0.1.20"
+version = "0.1.22"
 authors = [
   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -20,35 +20,24 @@ authors = [
 
 description = "Store time series data as HDF5 files and access them through an API."
 readme = "README.md"
-requires-python = ">=3.
+requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
     "Operating System :: OS Independent",
 ]
-dependencies = [
-    "h5py>=3.8",
-    "datashader>=0.14",
-    "xarray[io,accel,parallel]",
-    "pandas>=2.0",
-    "netcdf4>=1.6",
-    "h5netcdf>=1.1",
-    "python-json-logger>=2.0",
-    "uvicorn[standard]>=0.22",
-    "fastapi>=0.112",
-    "matplotlib",
-    "zarr[remote_tests]>=3.0.3; python_version >= '3.11'",
-    "zarr[remote_tests]<3; python_version < '3.11'",
-    "s3fs"
-]
 
-[
-
-
-
-
-
-
+dependencies = [
+    "xarray[io,accel,parallel]>=2025.6.1,<2026",
+    "datashader>=0.18.2,<0.19",
+    "h5py>=3.15.1,<4",
+    "pandas>=2.3.3,<3",
+    "h5netcdf>=1.7.3,<2",
+    "uvicorn[standard]>=0.40.0,<0.41",
+    "fastapi>=0.128.0,<0.129",
+    "matplotlib>=3.10.8,<4",
+    "zarr>=3.1.5,<4",
+    "s3fs>=2025.12.0,<2026"]
 
 [project.urls]
 Homepage = "https://tsc-tools.github.io/tonik"
@@ -59,8 +48,23 @@ tonik_api = "tonik.api:main"
 test_data = "tonik.utils:main"
 grafana_annotations = "tonik.grafana_annotations:main"
 
+[project.optional-dependencies]
+dev = ["pytest",
+    "httpx>=0.28.1,<0.29",
+    "ipykernel>=6.31.0,<7",
+    "build>=1.4.0,<2",
+    "twine>=6.2.0,<7",
+    "mkdocs>=1.6.1,<2",
+    "mkdocstrings[python]>=1.0.0,<2",
+    "mkdocs-jupyter>=0.25.1,<0.26",
+    "zarr[remote-tests]>=3.1.5,<4",
+    "moto[s3]>=5.1.19,<6",
+    "hatch>=1.16.2,<2"
+]
+
 [tool.pytest.ini_options]
 log_cli = true
+addopts = "-s"
 
 [tool.hatch.envs.test]
 dependencies = [
@@ -70,7 +74,7 @@ dependencies = [
 ]
 
 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.
+python = ["3.10", "3.11", "3.12", "3.13"]
 
 [tool.hatch.envs.test.scripts]
-run-pytest = "pytest tests"
+run-pytest = "pytest tests"

tonik-0.1.22/src/tonik/ingest.py (new file)

@@ -0,0 +1,166 @@
+# src/tonik/ingest.py
+import json
+import logging
+import os
+import pickle
+import threading
+import uuid
+from datetime import datetime, timezone
+from typing import Optional
+
+import xarray as xr
+
+from .xarray2netcdf import xarray2netcdf
+from .xarray2zarr import xarray2zarr
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["enqueue_dataset", "IngestWorker"]
+
+
+def _norm_timeseries(xds: xr.Dataset, timedim: str) -> xr.Dataset:
+    xds = xds.sortby(timedim)
+    xds = xds.drop_duplicates(timedim, keep='last')
+    xds[timedim] = xds[timedim].astype('datetime64[ns]')
+    return xds
+
+
+def enqueue_dataset(data: xr.Dataset, target_path: str, *, backend: str,
+                    ingest_config: dict, save_kwargs: Optional[dict] = None) -> dict:
+    """
+    Enqueue a dataset for ingestion.
+    Parameters
+    ----------
+    data : xr.Dataset
+        The dataset to enqueue.
+    target_path : str
+        The target path where the dataset should be saved.
+    backend : str
+        The backend to use for saving the dataset ('zarr' or 'netcdf').
+    ingest_config : dict
+        Configuration for the ingest queue, must include 'queue_path'.
+    save_kwargs : Optional[dict], optional
+        Additional keyword arguments to pass to the save function, by default None.
+    Returns
+    -------
+    dict
+        A message dictionary representing the enqueued dataset.
+    """
+
+    queue_path = ingest_config.get("queue_path")
+    if not queue_path:
+        raise ValueError("ingest_config must provide a 'queue_path'.")
+    queue_path = os.path.abspath(queue_path)
+    payload_dir = os.path.join(queue_path, "payloads")
+    message_dir = os.path.join(queue_path, "messages")
+    os.makedirs(payload_dir, exist_ok=True)
+    os.makedirs(message_dir, exist_ok=True)
+    timedim = save_kwargs.get(
+        "timedim", "datetime") if save_kwargs else "datetime"
+
+    if isinstance(data, xr.DataArray):
+        name = data.name or "data"
+        data = data.to_dataset(name=name)
+
+    dataset = _norm_timeseries(data, timedim=timedim)
+    entry_id = uuid.uuid4().hex
+    payload_path = os.path.join(payload_dir, f"{entry_id}.nc")
+    kwargs_path = os.path.join(payload_dir, f"{entry_id}.pkl")
+
+    dataset.to_netcdf(payload_path, engine="h5netcdf")
+    with open(kwargs_path, "wb") as handle:
+        pickle.dump(save_kwargs or {}, handle)
+
+    message = {
+        "id": entry_id,
+        "target_path": os.path.abspath(target_path),
+        "backend": backend,
+        "payload_path": payload_path,
+        "kwargs_path": kwargs_path,
+        "created_at": datetime.now(tz=timezone.utc).isoformat(),
+    }
+    tmp_path = os.path.join(message_dir, f"{entry_id}.json.tmp")
+    final_path = os.path.join(message_dir, f"{entry_id}.json")
+    with open(tmp_path, "w", encoding="utf-8") as handle:
+        json.dump(message, handle)
+    os.replace(tmp_path, final_path)
+    logger.debug("Queued dataset %s for %s backend at %s",
+                 entry_id, backend, target_path)
+    return message
+
+
+class IngestWorker:
+    def __init__(self, queue_path: str, poll_interval: float = 10.0,
+                 target_prefix: Optional[str] = None):
+        self.queue_path = os.path.abspath(queue_path)
+        self.messages_dir = os.path.join(self.queue_path, "messages")
+        self.payloads_dir = os.path.join(self.queue_path, "payloads")
+        os.makedirs(self.messages_dir, exist_ok=True)
+        os.makedirs(self.payloads_dir, exist_ok=True)
+        self.poll_interval = poll_interval
+        self.target_prefix = os.path.abspath(
+            target_prefix) if target_prefix else None
+
+    def _iter_messages(self):
+        for name in sorted(os.listdir(self.messages_dir)):
+            if not name.endswith(".json"):
+                continue
+            msg_path = os.path.join(self.messages_dir, name)
+            with open(msg_path, "r", encoding="utf-8") as handle:
+                message = json.load(handle)
+            target = os.path.abspath(message.get("target_path", ""))
+            if self.target_prefix and not target.startswith(self.target_prefix):
+                continue
+            yield msg_path, message
+
+    def run_once(self) -> int:
+        processed = 0
+        for msg_path, message in self._iter_messages():
+            payload_path = message.get("payload_path")
+            kwargs_path = message.get("kwargs_path")
+            if not payload_path or not os.path.exists(payload_path):
+                logger.warning(
+                    "Missing payload for %s, dropping message", msg_path)
+                os.remove(msg_path)
+                if kwargs_path and os.path.exists(kwargs_path):
+                    os.remove(kwargs_path)
+                continue
+
+            dataset = None
+            try:
+                with xr.open_dataset(payload_path, engine='h5netcdf') as ds_on_disk:
+                    dataset = ds_on_disk.load()
+
+                kwargs = {}
+                if kwargs_path and os.path.exists(kwargs_path):
+                    with open(kwargs_path, "rb") as handle:
+                        kwargs = pickle.load(handle)
+
+                backend = message.get("backend", "zarr")
+                if backend == "zarr":
+                    xarray2zarr(dataset, message["target_path"], **kwargs)
+                elif backend == "netcdf":
+                    xarray2netcdf(dataset, message["target_path"], **kwargs)
+                else:
+                    raise ValueError(f"Unsupported backend '{backend}'")
+            except Exception as exc:
+                logger.error("Failed to ingest %s: %s",
+                             msg_path, exc, exc_info=True)
+                continue
+            finally:
+                if dataset is not None:
+                    dataset.close()
+
+            os.remove(payload_path)
+            if kwargs_path and os.path.exists(kwargs_path):
+                os.remove(kwargs_path)
+            os.remove(msg_path)
+            processed += 1
+        return processed
+
+    def run_forever(self, stop_event: Optional[threading.Event] = None) -> None:
+        stop_event = stop_event or threading.Event()
+        while not stop_event.is_set():
+            processed = self.run_once()
+            if processed == 0:
+                stop_event.wait(self.poll_interval)
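
A minimal usage sketch of the new queue module (not part of the package diff): the dataset, the /tmp/... paths and the variable name are made up for illustration, while enqueue_dataset, IngestWorker and their arguments are taken from src/tonik/ingest.py above.

import numpy as np
import pandas as pd
import xarray as xr

from tonik.ingest import IngestWorker, enqueue_dataset

# Illustrative dataset; 'datetime' matches the default timedim in enqueue_dataset.
ds = xr.Dataset(
    {"rsam": ("datetime", np.random.rand(10))},
    coords={"datetime": pd.date_range("2024-01-01", periods=10, freq="10min")},
)

# Serialise the payload (.nc + .pkl) and an atomically-renamed JSON message
# under the queue directory.
msg = enqueue_dataset(
    ds,
    "/tmp/tonik_archive/site1",          # hypothetical target store
    backend="zarr",
    ingest_config={"queue_path": "/tmp/tonik_queue"},
)

# A worker (possibly in another process) picks the message up and writes the
# data through xarray2zarr / xarray2netcdf.
worker = IngestWorker(queue_path="/tmp/tonik_queue")
print(worker.run_once())  # -> number of messages processed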

{tonik-0.1.20 → tonik-0.1.22}/src/tonik/storage.py

@@ -1,80 +1,27 @@
+from datetime import datetime
 import json
 import logging
-import logging.config
 import os
+import threading
+from typing import Optional
 
 import xarray as xr
 
+from .ingest import IngestWorker, enqueue_dataset
 from .xarray2netcdf import xarray2netcdf
 from .xarray2zarr import xarray2zarr
 
-
-
-    "disable_existing_loggers": False,
-    "formatters": {
-        "default": {  # The formatter name, it can be anything that I wish
-            # What to add in the message
-            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
-            "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
-        },
-        "json": {  # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
-            # Json is more complex, but easier to read, display all attributes!
-            "format": """
-                asctime: %(asctime)s
-                created: %(created)f
-                filename: %(filename)s
-                funcName: %(funcName)s
-                levelname: %(levelname)s
-                levelno: %(levelno)s
-                lineno: %(lineno)d
-                message: %(message)s
-                module: %(module)s
-                msec: %(msecs)d
-                name: %(name)s
-                pathname: %(pathname)s
-                process: %(process)d
-                processName: %(processName)s
-                relativeCreated: %(relativeCreated)d
-                thread: %(thread)d
-                threadName: %(threadName)s
-                exc_info: %(exc_info)s
-                """,
-            "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
-        },
-    },
-    "handlers": {
-        "simple": {  # The handler name
-            "formatter": "default",  # Refer to the formatter defined above
-            "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
-            "stream": "ext://sys.stdout",
-        },
-    },
-    "loggers": {
-        "storage": {  # The name of the logger, this SHOULD match your module!
-            "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
-            "handlers": [
-                "simple",  # Refer the handler defined above
-            ],
-        },
-    },
-    "root": {
-        "level": "INFO",  # FILTER: only INFO logs onwards
-        "handlers": [
-            "simple",  # Refer the handler defined above
-        ]
-    },
-}
-
-logging.config.dictConfig(LOGGING_CONFIG)
-logger = logging.getLogger("__name__")
+
+logger = logging.getLogger(__name__)
 
 
 class Path(object):
-    def __init__(self, name, parentdir, create=True, backend='zarr'
+    def __init__(self, name, parentdir, create=True, backend='zarr',
+                 archive_starttime=datetime(2000, 1, 1), ingest_config=None):
         self.name = name
         self.create = create
         self.backend = backend
+        self.archive_starttime = archive_starttime
         self.engine = 'h5netcdf' if self.backend == 'netcdf' else self.backend
         self.path = os.path.join(parentdir, name)
         if create:
@@ -86,6 +33,7 @@ class Path(object):
             if not os.path.exists(self.path):
                 raise FileNotFoundError(f"Path {self.path} not found")
         self.children = {}
+        self.ingest_config = ingest_config.copy() if ingest_config else None
 
     def __str__(self):
         return self.path
@@ -97,7 +45,8 @@ class Path(object):
             return self.children[key]
         except KeyError:
             self.children[key] = Path(
-                key, self.path, self.create, self.backend
+                key, self.path, self.create, self.backend, self.archive_starttime,
+                ingest_config=self.ingest_config)
             return self.children[key]
 
     def feature_path(self, feature):
@@ -149,10 +98,24 @@ class Path(object):
         """
        Save a feature to disk
         """
+        if self.ingest_config and self.ingest_config.get('queue_path'):
+            enqueue_dataset(
+                data,
+                target_path=self.path,
+                backend=self.backend,
+                ingest_config=self.ingest_config,
+                save_kwargs=kwargs,
+            )
+            logger.debug("Queued data for %s backend at %s",
+                         self.backend, self.path)
+            return
+
         if self.backend == 'netcdf':
-            xarray2netcdf(data, self.path,
+            xarray2netcdf(data, self.path,
+                          archive_starttime=self.archive_starttime, **kwargs)
         elif self.backend == 'zarr':
-            xarray2zarr(data, self.path,
+            xarray2zarr(data, self.path,
+                        archive_starttime=self.archive_starttime, **kwargs)
 
     def shape(self, feature):
         """
@@ -208,11 +171,17 @@ class Storage(Path):
     >>> rsam = c("rsam")
     """
 
-    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf'
+    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf',
+                 ingest_config=None, archive_starttime=datetime(2000, 1, 1)):
         self.stores = set()
         self.starttime = starttime
         self.endtime = endtime
-
+        self.archive_starttime = archive_starttime
+        self._ingest_worker: Optional[IngestWorker] = None
+        self._ingest_thread: Optional[threading.Thread] = None
+        self._ingest_stop_event: Optional[threading.Event] = None
+        super().__init__(name, rootdir, create, backend, archive_starttime,
+                         ingest_config=ingest_config)
 
     def print_tree(self, site, indent=0, output=''):
         output += ' ' * indent + site.path + '\n'
@@ -317,3 +286,50 @@ class Storage(Path):
 
     starttime = property(get_starttime, set_starttime)
     endtime = property(get_endtime, set_endtime)
+
+    def _ensure_ingest_worker(self, poll_interval=None) -> IngestWorker:
+        if not (self.ingest_config and self.ingest_config.get('queue_path')):
+            raise RuntimeError(
+                "Ingestion queue is not configured for this Storage instance.")
+
+        if self._ingest_worker is None:
+            queue_path = self.ingest_config['queue_path']
+            poll = poll_interval or self.ingest_config.get(
+                'poll_interval', 10.0)
+            self._ingest_worker = IngestWorker(
+                queue_path=queue_path,
+                poll_interval=poll
+            )
+        elif poll_interval:
+            self._ingest_worker.poll_interval = poll_interval
+        return self._ingest_worker
+
+    def run_ingest_once(self, poll_interval=None) -> int:
+        worker = self._ensure_ingest_worker(poll_interval)
+        return worker.run_once()
+
+    def start_ingest_worker(self, *, background=True, poll_interval=None):
+        worker = self._ensure_ingest_worker(poll_interval)
+        if not background:
+            return worker.run_once()
+        if self._ingest_thread and self._ingest_thread.is_alive():
+            return self._ingest_thread
+        stop_event = threading.Event()
+        thread = threading.Thread(
+            target=worker.run_forever,
+            kwargs={'stop_event': stop_event},
+            daemon=True,
+            name=f"tonik-ingest-{self.name}",
+        )
+        thread.start()
+        self._ingest_thread = thread
+        self._ingest_stop_event = stop_event
+        return thread
+
+    def stop_ingest_worker(self, timeout=None):
+        if self._ingest_thread and self._ingest_thread.is_alive():
+            if self._ingest_stop_event:
+                self._ingest_stop_event.set()
+            self._ingest_thread.join(timeout=timeout)
+        self._ingest_thread = None
+        self._ingest_stop_event = None
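
The same feature viewed from Storage, again as a sketch rather than part of the diff: the experiment name and directories are hypothetical, while the ingest_config argument and the run_ingest_once / start_ingest_worker / stop_ingest_worker methods are the ones added to storage.py above.

from tonik.storage import Storage

# A Storage created with an ingest_config queues its writes into the on-disk
# queue instead of writing zarr/netcdf synchronously (see the
# "Save a feature to disk" hunk above).
store = Storage(
    "volcano_project",                   # hypothetical experiment name
    "/tmp/tonik_archive",
    backend="zarr",
    ingest_config={"queue_path": "/tmp/tonik_queue", "poll_interval": 5.0},
)

# Drain the queue in a single pass from the same process ...
print(store.run_ingest_once())

# ... or poll it from a background daemon thread and stop it on shutdown.
store.start_ingest_worker(background=True)
store.stop_ingest_worker(timeout=30)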