tonik 0.1.15__tar.gz → 0.1.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {tonik-0.1.15 → tonik-0.1.17}/.devcontainer/devcontainer.json +2 -2
  2. {tonik-0.1.15 → tonik-0.1.17}/PKG-INFO +5 -4
  3. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/docker-compose.yml +2 -2
  4. tonik-0.1.17/pyproject.toml +76 -0
  5. tonik-0.1.15/pyproject.toml → tonik-0.1.17/pyproject.toml~ +5 -3
  6. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/api.py +9 -5
  7. tonik-0.1.17/src/tonik/grafana_annotations.py +152 -0
  8. tonik-0.1.17/src/tonik/package_data/whakaari_labels.json +162 -0
  9. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/utils.py +54 -6
  10. tonik-0.1.17/src/tonik/xarray2zarr.py +236 -0
  11. {tonik-0.1.15 → tonik-0.1.17}/tests/conftest.py +51 -1
  12. {tonik-0.1.15 → tonik-0.1.17}/tests/test_api.py +30 -0
  13. {tonik-0.1.15 → tonik-0.1.17}/tests/test_save.py +43 -24
  14. tonik-0.1.15/.pytest_cache/CACHEDIR.TAG +0 -4
  15. tonik-0.1.15/.pytest_cache/README.md +0 -8
  16. tonik-0.1.15/.pytest_cache/v/cache/lastfailed +0 -16
  17. tonik-0.1.15/.pytest_cache/v/cache/nodeids +0 -17739
  18. tonik-0.1.15/.pytest_cache/v/cache/stepwise +0 -1
  19. tonik-0.1.15/src/tonik/xarray2zarr.py +0 -67
  20. {tonik-0.1.15 → tonik-0.1.17}/.gitignore +0 -0
  21. {tonik-0.1.15 → tonik-0.1.17}/HOW_TO_RELEASE.md +0 -0
  22. {tonik-0.1.15 → tonik-0.1.17}/LICENSE +0 -0
  23. {tonik-0.1.15 → tonik-0.1.17}/README.md +0 -0
  24. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/Dockerfile_api +0 -0
  25. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/Dockerfile_grafana +0 -0
  26. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/dashboards/demo_dashboard.json +0 -0
  27. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/grafana.ini +0 -0
  28. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/provisioning/dashboards/default.yaml +0 -0
  29. {tonik-0.1.15 → tonik-0.1.17}/grafana_example/provisioning/datasources/default.yaml +0 -0
  30. {tonik-0.1.15 → tonik-0.1.17}/mkdocs.yml +0 -0
  31. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/__init__.py +0 -0
  32. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/package_data/index.html +0 -0
  33. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/storage.py +0 -0
  34. {tonik-0.1.15 → tonik-0.1.17}/src/tonik/xarray2netcdf.py +0 -0
  35. {tonik-0.1.15 → tonik-0.1.17}/tests/backend_speed_test.py +0 -0
  36. {tonik-0.1.15 → tonik-0.1.17}/tests/test_storage.py +0 -0
  37. {tonik-0.1.15 → tonik-0.1.17}/tests/test_utils.py +0 -0
{tonik-0.1.15 → tonik-0.1.17}/.devcontainer/devcontainer.json
@@ -1,9 +1,9 @@
  // For format details, see https://aka.ms/devcontainer.json. For config options, see the
  // README at: https://github.com/devcontainers/templates/tree/main/src/python
  {
- "name": "Python 3",
+ "name": "Tonik",
  // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
- "image": "mcr.microsoft.com/devcontainers/python:1-3.9-bullseye",
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",


  // Features to add to the dev container. More info: https://containers.dev/features.
{tonik-0.1.15 → tonik-0.1.17}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: tonik
- Version: 0.1.15
+ Version: 0.1.17
  Summary: Store time series data as HDF5 files and access them through an API.
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -18,10 +18,11 @@ Requires-Dist: matplotlib
  Requires-Dist: netcdf4>=1.6
  Requires-Dist: pandas>=2.0
  Requires-Dist: python-json-logger>=2.0
+ Requires-Dist: s3fs
  Requires-Dist: uvicorn[standard]>=0.22
  Requires-Dist: xarray[accel,io,parallel]
- Requires-Dist: zarr<3; python_version < '3.11'
- Requires-Dist: zarr>=3.0.3; python_version >= '3.11'
+ Requires-Dist: zarr[remote-tests]<3; python_version < '3.11'
+ Requires-Dist: zarr[remote-tests]>=3.0.3; python_version >= '3.11'
  Provides-Extra: dev
  Requires-Dist: httpx; extra == 'dev'
  Requires-Dist: ipykernel; extra == 'dev'
{tonik-0.1.15 → tonik-0.1.17}/grafana_example/docker-compose.yml
@@ -10,7 +10,7 @@ services:
      environment:
        - GF_SECURITY_ADMIN_PASSWORD=tonikdemo
      ports:
-       - "11223:3000"
+       - "11224:3000"
      networks:
        - tonik_network

@@ -21,7 +21,7 @@ services:
      container_name: tonik_example_api
      image: tonik_example_api
      ports:
-       - "11222:8003"
+       - "11225:8003"
      networks:
        - tonik_network

tonik-0.1.17/pyproject.toml (new file)
@@ -0,0 +1,76 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [tool.hatch.build.targets.sdist]
+ exclude = [
+   "/.github",
+   "/site",
+   "/docs",
+   "/sandbox.ipynb"
+ ]
+
+ [project]
+ name = "tonik"
+ version = "0.1.17"
+ authors = [
+   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
+   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
+ ]
+
+ description = "Store time series data as HDF5 files and access them through an API."
+ readme = "README.md"
+ requires-python = ">=3.9"
+ classifiers = [
+   "Programming Language :: Python :: 3",
+   "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+   "Operating System :: OS Independent",
+ ]
+ dependencies = [
+   "h5py>=3.8",
+   "datashader>=0.14",
+   "xarray[io,accel,parallel]",
+   "pandas>=2.0",
+   "netcdf4>=1.6",
+   "h5netcdf>=1.1",
+   "python-json-logger>=2.0",
+   "uvicorn[standard]>=0.22",
+   "fastapi>=0.112",
+   "matplotlib",
+   "zarr[remote_tests]>=3.0.3; python_version >= '3.11'",
+   "zarr[remote_tests]<3; python_version < '3.11'",
+   "s3fs"
+ ]
+
+ [project.optional-dependencies]
+ dev = ["pytest",
+        "httpx",
+        "ipykernel",
+        "mkdocs",
+        "mkdocstrings[python]",
+        "mkdocs-jupyter"]
+
+ [project.urls]
+ Homepage = "https://tsc-tools.github.io/tonik"
+ Issues = "https://github.com/tsc-tools/tonik/issues"
+
+ [project.scripts]
+ tonik_api = "tonik.api:main"
+ test_data = "tonik.utils:main"
+ grafana_annotations = "tonik.grafana_annotations:main"
+
+ [tool.pytest.ini_options]
+ log_cli = true
+
+ [tool.hatch.envs.test]
+ dependencies = [
+   "coverage[toml]",
+   "pytest",
+   "httpx"
+ ]
+
+ [[tool.hatch.envs.test.matrix]]
+ python = ["3.11", "3.9"]
+
+ [tool.hatch.envs.test.scripts]
+ run-pytest = "pytest tests"
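
Each entry in [project.scripts] resolves to a module-level main(argv=None) function, so the console commands can also be driven in-process by passing an argv list. A minimal sketch for the new grafana_annotations entry point; the URL and file name below are placeholders:

    from tonik.grafana_annotations import main

    # Equivalent to the shell command:
    #   grafana_annotations http://admin:pwd@localhost:3000 --labels labels.json
    main(["http://admin:pwd@localhost:3000", "--labels", "labels.json"])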
tonik-0.1.15/pyproject.toml → tonik-0.1.17/pyproject.toml~
@@ -12,7 +12,7 @@ exclude = [

  [project]
  name = "tonik"
- version = "0.1.15"
+ version = "0.1.16"
  authors = [
    { name="Yannik Behr", email="y.behr@gns.cri.nz" },
    { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -37,8 +37,9 @@ dependencies = [
    "uvicorn[standard]>=0.22",
    "fastapi>=0.112",
    "matplotlib",
-   "zarr>=3.0.3; python_version >= '3.11'",
-   "zarr<3; python_version < '3.11'",
+   "zarr[remote_tests]>=3.0.3; python_version >= '3.11'",
+   "zarr[remote_tests]<3; python_version < '3.11'",
+   "s3fs"
  ]

  [project.optional-dependencies]
@@ -56,6 +57,7 @@ Issues = "https://github.com/tsc-tools/tonik/issues"
  [project.scripts]
  tonik_api = "tonik.api:main"
  test_data = "tonik.utils:main"
+ grafana_annotations = "tonik.grafana_annotations:main"

  [tool.pytest.ini_options]
  log_cli = true
{tonik-0.1.15 → tonik-0.1.17}/src/tonik/api.py
@@ -26,8 +26,9 @@ InventoryReturnType = Union[list, dict]

  class TonikAPI:

-     def __init__(self, rootdir) -> None:
+     def __init__(self, rootdir, backend='netcdf') -> None:
          self.rootdir = rootdir
+         self.backend = backend
          self.app = FastAPI()

          # -- allow any origin to query API
@@ -72,7 +73,7 @@ class TonikAPI:
          _et = self.preprocess_datetime(endtime)
          g = Storage(group, rootdir=self.rootdir,
                      starttime=_st, endtime=_et,
-                     create=False)
+                     create=False, backend=self.backend)
          c = g
          if subdir:
              c = g.get_substore(*subdir)
@@ -147,7 +148,8 @@ class TonikAPI:
          return freq, dates, spec

      async def inventory(self, group: str, subdir: SubdirType = None, tree: bool = True) -> InventoryReturnType:
-         sg = Storage(group, rootdir=self.rootdir, create=False)
+         sg = Storage(group, rootdir=self.rootdir,
+                      create=False, backend=self.backend)
          try:
              c = sg.get_substore(*subdir)
          except TypeError:
@@ -168,7 +170,8 @@ class TonikAPI:
          _st = self.preprocess_datetime(starttime)
          _et = self.preprocess_datetime(endtime)
          sg = Storage(group, rootdir=self.rootdir,
-                      starttime=_st, endtime=_et, create=False)
+                      starttime=_st, endtime=_et, create=False,
+                      backend=self.backend)
          try:
              c = sg.get_substore(*subdir)
          except TypeError:
@@ -183,10 +186,11 @@ class TonikAPI:
  def main(argv=None):
      parser = ArgumentParser()
      parser.add_argument("--rootdir", default='/tmp')
+     parser.add_argument("--backend", default='netcdf')
      parser.add_argument("-p", "--port", default=8003, type=int)
      parser.add_argument("--host", default='0.0.0.0')
      args = parser.parse_args(argv)
-     ta = TonikAPI(args.rootdir)
+     ta = TonikAPI(args.rootdir, backend=args.backend)
      uvicorn.run(ta.app, host=args.host, port=args.port)

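A minimal sketch of what the new backend parameter enables, assuming the Storage layer also accepts a backend name such as 'zarr' (the default remains 'netcdf'); the root directory is a placeholder:

    import uvicorn
    from tonik.api import TonikAPI

    # Serve an existing archive with a non-default storage backend.
    ta = TonikAPI("/data/tonik", backend="zarr")  # 'zarr' is an assumed backend name
    uvicorn.run(ta.app, host="0.0.0.0", port=8003)

The same choice is exposed on the command line through the new flag: tonik_api --rootdir /data/tonik --backend zarr.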
tonik-0.1.17/src/tonik/grafana_annotations.py (new file)
@@ -0,0 +1,152 @@
+ """
+ Post labels as Grafana annotations and retrieve them via the Grafana API.
+ Labels are assumed to be in the following format:
+
+ [
+     {"time": "2023-01-01T00:00:00Z",
+      "title": "Label 1",
+      "description": "Some description",
+      "tags": ["tag1", "tag2"],
+      "id": 12345
+      },
+     {"time": "2023-02-01T00:00:00Z",
+      "timeEnd": "2023-02-01T01:00:00Z",
+      "title": "Label 2",
+      "description": "Another description",
+      "tags": ["tag1", "tag2"],
+      "id": 12346
+      },
+     ...
+ ]
+
+ Note: The `timeEnd` field is optional. If it is provided, the Grafana annotation will
+ be a range annotation, otherwise it will be a point annotation. The label id is used to
+ check if an annotation was already added to Grafana. If the label id is not present in the
+ annotations, a new annotation will be created. If the label id is present, the annotation
+ will be skipped to avoid duplicates.
+ """
+
+ import numpy as np
+ import requests
+ import uuid
+ from typing import List, Dict, Any, Optional
+
+
+ def get_annotations(baseurl: str, tags: Optional[List[str]] = None) -> Dict[str, Dict[str, Any]]:
+     """
+     Get existing annotations from the Grafana API.
+
+     Parameters
+     ----------
+     baseurl : str
+         The base URL of the Grafana API. This is typically in the format
+         "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+     tags : list of str, optional
+         A list of tags to filter the annotations. If None, all annotations are retrieved.
+
+     Returns
+     -------
+     annotations : dict
+         A dictionary containing the annotations, where the keys are the label IDs
+         and the values are dictionaries with the following keys:
+         - 'text': The text of the annotation.
+         - 'time': The start time of the annotation in milliseconds since epoch.
+         - 'timeEnd': (optional) The end time of the annotation in milliseconds since epoch.
+         - 'tags': A list of tags associated with the annotation.
+         - 'id': The ID of the annotation.
+     """
+     url = baseurl + "/api/annotations"
+     params = {}
+     if tags is not None:
+         params = {"tags": tags}
+     rval = requests.get(url, params=params)
+     if rval.status_code != 200:
+         raise RuntimeError(
+             f"Failed to retrieve annotations: {rval.status_code} {rval.text}")
+     annotations = {}
+     for atn in rval.json():
+         # Extract the label ID from the annotation text
+         try:
+             label_id = atn["text"].split("Id: ")[-1].strip()
+         except IndexError:
+             label_id = str(uuid.uuid4())
+         annotations[label_id] = {"text": atn["text"],
+                                  "time": atn["time"],
+                                  "timeEnd": atn.get("timeEnd", None),
+                                  "tags": atn.get("tags", []),
+                                  "id": atn["id"]}
+     return annotations
+
+
+ def post_annotations(baseurl: str, labels: List[Dict[str, Any]]) -> None:
+     """
+     Post annotations from a list of labels if they do not already exist.
+
+     Parameters
+     ----------
+     baseurl : str
+         The base URL of the Grafana API. This is typically in the format
+         "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+     labels : list of dict
+         A list of dictionaries containing label information. Each dictionary
+         should have the following keys:
+         - 'time': The start time of the annotation in ISO format.
+         - 'timeEnd': (optional) The end time of the annotation in ISO format.
+         - 'title': The title of the annotation.
+         - 'description': The description of the annotation.
+         - 'tags': A list of tags associated with the annotation.
+         - 'id': A unique identifier for the label, used to check for duplicates.
+     """
+     url = baseurl + "/api/annotations"
+     header = {"Content-type": "application/json"}
+     for label in labels:
+         existing_labels = get_annotations(baseurl, tags=label['tags'])
+         if str(label['id']) in existing_labels:
+             print(
+                 f"Label with ID {label['id']} already exists. Skipping post.")
+             continue
+         starttime = np.datetime64(label['time']).astype(
+             'datetime64[ms]').astype(int)
+         try:
+             endtime = np.datetime64(label['timeEnd']).astype(
+                 'datetime64[ms]').astype(int)
+             endtime = int(endtime)
+         except KeyError:
+             endtime = None
+         text = f"{label['title']}\n{label['description']}\nId: {label['id']}"
+         new_annotation = {
+             "time": int(starttime),
+             "timeEnd": endtime,
+             "text": text,
+             "tags": label['tags']
+         }
+         rval = requests.post(url, headers=header, json=new_annotation)
+         if rval.status_code != 200:
+             raise RuntimeError(
+                 f"Failed to post annotation: {rval.status_code} {rval.text}")
+         else:
+             print("Annotation posted successfully.")
+
+
+ def main(argv=None):
+     import argparse
+     import json
+     parser = argparse.ArgumentParser(
+         description=__doc__)
+     parser.add_argument('url', type=str,
+                         help='Grafana API URL for annotations.')
+     parser.add_argument('--labels', type=str, default=None,
+                         help='Path to JSON file containing labels to post.')
+     parser.add_argument('--get-annotations', action='store_true',
+                         help='Get existing annotations from Grafana API.')
+     args = parser.parse_args(argv)
+     if args.labels is not None:
+         with open(args.labels, 'r') as f:
+             labels = json.load(f)
+         post_annotations(args.url, labels)
+     elif args.get_annotations:
+         annotations = get_annotations(args.url)
+         print(json.dumps(annotations, indent=2))
+
+
+ if __name__ == "__main__":
+     main()
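
A short usage sketch for the new module; the Grafana credentials, host, and port are placeholders. Note that post_annotations deduplicates against existing annotations via the "Id: ..." suffix it embeds in each annotation's text:

    import json

    from tonik.grafana_annotations import get_annotations, post_annotations

    baseurl = "http://admin:tonikdemo@localhost:11224"  # <user>:<pwd>@<host>:<port>
    with open("whakaari_labels.json") as f:
        labels = json.load(f)

    post_annotations(baseurl, labels)  # labels whose id already exists are skipped
    volcano_annotations = get_annotations(baseurl, tags=["volcano"])
    print(json.dumps(volcano_annotations, indent=2))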
tonik-0.1.17/src/tonik/package_data/whakaari_labels.json (new file)
@@ -0,0 +1,162 @@
+ [
+   {
+     "time": "2012-08-04T00:00:00",
+     "title": "Eruption",
+     "description": "Explosive eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 1
+
+   },
+   {
+     "time": "2012-11-22T00:00:00",
+     "timeEnd": "2012-12-10T00:00:00",
+     "title": "Dome extrusion",
+     "description": "Dome extrusion",
+     "tags": ["volcano", "eruption"],
+     "id": 2
+
+   },
+   {
+     "time": "2013-01-15T00:00:00",
+     "timeEnd": "2013-04-10T00:00:00",
+     "title": "Geysering",
+     "description": "Geysering",
+     "tags": ["volcano", "eruption"],
+     "id": 3
+
+   },
+   {
+     "time": "2013-08-19T00:00:00",
+     "title": "Steam and mud eruption",
+     "description": "Minor steam and mud eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 4
+   },
+   {
+     "time": "2013-10-04T00:00:00",
+     "title": "Steam and mud eruption",
+     "description": "Minor steam and mud eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 5
+   },
+
+   {
+     "time": "2013-10-08T00:00:00",
+     "title": "Steam and mud eruption",
+     "description": "Minor steam and mud eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 6
+   },
+   {
+     "time": "2013-10-11T00:00:00",
+     "title": "Eruption",
+     "description": "Explosive eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 7
+
+   },
+   {
+     "time": "2015-10-13T00:00:00",
+     "timeEnd": "2015-10-20T00:00:00",
+     "title": "Banded tremor",
+     "description": "Banded tremor",
+     "tags": ["volcano", "tremor"],
+     "id": 8
+
+   },
+   {
+     "time": "2016-04-27T00:00:00",
+     "title": "Eruption",
+     "description": "Explosive eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 9
+
+   },
+   {
+     "time": "2016-09-13T00:00:00",
+     "timeEnd": "2016-09-18T00:00:00",
+     "title": "Ashing",
+     "description": "Non-explosive ash venting",
+     "tags": ["volcano", "ash"],
+     "id": 10
+
+   },
+   {
+     "time": "2019-04-23T00:00:00",
+     "timeEnd": "2019-07-01T00:00:00",
+     "title": "Earthquake swarm",
+     "description": "Earthquake swarm",
+     "tags": ["volcano", "swarm"],
+     "id": 11
+
+   },
+   {
+     "time": "2019-12-09T00:00:00",
+     "title": "Eruption",
+     "description": "Explosive eruption",
+     "tags": ["volcano", "eruption"],
+     "id": 12
+
+   },
+   {
+     "time": "2019-12-23T00:00:00",
+     "timeEnd": "2019-12-29T00:00:00",
+     "title": "Ashing",
+     "description": "Minor ash emissions",
+     "tags": ["volcano", "ash"],
+     "id": 13
+
+   },
+   {
+     "time": "2020-01-10T00:00:00",
+     "timeEnd": "2020-01-20T00:00:00",
+     "title": "Dome extrusion",
+     "description": "Dome extrusion",
+     "tags": ["volcano", "eruption"],
+     "id": 14
+
+   },
+   {
+     "time": "2020-11-13T00:00:00",
+     "timeEnd": "2020-12-01T00:00:00",
+     "title": "Ashing",
+     "description": "Minor ash emissions",
+     "tags": ["volcano", "ash"],
+     "id": 15
+
+   },
+   {
+     "time": "2020-12-29T00:00:00",
+     "title": "Eruption",
+     "description": "Small steam explosions",
+     "tags": ["volcano", "eruption"],
+     "id": 16
+
+   },
+   {
+     "time": "2022-09-18T00:00:00",
+     "timeEnd": "2022-09-24T00:00:00",
+     "title": "Ashing",
+     "description": "Minor ash emissions",
+     "tags": ["volcano", "ash"],
+     "id": 17
+
+   },
+   {
+     "time": "2024-05-24T00:00:00",
+     "title": "Eruption",
+     "description": "Small steam explosions",
+     "tags": ["volcano", "eruption"],
+     "id": 18
+
+   },
+   {
+     "time": "2024-07-24T00:00:00",
+     "timeEnd": "2024-09-10T00:00:00",
+     "title": "Ashing",
+     "description": "Minor ash emissions",
+     "tags": ["volcano", "ash"],
+     "id": 19
+
+   }
+ ]
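
The file records 19 labelled volcanic episodes between 2012 and 2024 in the label format expected by grafana_annotations. A hedged sketch of loading it from an installed copy of the package, assuming the JSON is shipped as package data:

    import json
    from importlib import resources  # Python >= 3.9

    labels_path = resources.files("tonik").joinpath("package_data/whakaari_labels.json")
    with labels_path.open() as f:
        labels = json.load(f)
    print(len(labels))  # 19 labelled episodes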
{tonik-0.1.15 → tonik-0.1.17}/src/tonik/utils.py
@@ -59,8 +59,55 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
      return xds


+ def get_dt(times):
+     """
+     Infer the sampling interval of the time dimension.
+     """
+     pd_times = pd.to_datetime(times)
+     dt = pd.infer_freq(pd_times)
+     if dt is None:
+         dt = pd_times.diff().median()
+     try:
+         dt = pd.Timedelta(dt)
+     except ValueError:
+         dt = pd.Timedelta(f"1{dt}")
+     return dt
+
+
+ def fill_time_gaps(xds: xr.Dataset, timedim: str = 'datetime') -> xr.Dataset:
+     """
+     Fill gaps in time series with NaN values by reindexing to a complete datetime range.
+
+     Parameters
+     ----------
+     xds : xr.Dataset
+         Input dataset with potential time gaps.
+     timedim : str
+         Name of the time dimension, by default 'datetime'.
+
+     Returns
+     -------
+     xr.Dataset
+         Dataset with gaps filled with NaN.
+     """
+     if timedim not in xds.coords:
+         raise ValueError(
+             f"{timedim} coordinate not found in dataset coordinates.")
+
+     # Infer sample interval
+     dt = get_dt(xds.coords[timedim])
+     start_time = xds[timedim].values[0]
+     end_time = xds[timedim].values[-1]
+     complete_time = pd.date_range(start=start_time, end=end_time, freq=dt)
+
+     # Reindex to fill gaps with NaN
+     return xds.reindex({timedim: complete_time})
+
+
  def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
-                  resolution: float = None) -> xr.DataArray:
+                  timedim: str = 'datetime', resolution: float = None) -> xr.DataArray:
      """
      Merge two xarray datasets with the same datetime index.

@@ -79,16 +126,17 @@ def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
          Merged array.
      """
      xda_old = xds_old.drop_duplicates(
-         'datetime', keep='last')
+         timedim, keep='last')
      xda_new = xds_new.drop_duplicates(
-         'datetime', keep='last')
+         timedim, keep='last')
      xda_new = xda_new.combine_first(xda_old)
      if resolution is not None:
          new_dates = pd.date_range(
-             xda_new.datetime.values[0],
-             xda_new.datetime.values[-1],
+             xda_new[timedim].values[0],
+             xda_new[timedim].values[-1],
              freq=f'{resolution}h')
-         xda_new = xda_new.reindex(datetime=new_dates)
+         xda_new = xda_new.reindex({timedim: new_dates})
+         xda_new = fill_time_gaps(xda_new, timedim=timedim)
      return xda_new

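A minimal sketch of the new gap-filling behaviour in fill_time_gaps, assuming an hourly series with one missing timestamp; the variable names are illustrative only:

    import pandas as pd
    import xarray as xr

    from tonik.utils import fill_time_gaps

    # Hourly series with the 03:00 sample missing.
    times = pd.date_range("2023-01-01", periods=6, freq="1h").delete(3)
    xds = xr.Dataset({"rsam": ("datetime", [1.0, 2.0, 3.0, 5.0, 6.0])},
                     coords={"datetime": times})

    filled = fill_time_gaps(xds, timedim="datetime")
    # get_dt infers a one-hour sampling interval, so 03:00 is reinserted as NaN:
    print(filled["rsam"].values)  # [ 1.  2.  3. nan  5.  6.]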