tonik 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tonik/grafana_annotations.py +152 -0
- tonik/package_data/whakaari_labels.json +162 -0
- tonik/utils.py +54 -6
- tonik/xarray2zarr.py +201 -32
- {tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/METADATA +5 -4
- tonik-0.1.16.dist-info/RECORD +14 -0
- {tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/WHEEL +1 -1
- {tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/entry_points.txt +1 -0
- tonik-0.1.15.dist-info/RECORD +0 -12
- {tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/licenses/LICENSE +0 -0
tonik/grafana_annotations.py
ADDED
@@ -0,0 +1,152 @@
+"""
+Post labels as Grafana annotations and retrieve them via the Grafana API.
+Labels are assumed to be in the following format:
+
+[
+    {"time": "2023-01-01T00:00:00Z",
+     "title": "Label 1",
+     "description": "Some description",
+     "tags": ["tag1", "tag2"],
+     "id": 12345
+    },
+    {"time": "2023-02-01T00:00:00Z",
+     "timeEnd": "2023-02-01T01:00:00Z",
+     "title": "Label 2",
+     "description": "Another description",
+     "tags": ["tag1", "tag2"],
+     "id": 12346
+    },
+    ...
+]
+
+Note: The `timeEnd` field is optional. If it is provided, the Grafana annotation will
+be a range annotation, otherwise it will be a point annotation. The label id is used to
+check if an annotation was already added to Grafana. If the label id is not present in the
+annotations, a new annotation will be created. If the label id is present, the annotation
+will be skipped to avoid duplicates.
+"""
+
+import numpy as np
+import requests
+import uuid
+from typing import List, Dict, Any, Optional
+
+
+def get_annotations(baseurl: str, tags: Optional[List[str]] = None) -> Dict[str, Dict[str, Any]]:
+    """
+    Get existing annotations from the Grafana API.
+
+    Parameters
+    ----------
+    baseurl : str
+        The base URL of the Grafana API. This is typically in the format
+        "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+    tags : list of str, optional
+        A list of tags to filter the annotations. If None, all annotations are retrieved.
+    Returns
+    -------
+    annotations : dict
+        A dictionary containing the annotations, where the keys are the label IDs
+        and the values are dictionaries with the following keys:
+        - 'text': The text of the annotation.
+        - 'time': The start time of the annotation in milliseconds since epoch.
+        - 'timeEnd': (optional) The end time of the annotation in milliseconds since epoch.
+        - 'tags': A list of tags associated with the annotation.
+        - 'id': The ID of the annotation.
+    """
+    url = baseurl + "/api/annotations"
+    params = {}
+    if tags is not None:
+        params = {"tags": tags}
+    rval = requests.get(url, params=params)
+    if rval.status_code != 200:
+        raise RuntimeError(
+            f"Failed to retrieve annotations: {rval.status_code} {rval.text}")
+    annotations = {}
+    for atn in rval.json():
+        # Extract the label ID from the annotation text
+        try:
+            label_id = atn["text"].split("Id: ")[-1].strip()
+        except IndexError:
+            label_id = str(uuid.uuid4())
+        annotations[label_id] = {"text": atn["text"],
+                                 "time": atn["time"],
+                                 "timeEnd": atn.get("timeEnd", None),
+                                 "tags": atn.get("tags", []),
+                                 "id": atn["id"]}
+    return annotations
+
+
+def post_annotations(baseurl: str, labels: List[Dict[str, Any]]) -> None:
+    """
+    Post annotations from a list of labels if they do not already exist.
+
+    Parameters
+    ----------
+    baseurl : str
+        The base URL of the Grafana API. This is typically in the format
+        "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+    labels : list of dict
+        A list of dictionaries containing label information. Each dictionary
+        should have the following keys:
+        - 'time': The start time of the annotation in ISO format.
+        - 'timeEnd': (optional) The end time of the annotation in ISO format.
+        - 'title': The title of the annotation.
+        - 'description': The description of the annotation.
+        - 'tags': A list of tags associated with the annotation.
+        - 'id': A unique identifier for the label, used to check for duplicates.
+    """
+    url = baseurl + "/api/annotations"
+    header = {"Content-type": "application/json"}
+    for label in labels:
+        existing_labels = get_annotations(baseurl, tags=label['tags'])
+        if str(label['id']) in existing_labels:
+            print(
+                f"Label with ID {label['id']} already exists. Skipping post.")
+            continue
+        starttime = np.datetime64(label['time']).astype(
+            'datetime64[ms]').astype(int)
+        try:
+            endtime = np.datetime64(label['timeEnd']).astype(
+                'datetime64[ms]').astype(int)
+            endtime = int(endtime)
+        except KeyError:
+            endtime = None
+        text = f"{label['title']}\n{label['description']}\nId: {label['id']}"
+        new_annotation = {
+            "time": int(starttime),
+            "timeEnd": endtime,
+            "text": text,
+            "tags": label['tags']
+        }
+        rval = requests.post(url, headers=header, json=new_annotation)
+        if rval.status_code != 200:
+            raise RuntimeError(
+                f"Failed to post annotation: {rval.status_code} {rval.text}")
+        else:
+            print("Annotation posted successfully.")
+
+
+def main(argv=None):
+    import argparse
+    import json
+    parser = argparse.ArgumentParser(
+        description=__doc__)
+    parser.add_argument('url', type=str,
+                        help='Grafana API URL for annotations.')
+    parser.add_argument('--labels', type=str, default=None,
+                        help='Path to JSON file containing labels to post.')
+    parser.add_argument('--get-annotations', action='store_true',
+                        help='Get existing annotations from Grafana API.')
+    args = parser.parse_args(argv)
+    if args.labels is not None:
+        with open(args.labels, 'r') as f:
+            labels = json.load(f)
+        post_annotations(args.url, labels)
+    elif args.get_annotations:
+        annotations = get_annotations(args.url)
+        print(json.dumps(annotations, indent=2))
+
+
+if __name__ == "__main__":
+    main()
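The module is directly executable (it guards main() behind __name__ == "__main__"), and the entry_points.txt change below suggests a console-script wrapper as well. A minimal sketch of the API, assuming a reachable Grafana instance; host, credentials, and label content are placeholders:

    from tonik.grafana_annotations import get_annotations, post_annotations

    baseurl = "http://admin:admin@localhost:3000"  # placeholder user/password/host
    labels = [{"time": "2023-01-01T00:00:00Z",
               "title": "Label 1",
               "description": "Some description",
               "tags": ["tag1", "tag2"],
               "id": 12345}]

    # Skips any label whose id already appears in an annotation's text.
    post_annotations(baseurl, labels)

    # Returns a dict keyed by the label id parsed back out of each annotation.
    print(get_annotations(baseurl, tags=["tag1"]))

The same flow is available from the command line, e.g. python -m tonik.grafana_annotations <url> --labels labels.json, or with --get-annotations to dump existing annotations as JSON.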
tonik/package_data/whakaari_labels.json
ADDED
@@ -0,0 +1,162 @@
+[
+  {
+    "time": "2012-08-04T00:00:00",
+    "title": "Eruption",
+    "description": "Explosive eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 1
+
+  },
+  {
+    "time": "2012-11-22T00:00:00",
+    "timeEnd": "2012-12-10T00:00:00",
+    "title": "Dome extrusion",
+    "description": "Dome extrusion",
+    "tags": ["volcano", "eruption"],
+    "id": 2
+
+  },
+  {
+    "time": "2013-01-15T00:00:00",
+    "timeEnd": "2013-04-10T00:00:00",
+    "title": "Geysering",
+    "description": "Geysering",
+    "tags": ["volcano", "eruption"],
+    "id": 3
+
+  },
+  {
+    "time": "2013-08-19T00:00:00",
+    "title": "Steam and mud eruption",
+    "description": "Minor steam and mud eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 4
+  },
+  {
+    "time": "2013-10-04T00:00:00",
+    "title": "Steam and mud eruption",
+    "description": "Minor steam and mud eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 5
+  },
+
+  {
+    "time": "2013-10-08T00:00:00",
+    "title": "Steam and mud eruption",
+    "description": "Minor steam and mud eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 6
+  },
+  {
+    "time": "2013-10-11T00:00:00",
+    "title": "Eruption",
+    "description": "Explosive eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 7
+
+  },
+  {
+    "time": "2015-10-13T00:00:00",
+    "timeEnd": "2015-10-20T00:00:00",
+    "title": "Banded tremor",
+    "description": "Banded tremor",
+    "tags": ["volcano", "tremor"],
+    "id": 8
+
+  },
+  {
+    "time": "2016-04-27T00:00:00",
+    "title": "Eruption",
+    "description": "Explosive eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 9
+
+  },
+  {
+    "time": "2016-09-13T00:00:00",
+    "timeEnd": "2016-09-18T00:00:00",
+    "title": "Ashing",
+    "description": "Non-explosive ash venting",
+    "tags": ["volcano", "ash"],
+    "id": 10
+
+  },
+  {
+    "time": "2019-04-23T00:00:00",
+    "timeEnd": "2019-07-01T00:00:00",
+    "title": "Earthquake swarm",
+    "description": "Earthquake swarm",
+    "tags": ["volcano", "swarm"],
+    "id": 11
+
+  },
+  {
+    "time": "2019-12-09T00:00:00",
+    "title": "Eruption",
+    "description": "Explosive eruption",
+    "tags": ["volcano", "eruption"],
+    "id": 12
+
+  },
+  {
+    "time": "2019-12-23T00:00:00",
+    "timeEnd": "2019-12-29T00:00:00",
+    "title": "Ashing",
+    "description": "Minor ash emissions",
+    "tags": ["volcano", "ash"],
+    "id": 13
+
+  },
+  {
+    "time": "2020-01-10T00:00:00",
+    "timeEnd": "2020-01-20T00:00:00",
+    "title": "Dome extrusion",
+    "description": "Dome extrusion",
+    "tags": ["volcano", "eruption"],
+    "id": 14
+
+  },
+  {
+    "time": "2020-11-13T00:00:00",
+    "timeEnd": "2020-12-01T00:00:00",
+    "title": "Ashing",
+    "description": "Minor ash emissions",
+    "tags": ["volcano", "ash"],
+    "id": 15
+
+  },
+  {
+    "time": "2020-12-29T00:00:00",
+    "title": "Eruption",
+    "description": "Small steam explosions",
+    "tags": ["volcano", "eruption"],
+    "id": 16
+
+  },
+  {
+    "time": "2022-09-18T00:00:00",
+    "timeEnd": "2022-09-24T00:00:00",
+    "title": "Ashing",
+    "description": "Minor ash emissions",
+    "tags": ["volcano", "ash"],
+    "id": 17
+
+  },
+  {
+    "time": "2024-05-24T00:00:00",
+    "title": "Eruption",
+    "description": "Small steam explosions",
+    "tags": ["volcano", "eruption"],
+    "id": 18
+
+  },
+  {
+    "time": "2024-07-24T00:00:00",
+    "timeEnd": "2024-09-10T00:00:00",
+    "title": "Ashing",
+    "description": "Minor ash emissions",
+    "tags": ["volcano", "ash"],
+    "id": 19
+
+  }
+]
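The file ships volcanic activity labels for Whakaari (White Island) in exactly the format the annotation module above expects. A sketch for loading the bundled copy, assuming importlib.resources can reach the package data (the file lands at tonik/package_data/whakaari_labels.json per the RECORD below):

    import json
    from importlib import resources

    labels_path = resources.files("tonik") / "package_data" / "whakaari_labels.json"
    labels = json.loads(labels_path.read_text())
    print(len(labels), labels[0]["title"])  # 19 labels, the first titled "Eruption"

These labels could then be passed straight to post_annotations.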
tonik/utils.py
CHANGED
@@ -59,8 +59,53 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     return xds
 
 
+def get_dt(times):
+    """
+    Infer the sampling interval of the time dimension.
+    """
+    pd_times = pd.to_datetime(times)
+    dt = pd.infer_freq(pd_times)
+    if dt is None:
+        dt = pd_times.diff().median()
+    try:
+        dt = pd.Timedelta(dt)
+    except ValueError:
+        dt = pd.Timedelta(f"1{dt}")
+    return dt
+
+
+def fill_time_gaps(xds: xr.Dataset, timedim: str = 'datetime') -> xr.Dataset:
+    """
+    Fill gaps in time series with NaN values by reindexing to a complete datetime range.
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+        Input dataset with potential time gaps. The sampling interval is inferred.
+    timedim : str
+        Name of the time dimension, by default 'datetime'
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with gaps filled with NaN
+    """
+    if timedim not in xds.coords:
+        raise ValueError(
+            f"{timedim} coordinate not found in dataset coordinates.")
+
+    # Infer sample interval
+    dt = get_dt(xds.coords[timedim])
+    start_time = xds[timedim].values[0]
+    end_time = xds[timedim].values[-1]
+    complete_time = pd.date_range(start=start_time, end=end_time, freq=dt)
+
+    # Reindex to fill gaps with NaN
+    return xds.reindex({timedim: complete_time})
+
+
 def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
-                 resolution: float = None) -> xr.DataArray:
+                 timedim: str = 'datetime', resolution: float = None) -> xr.DataArray:
     """
     Merge two xarray datasets with the same datetime index.
 
@@ -79,16 +124,17 @@ def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
         Merged array.
     """
     xda_old = xds_old.drop_duplicates(
-        'datetime', keep='last')
+        timedim, keep='last')
     xda_new = xds_new.drop_duplicates(
-        'datetime', keep='last')
+        timedim, keep='last')
     xda_new = xda_new.combine_first(xda_old)
     if resolution is not None:
         new_dates = pd.date_range(
-            xda_new.datetime.values[0],
-            xda_new.datetime.values[-1],
+            xda_new[timedim].values[0],
+            xda_new[timedim].values[-1],
             freq=f'{resolution}h')
-        xda_new = xda_new.reindex(
+        xda_new = xda_new.reindex({timedim: new_dates})
+        xda_new = fill_time_gaps(xda_new, timedim=timedim)
     return xda_new
 
 
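The new helpers back both merge_arrays and the zarr writer below: get_dt infers the sampling interval (falling back to the median time difference when pandas cannot infer a frequency), and fill_time_gaps reindexes onto a complete range so gaps become NaN. A quick sketch with synthetic data; variable and dimension names are illustrative:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from tonik.utils import fill_time_gaps, get_dt

    # Hourly series with a two-sample hole.
    times = pd.date_range("2024-01-01", periods=10, freq="h").delete([5, 6])
    xds = xr.Dataset({"rsam": ("datetime", np.arange(8.0))},
                     coords={"datetime": times})

    print(get_dt(xds.datetime))              # 0 days 01:00:00
    filled = fill_time_gaps(xds)             # reindexed onto the full hourly range
    print(int(filled.rsam.isnull().sum()))   # 2 NaN samples fill the hole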
tonik/xarray2zarr.py
CHANGED
@@ -1,6 +1,8 @@
 import logging
 import os
 
+import numpy as np
+import pandas as pd
 import xarray as xr
 try:
     from zarr.errors import PathNotFoundError
@@ -8,12 +10,142 @@ except ImportError:
     class PathNotFoundError(Exception):
         pass
 
-from .utils import merge_arrays
+from .utils import merge_arrays, fill_time_gaps, get_dt
 
 logger = logging.getLogger(__name__)
 
 
-def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
+def get_chunks(xda: xr.DataArray, chunks: int = 1,
+               timedim: str = 'datetime') -> int:
+    """
+    Determine the chunk size for the datetime dimension. Other dimensions are assumed to be
+    small enough to not require chunking.
+
+    Parameters
+    ----------
+    xda : xr.DataArray
+        Array containing the datetime coordinate.
+    chunks : int, optional
+        Length of a chunk in days along the datetime dimension, by default 1.
+    """
+    if timedim not in xda.coords:
+        raise ValueError(
+            f"Datetime coordinate {timedim} not found in dataset coordinates.")
+    dt = get_dt(xda.coords[timedim])
+    chunklength = int(pd.Timedelta('%dD' % chunks) / dt)
+    return chunklength
+
+
+def fill_time_gaps_between_datasets(xds_existing: xr.DataArray, xds_new: xr.DataArray, mode: str,
+                                    timedim: str = 'datetime') -> xr.DataArray:
+    """
+    Fill gaps between existing and new datasets.
+
+    Parameters
+    ----------
+    xds_existing : xr.Dataset
+        Existing dataset on disk
+    xds_new : xr.Dataset
+        New dataset to append
+    timedim : str
+        Name of the time dimension
+
+    Returns
+    -------
+    xr.Dataset
+        Combined dataset with gaps filled
+    """
+    if mode not in ['a', 'p']:
+        raise ValueError(
+            'Mode has to be either "a" for append or "p" for prepend')
+
+    # get the sample interval
+    dt = get_dt(xds_new.coords[timedim])
+
+    existing_endpoint = xds_existing[timedim].values
+    # Get time ranges
+    if mode == 'a':
+        gap_start = existing_endpoint + dt
+        gap_end = xds_new[timedim].values[0] - dt
+    elif mode == 'p':
+        gap_end = existing_endpoint - dt
+        gap_start = xds_new[timedim].values[-1] + dt
+
+    if gap_start <= gap_end:
+        gap_times = pd.date_range(start=gap_start, end=gap_end, freq=dt)
+
+        # Create NaN array with same shape as variable but for gap times
+        gap_shape = (len(gap_times),) + \
+            xds_new.shape[1:]  # Skip time dimension
+        gap_values = np.full(gap_shape, np.nan)
+
+        # Create coordinates for gap dataset
+        gap_coords = {timedim: gap_times}
+        for coord_name, coord in xds_new.coords.items():
+            if coord_name != timedim:
+                gap_coords[coord_name] = coord
+
+        gap_data = xr.DataArray(
+            gap_values,
+            coords=gap_coords,
+            dims=xds_new.dims,
+            name=xds_new.name
+        )
+
+        # Combine: existing + gap + new
+        if mode == 'a':
+            combined = xr.concat([gap_data, xds_new], dim=timedim)
+        elif mode == 'p':
+            combined = xr.concat([xds_new, gap_data], dim=timedim)
+        return combined
+    else:
+        return xds_new
+
+
+def _build_append_payload_full_chunks(payload: xr.DataArray, mode: str,
+                                      chunklen: int, timedim: str = "datetime") -> xr.DataArray:
+    """
+    Construct the sequence to append so that the final total length is a multiple of `chunklen`.
+    """
+    if mode not in ['a', 'p']:
+        raise ValueError(
+            'Mode has to be either "a" for append or "p" for prepend')
+
+    # pad the tail so that payload_len % chunklen == 0
+    pay_len = payload.sizes[timedim]
+    need = -pay_len % chunklen  # 0..chunklen-1
+
+    if need > 0:
+        dt = get_dt(payload.coords[timedim])
+        if mode == 'a':
+            start = payload[timedim].values[-1] + dt
+        elif mode == 'p':
+            start = payload[timedim].values[0] - (need+1)*dt
+        pad_times = pd.date_range(start=start, periods=need, freq=dt)
+        pad_shape = []
+        for i, d in enumerate(payload.dims):
+            if d == timedim:
+                pad_shape.append(need)
+            else:
+                pad_shape.append(payload.shape[i])
+        pad_vals = np.full(pad_shape, np.nan)
+        pad_coords = {timedim: pad_times}
+        for c in payload.coords:
+            if c != timedim:
+                pad_coords[c] = payload.coords[c]
+        pad_da = xr.DataArray(pad_vals, coords=pad_coords,
+                              dims=payload.dims, name=payload.name, attrs=payload.attrs)
+        if mode == 'a':
+            payload = xr.concat([payload, pad_da], dim=timedim)
+        elif mode == 'p':
+            payload = xr.concat([pad_da, payload], dim=timedim)
+    payload = payload.chunk({timedim: chunklen})
+    return payload
+#
+
+
+def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
+                chunks: int = 10, timedim: str = 'datetime') -> None:
     """
     Write xarray dataset to zarr files.
 
@@ -27,41 +159,76 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
         Write mode, by default 'a'.
     group : str, optional
         Group name, by default 'original'
+    chunks : int, optional
+        Chunk size as the number of days.
+    timedim : str
+        Name of the time dimension, by default 'datetime'
 
     Returns
     -------
    None
    """
+
+    if timedim not in xds.dims:
+        raise ValueError(f"{timedim} dimension not found in Dataset.")
+
+    # Fill gaps
+    xds = xds.drop_duplicates(timedim, keep='last')
+    xds = fill_time_gaps(xds, timedim=timedim)
+
     for feature in xds.data_vars.keys():
         fout = os.path.join(path, feature + '.zarr')
-
-
-
+        # nchunks = get_chunks(xds[feature], chunks)
+        nchunks = chunks
+        try:
+            xds_existing = xr.open_zarr(fout, group=group)
+            has_store = True
+        except (PathNotFoundError, FileNotFoundError, KeyError):
+            has_store = False
+
+        if not has_store:
+            xda_new = _build_append_payload_full_chunks(
+                xds[feature], 'a', nchunks)
+            xda_new.to_zarr(fout, group=group, mode='w',
+                            write_empty_chunks=True)
+            continue
+
+        if xds_existing[timedim][0] > xds[timedim][-1]:
+            # prepend
+            xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
+                                                      xds[feature], mode='p')
+            xda_new = _build_append_payload_full_chunks(
+                xda_new, 'p', nchunks)
+            combined = xda_new.combine_first(xds_existing[feature]).compute()
+            combined.chunk({timedim: nchunks}).to_zarr(fout, group=group, mode='w',
+                                                       write_empty_chunks=True)
+        elif xds_existing[timedim][-1] < xds[timedim][0]:
+            # append
+            xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: -1}),
+                                                      xds[feature], mode='a')
+            xda_new = _build_append_payload_full_chunks(
+                xda_new, 'a', nchunks)
+            xda_new.to_zarr(fout, group=group, mode='a',
+                            append_dim=timedim)
+        elif xds_existing[timedim][0] > xds[timedim][0] and xds_existing[timedim][-1] < xds[timedim][-1]:
+            # existing datetimes are contained in new array
+            xda_new = _build_append_payload_full_chunks(
+                xds[feature], 'a', nchunks)
+            xda_new.to_zarr(fout, group=group, mode='w',
+                            write_empty_chunks=True)
         else:
-
-            xds_existing
-
-
-
-            if
-
-
-
-
-
-
-
-
-
-                xds[feature].drop_sel(datetime=overlap).to_zarr(
-                    fout, group=group, mode='a', append_dim="datetime")
-            else:
-                xds[feature].to_zarr(
-                    fout, group=group, append_dim='datetime')
-        except Exception as e:
-            msg = f"Appending {feature} to {fout} failed: {e}\n"
-            msg += "Attempting to merge the two datasets."
-            logger.error(msg)
-            # remove duplicate datetime entries
-            xda_new = merge_arrays(xds_existing[feature], xds[feature])
-            xda_new.to_zarr(fout, group=group, mode='w')
+            overlap = xds_existing[timedim].where(
+                xds_existing[timedim] == xds[timedim])
+            xds[feature].loc[{timedim: overlap}].to_zarr(
+                fout, group=group, mode='r+', region='auto')
+            remainder = xds[feature].drop_sel({timedim: overlap})
+            if remainder.sizes[timedim] > 0:
+                mode = 'a'
+                if remainder[timedim][-1] < xds_existing[timedim][0]:
+                    mode = 'p'
+                xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
                                                          xds[feature], mode=mode)
+                xda_new = _build_append_payload_full_chunks(
+                    xda_new, mode, nchunks)
+                xda_new.to_zarr(fout, group=group, mode='a',
+                                append_dim=timedim)
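The rewritten writer pads every payload so the stored time axis is a whole number of chunks: for a payload of length pay_len and chunk length chunklen, need = -pay_len % chunklen NaN samples are added (e.g. a 50-sample payload with chunklen 24 gets 22 padding samples). Note that with the get_chunks call commented out, chunks is currently used directly as samples per chunk rather than days. A usage sketch with synthetic data; the path and sizes are illustrative:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from tonik.xarray2zarr import xarray2zarr

    times = pd.date_range("2024-01-01", periods=48, freq="h")
    xds = xr.Dataset({"rsam": ("datetime", np.random.rand(48))},
                     coords={"datetime": times})

    # Writes /tmp/tonik_demo/rsam.zarr with 24 samples per chunk
    # (48 % 24 == 0, so no NaN padding is needed here).
    xarray2zarr(xds, "/tmp/tonik_demo", group="original", chunks=24)

A later call whose times start after the stored end appends whole chunks; one that ends before the stored start prepends and rewrites the store; overlapping times are written in place via region='auto'.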
{tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: tonik
-Version: 0.1.15
+Version: 0.1.16
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -18,10 +18,11 @@ Requires-Dist: matplotlib
 Requires-Dist: netcdf4>=1.6
 Requires-Dist: pandas>=2.0
 Requires-Dist: python-json-logger>=2.0
+Requires-Dist: s3fs
 Requires-Dist: uvicorn[standard]>=0.22
 Requires-Dist: xarray[accel,io,parallel]
-Requires-Dist: zarr<3; python_version < '3.11'
-Requires-Dist: zarr>=3.0.3; python_version >= '3.11'
+Requires-Dist: zarr[remote-tests]<3; python_version < '3.11'
+Requires-Dist: zarr[remote-tests]>=3.0.3; python_version >= '3.11'
 Provides-Extra: dev
 Requires-Dist: httpx; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
tonik-0.1.16.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
+tonik/api.py,sha256=XDKiz1AzYNBOwYfaRxpMgqGRDAPJEE6wWJyBxuYPRLc,7751
+tonik/grafana_annotations.py,sha256=ZU9Cy-HT4vvMfYIQzD9WboaDVOCBDv__NmXbk1qKWJo,5838
+tonik/storage.py,sha256=vFxIrY92cSYOYOpNXHxCAjdXgkrRytaRDpudtK0glmg,10608
+tonik/utils.py,sha256=vRFMoCU7dbfnnm5RALBR-XrpPGDFtQoeTDzxFiYf3bo,7522
+tonik/xarray2netcdf.py,sha256=gDNT6nxnRbXPeRqZ3URW5oXY3Nfh3TCrfueE-eUrIoY,5181
+tonik/xarray2zarr.py,sha256=SSchDqy5oyYrIG4smV8fslsUg2UPSyyQjUA5ZlP1P4I,8630
+tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
+tonik/package_data/whakaari_labels.json,sha256=96UZSq41yXgAJxuKivLBKlRTw-33jkjh7AGKTsDQ9Yg,3993
+tonik-0.1.16.dist-info/METADATA,sha256=EOwmXNC5b6IJsnTLMelBZ3vL1ljkfZwhM8Hoz6iHiZQ,2191
+tonik-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+tonik-0.1.16.dist-info/entry_points.txt,sha256=y82XyTeQddM87gCTzgSQaTlKF3VFicO4hhClHUv6j1A,127
+tonik-0.1.16.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.1.16.dist-info/RECORD,,
tonik-0.1.15.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
-tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
-tonik/api.py,sha256=XDKiz1AzYNBOwYfaRxpMgqGRDAPJEE6wWJyBxuYPRLc,7751
-tonik/storage.py,sha256=vFxIrY92cSYOYOpNXHxCAjdXgkrRytaRDpudtK0glmg,10608
-tonik/utils.py,sha256=9eSVKIbs8TIZlJCz_-B7FrvOUQCQHO3K52v4Heus-uE,6135
-tonik/xarray2netcdf.py,sha256=gDNT6nxnRbXPeRqZ3URW5oXY3Nfh3TCrfueE-eUrIoY,5181
-tonik/xarray2zarr.py,sha256=RhCnS6g3yqe8mrEXhD_4PCN0EI3QPhp5X7ui_wvb_jY,2445
-tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
-tonik-0.1.15.dist-info/METADATA,sha256=fgLopxgfF1ooNHl2DEYN3dFivpSknlfRrAL2eledYKo,2143
-tonik-0.1.15.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
-tonik-0.1.15.dist-info/entry_points.txt,sha256=mT3B4eBE8SHlAeMhFnZGor9-YkVtoWM1NVHVuypJ-uY,74
-tonik-0.1.15.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.1.15.dist-info/RECORD,,

{tonik-0.1.15.dist-info → tonik-0.1.16.dist-info}/licenses/LICENSE
File without changes