tonik 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/grafana_annotations.py ADDED
@@ -0,0 +1,152 @@
+ """
+ Post labels as Grafana annotations and retrieve them via the Grafana API.
+ Labels are assumed to be in the following format:
+
+ [
+ {"time": "2023-01-01T00:00:00Z",
+ "title": "Label 1",
+ "description": "Some description",
+ "tags": ["tag1", "tag2"],
+ "id": 12345
+ },
+ {"time": "2023-02-01T00:00:00Z",
+ "timeEnd": "2023-02-01T01:00:00Z",
+ "title": "Label 2",
+ "description": "Another description",
+ "tags": ["tag1", "tag2"],
+ "id": 12346
+ },
+ ...
+ ]
+
+ Note: The `timeEnd` field is optional. If it is provided, the Grafana annotation will
+ be a range annotation, otherwise it will be a point annotation. The label id is used to
+ check if an annotation was already added to Grafana: if the label id is not present in the
+ existing annotations, a new annotation is created; if it is present, the annotation
+ is skipped to avoid duplicates.
+ """
+
+ import numpy as np
+ import requests
+ import uuid
+ from typing import List, Dict, Any, Optional
+
+
+ def get_annotations(baseurl: str, tags: Optional[List[str]] = None) -> Dict[str, Dict[str, Any]]:
+ """
+ Get existing annotations from the Grafana API.
+
+ Parameters
+ ----------
+ baseurl : str
+ The base URL of the Grafana API. This is typically in the format
+ "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+ tags : list of str, optional
+ A list of tags to filter the annotations. If None, all annotations are retrieved.
+ Returns
+ -------
+ annotations : dict
+ A dictionary containing the annotations, where the keys are the label IDs
+ and the values are dictionaries with the following keys:
+ - 'text': The text of the annotation.
+ - 'time': The start time of the annotation in milliseconds since epoch.
+ - 'timeEnd': (optional) The end time of the annotation in milliseconds since epoch.
+ - 'tags': A list of tags associated with the annotation.
+ - 'id': The ID of the annotation.
+ """
+ url = baseurl + "/api/annotations"
+ params = {}
+ if tags is not None:
+ params = {"tags": tags}
+ rval = requests.get(url, params=params)
+ if rval.status_code != 200:
+ raise RuntimeError(
+ f"Failed to retrieve annotations: {rval.status_code} {rval.text}")
+ annotations = {}
+ for atn in rval.json():
+ # Extract the label ID from the annotation text
+ try:
+ label_id = atn["text"].split("Id: ")[-1].strip()
+ except IndexError:
+ label_id = str(uuid.uuid4())
+ annotations[label_id] = {"text": atn["text"],
+ "time": atn["time"],
+ "timeEnd": atn.get("timeEnd", None),
+ "tags": atn.get("tags", []),
+ "id": atn["id"]}
+ return annotations
+
+
+ def post_annotations(baseurl: str, labels: List[Dict[str, Any]]) -> None:
+ """
+ Post annotations from a list of labels if they do not already exist.
+
+ Parameters
+ ----------
+ baseurl : str
+ The base URL of the Grafana API. This is typically in the format
+ "http://<grafana-user>:<user-pwd>@<grafana-host>:<port>".
+ labels : list of dict
+ A list of dictionaries containing label information. Each dictionary
+ should have the following keys:
+ - 'time': The start time of the annotation in ISO format.
+ - 'timeEnd': (optional) The end time of the annotation in ISO format.
+ - 'title': The title of the annotation.
+ - 'description': The description of the annotation.
+ - 'tags': A list of tags associated with the annotation.
+ - 'id': A unique identifier for the label, used to check for duplicates.
+ """
+ url = baseurl + "/api/annotations"
+ header = {"Content-type": "application/json"}
+ for label in labels:
+ existing_labels = get_annotations(baseurl, tags=label['tags'])
+ if str(label['id']) in existing_labels:
+ print(
+ f"Label with ID {label['id']} already exists. Skipping post.")
+ continue
+ starttime = np.datetime64(label['time']).astype(
+ 'datetime64[ms]').astype(int)
+ try:
+ endtime = np.datetime64(label['timeEnd']).astype(
+ 'datetime64[ms]').astype(int)
+ endtime = int(endtime)
+ except KeyError:
+ endtime = None
+ text = f"{label['title']}\n{label['description']}\nId: {label['id']}"
+ new_annotation = {
+ "time": int(starttime),
+ "timeEnd": endtime,
+ "text": text,
+ "tags": label['tags']
+ }
+ rval = requests.post(url, headers=header, json=new_annotation)
+ if rval.status_code != 200:
+ raise RuntimeError(
+ f"Failed to post annotation: {rval.status_code} {rval.text}")
+ else:
+ print("Annotation posted successfully.")
+
+
+ def main(argv=None):
+ import argparse
+ import json
+ parser = argparse.ArgumentParser(
+ description=__doc__)
+ parser.add_argument('url', type=str,
+ help='Grafana API URL for annotations.')
+ parser.add_argument('--labels', type=str, default=None,
+ help='Path to JSON file containing labels to post.')
+ parser.add_argument('--get-annotations', action='store_true',
+ help='Get existing annotations from Grafana API.')
+ args = parser.parse_args(argv)
+ if args.labels is not None:
+ with open(args.labels, 'r') as f:
+ labels = json.load(f)
+ post_annotations(args.url, labels)
+ elif args.get_annotations:
+ annotations = get_annotations(args.url)
+ print(json.dumps(annotations, indent=2))
+
+
+ if __name__ == "__main__":
+ main()
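
Editorial usage sketch (not part of the released wheel): it assumes tonik 0.1.16 is installed and a Grafana instance reachable at the placeholder URL below; the credentials, host and labels.json path are illustrative only.

# Editorial sketch only -- URL, credentials and file path are placeholders.
import json

from tonik.grafana_annotations import get_annotations, post_annotations

baseurl = "http://admin:admin@localhost:3000"   # hypothetical Grafana instance

# Post labels shaped like the module docstring example; labels whose id is
# already present in Grafana are skipped by post_annotations.
with open("labels.json") as f:                  # hypothetical file
    labels = json.load(f)
post_annotations(baseurl, labels)

# Read annotations back, filtered by tag, keyed by label id.
annotations = get_annotations(baseurl, tags=["tag1"])
print(json.dumps(annotations, indent=2))

The same workflow is exposed through the new grafana_annotations console script registered in entry_points.txt below, which takes the Grafana URL plus --labels or --get-annotations.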
tonik/package_data/whakaari_labels.json ADDED
@@ -0,0 +1,162 @@
+ [
+ {
+ "time": "2012-08-04T00:00:00",
+ "title": "Eruption",
+ "description": "Explosive eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 1
+
+ },
+ {
+ "time": "2012-11-22T00:00:00",
+ "timeEnd": "2012-12-10T00:00:00",
+ "title": "Dome extrusion",
+ "description": "Dome extrusion",
+ "tags": ["volcano", "eruption"],
+ "id": 2
+
+ },
+ {
+ "time": "2013-01-15T00:00:00",
+ "timeEnd": "2013-04-10T00:00:00",
+ "title": "Geysering",
+ "description": "Geysering",
+ "tags": ["volcano", "eruption"],
+ "id": 3
+
+ },
+ {
+ "time": "2013-08-19T00:00:00",
+ "title": "Steam and mud eruption",
+ "description": "Minor steam and mud eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 4
+ },
+ {
+ "time": "2013-10-04T00:00:00",
+ "title": "Steam and mud eruption",
+ "description": "Minor steam and mud eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 5
+ },
+
+ {
+ "time": "2013-10-08T00:00:00",
+ "title": "Steam and mud eruption",
+ "description": "Minor steam and mud eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 6
+ },
+ {
+ "time": "2013-10-11T00:00:00",
+ "title": "Eruption",
+ "description": "Explosive eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 7
+
+ },
+ {
+ "time": "2015-10-13T00:00:00",
+ "timeEnd": "2015-10-20T00:00:00",
+ "title": "Banded tremor",
+ "description": "Banded tremor",
+ "tags": ["volcano", "tremor"],
+ "id": 8
+
+ },
+ {
+ "time": "2016-04-27T00:00:00",
+ "title": "Eruption",
+ "description": "Explosive eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 9
+
+ },
+ {
+ "time": "2016-09-13T00:00:00",
+ "timeEnd": "2016-09-18T00:00:00",
+ "title": "Ashing",
+ "description": "Non-explosive ash venting",
+ "tags": ["volcano", "ash"],
+ "id": 10
+
+ },
+ {
+ "time": "2019-04-23T00:00:00",
+ "timeEnd": "2019-07-01T00:00:00",
+ "title": "Earthquake swarm",
+ "description": "Earthquake swarm",
+ "tags": ["volcano", "swarm"],
+ "id": 11
+
+ },
+ {
+ "time": "2019-12-09T00:00:00",
+ "title": "Eruption",
+ "description": "Explosive eruption",
+ "tags": ["volcano", "eruption"],
+ "id": 12
+
+ },
+ {
+ "time": "2019-12-23T00:00:00",
+ "timeEnd": "2019-12-29T00:00:00",
+ "title": "Ashing",
+ "description": "Minor ash emissions",
+ "tags": ["volcano", "ash"],
+ "id": 13
+
+ },
+ {
+ "time": "2020-01-10T00:00:00",
+ "timeEnd": "2020-01-20T00:00:00",
+ "title": "Dome extrusion",
+ "description": "Dome extrusion",
+ "tags": ["volcano", "eruption"],
+ "id": 14
+
+ },
+ {
+ "time": "2020-11-13T00:00:00",
+ "timeEnd": "2020-12-01T00:00:00",
+ "title": "Ashing",
+ "description": "Minor ash emissions",
+ "tags": ["volcano", "ash"],
+ "id": 15
+
+ },
+ {
+ "time": "2020-12-29T00:00:00",
+ "title": "Eruption",
+ "description": "Small steam explosions",
+ "tags": ["volcano", "eruption"],
+ "id": 16
+
+ },
+ {
+ "time": "2022-09-18T00:00:00",
+ "timeEnd": "2022-09-24T00:00:00",
+ "title": "Ashing",
+ "description": "Minor ash emissions",
+ "tags": ["volcano", "ash"],
+ "id": 17
+
+ },
+ {
+ "time": "2024-05-24T00:00:00",
+ "title": "Eruption",
+ "description": "Small steam explosions",
+ "tags": ["volcano", "eruption"],
+ "id": 18
+
+ },
+ {
+ "time": "2024-07-24T00:00:00",
+ "timeEnd": "2024-09-10T00:00:00",
+ "title": "Ashing",
+ "description": "Minor ash emissions",
+ "tags": ["volcano", "ash"],
+ "id": 19
+
+ }
+ ]
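
A hypothetical way to post the bundled Whakaari labels (editorial, not part of the diff): the package-data path comes from the RECORD below, while the Grafana URL and the use of importlib.resources are assumptions.

# Editorial sketch only: load the bundled labels and post them to a
# hypothetical Grafana instance at a placeholder URL.
import json
from importlib import resources

from tonik.grafana_annotations import post_annotations

labels_file = resources.files("tonik") / "package_data" / "whakaari_labels.json"
labels = json.loads(labels_file.read_text())
post_annotations("http://admin:admin@localhost:3000", labels)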
tonik/utils.py CHANGED
@@ -59,8 +59,55 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
  return xds


+ def get_dt(times):
+ """
+ Infer the sampling interval of the time dimension.
+ """
+ pd_times = pd.to_datetime(times)
+ dt = pd.infer_freq(pd_times)
+ if dt is None:
+ dt = pd_times.diff().median()
+ try:
+ dt = pd.Timedelta(dt)
+ except ValueError:
+ dt = pd.Timedelta(f"1{dt}")
+ return dt
+
+
+ def fill_time_gaps(xds: xr.Dataset, timedim: str = 'datetime') -> xr.Dataset:
+ """
+ Fill gaps in time series with NaN values by reindexing to a complete datetime range.
+
+ Parameters
+ ----------
+ xds : xr.Dataset
+ Input dataset with potential time gaps. The sampling interval is
+ inferred from the time coordinate using `get_dt`.
+ timedim : str
+ Name of the time dimension, by default 'datetime'
+
+ Returns
+ -------
+ xr.Dataset
+ Dataset with gaps filled with NaN
+ """
+ if timedim not in xds.coords:
+ raise ValueError(
+ f"{timedim} coordinate not found in dataset coordinates.")
+
+ # Infer sample interval
+ dt = get_dt(xds.coords[timedim])
+ start_time = xds[timedim].values[0]
+ end_time = xds[timedim].values[-1]
+ complete_time = pd.date_range(start=start_time, end=end_time, freq=dt)
+
+ # Reindex to fill gaps with NaN
+ return xds.reindex({timedim: complete_time})
+
+
  def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
- resolution: float = None) -> xr.DataArray:
+ timedim: str = 'datetime', resolution: float = None) -> xr.DataArray:
  """
  Merge two xarray datasets with the same datetime index.

@@ -79,16 +126,17 @@ def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
  Merged array.
  """
  xda_old = xds_old.drop_duplicates(
- 'datetime', keep='last')
+ timedim, keep='last')
  xda_new = xds_new.drop_duplicates(
- 'datetime', keep='last')
+ timedim, keep='last')
  xda_new = xda_new.combine_first(xda_old)
  if resolution is not None:
  new_dates = pd.date_range(
- xda_new.datetime.values[0],
- xda_new.datetime.values[-1],
+ xda_new[timedim].values[0],
+ xda_new[timedim].values[-1],
  freq=f'{resolution}h')
- xda_new = xda_new.reindex(datetime=new_dates)
+ xda_new = xda_new.reindex({timedim: new_dates})
+ xda_new = fill_time_gaps(xda_new, timedim=timedim)
  return xda_new


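Editorial sketch (not part of the wheel) of the new gap-filling helpers in tonik.utils: the dataset is synthetic, the variable name rsam is illustrative, and a pandas version with Index.diff (>= 2.1) is assumed for the irregular-spacing fallback.

# Editorial sketch: infer the sampling interval and fill a gap with NaNs.
import numpy as np
import pandas as pd
import xarray as xr

from tonik.utils import fill_time_gaps, get_dt

times = pd.date_range("2024-01-01", periods=10, freq="h").delete([4, 5])
xds = xr.Dataset({"rsam": (("datetime",), np.random.rand(len(times)))},
                 coords={"datetime": times})

print(get_dt(xds["datetime"]))               # inferred interval: 1 hour
filled = fill_time_gaps(xds, timedim="datetime")
print(int(filled["rsam"].isnull().sum()))    # 2 samples inserted as NaN
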
tonik/xarray2zarr.py CHANGED
@@ -1,6 +1,8 @@
  import logging
  import os

+ import numpy as np
+ import pandas as pd
  import xarray as xr
  try:
  from zarr.errors import PathNotFoundError
@@ -8,12 +10,142 @@ except ImportError:
  class PathNotFoundError(Exception):
  pass

- from .utils import merge_arrays
+ from .utils import merge_arrays, fill_time_gaps, get_dt

  logger = logging.getLogger(__name__)


- def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
+ def get_chunks(xda: xr.DataArray, chunks: int = 1,
+ timedim: str = 'datetime') -> int:
+ """
+ Determine the chunk size for the datetime dimension. Other dimensions are assumed to be
+ small enough to not require chunking.
+
+ Parameters
+ ----------
+ xda : xr.DataArray
+ Array whose time coordinate is used to infer the sampling interval.
+ chunks : int, optional
+ Chunk length in days along the datetime dimension, by default 1.
+ """
+ if timedim not in xda.coords:
+ raise ValueError(
+ f"Datetime coordinate {timedim} not found in dataset coordinates.")
+ dt = get_dt(xda.coords[timedim])
+ chunklength = int(pd.Timedelta('%dD' % chunks) / dt)
+ return chunklength
+
+
+ def fill_time_gaps_between_datasets(xds_existing: xr.DataArray, xds_new: xr.DataArray, mode: str,
+ timedim: str = 'datetime') -> xr.DataArray:
+ """
+ Fill gaps between existing and new datasets.
+
+ Parameters
+ ----------
+ xds_existing : xr.DataArray
+ Existing data on disk
+ xds_new : xr.DataArray
+ New data to append
+ timedim : str
+ Name of the time dimension
+
+ Returns
+ -------
+ xr.DataArray
+ Combined data with gaps filled
+ """
+ if mode not in ['a', 'p']:
+ raise ValueError(
+ 'Mode has to be either "a" for append or "p" for prepend')
+
+ # get the sample interval
+ dt = get_dt(xds_new.coords[timedim])
+
+ existing_endpoint = xds_existing[timedim].values
+ # Get time ranges
+ if mode == 'a':
+ gap_start = existing_endpoint + dt
+ gap_end = xds_new[timedim].values[0] - dt
+ elif mode == 'p':
+ gap_end = existing_endpoint - dt
+ gap_start = xds_new[timedim].values[-1] + dt
+
+ if gap_start <= gap_end:
+ gap_times = pd.date_range(start=gap_start, end=gap_end, freq=dt)
+
+ # Create NaN array with same shape as variable but for gap times
+ gap_shape = (len(gap_times),) + \
+ xds_new.shape[1:]  # Skip time dimension
+ gap_values = np.full(gap_shape, np.nan)
+
+ # Create coordinates for gap dataset
+ gap_coords = {timedim: gap_times}
+ for coord_name, coord in xds_new.coords.items():
+ if coord_name != timedim:
+ gap_coords[coord_name] = coord
+
+ gap_data = xr.DataArray(
+ gap_values,
+ coords=gap_coords,
+ dims=xds_new.dims,
+ name=xds_new.name
+ )
+
+ # Combine: existing + gap + new
+ if mode == 'a':
+ combined = xr.concat([gap_data, xds_new], dim=timedim)
+ elif mode == 'p':
+ combined = xr.concat([xds_new, gap_data], dim=timedim)
+ return combined
+ else:
+ return xds_new
+
+
+ def _build_append_payload_full_chunks(payload: xr.DataArray, mode: str,
+ chunklen: int, timedim: str = "datetime") -> xr.DataArray:
+ """
+ Construct the sequence to append so that the final total length is a multiple of `chunklen`.
+ """
+ if mode not in ['a', 'p']:
+ raise ValueError(
+ 'Mode has to be either "a" for append or "p" for prepend')
+
+ # pad the tail so that payload_len % chunklen == 0
+ pay_len = payload.sizes[timedim]
+ need = -pay_len % chunklen  # 0..chunklen-1
+
+ if need > 0:
+ dt = get_dt(payload.coords[timedim])
+ if mode == 'a':
+ start = payload[timedim].values[-1] + dt
+ elif mode == 'p':
+ start = payload[timedim].values[0] - (need+1)*dt
+ pad_times = pd.date_range(start=start, periods=need, freq=dt)
+ pad_shape = []
+ for i, d in enumerate(payload.dims):
+ if d == timedim:
+ pad_shape.append(need)
+ else:
+ pad_shape.append(payload.shape[i])
+ pad_vals = np.full(pad_shape, np.nan)
+ pad_coords = {timedim: pad_times}
+ for c in payload.coords:
+ if c != timedim:
+ pad_coords[c] = payload.coords[c]
+ pad_da = xr.DataArray(pad_vals, coords=pad_coords,
+ dims=payload.dims, name=payload.name, attrs=payload.attrs)
+ if mode == 'a':
+ payload = xr.concat([payload, pad_da], dim=timedim)
+ elif mode == 'p':
+ payload = xr.concat([pad_da, payload], dim=timedim)
+ payload = payload.chunk({timedim: chunklen})
+ return payload
+ #
+
+
+ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
+ chunks: int = 10, timedim: str = 'datetime') -> None:
  """
  Write xarray dataset to zarr files.

@@ -27,41 +159,78 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
  Write mode, by default 'a'.
  group : str, optional
  Group name, by default 'original'
+ chunks : int, optional
+ Chunk size as the number of days.
+ timedim : str
+ Name of the time dimension, by default 'datetime'. Duplicate
+ timestamps are dropped and time gaps are filled with NaN before
+ writing.

  Returns
  -------
  None
  """
+
+ if timedim not in xds.dims:
+ raise ValueError(f"{timedim} dimension not found in Dataset.")
+
+ # Fill gaps
+ xds = xds.drop_duplicates(timedim, keep='last')
+ xds = fill_time_gaps(xds, timedim=timedim)
+
  for feature in xds.data_vars.keys():
  fout = os.path.join(path, feature + '.zarr')
- if not os.path.exists(fout) or mode == 'w':
- xds[feature].to_zarr(
- fout, group=group, mode='w')
+ # nchunks = get_chunks(xds[feature], chunks)
+ nchunks = chunks
+ try:
+ xds_existing = xr.open_zarr(fout, group=group)
+ has_store = True
+ except (PathNotFoundError, FileNotFoundError, KeyError):
+ has_store = False
+
+ if not has_store:
+ xda_new = _build_append_payload_full_chunks(
+ xds[feature], 'a', nchunks)
+ xda_new.to_zarr(fout, group=group, mode='w',
+ write_empty_chunks=True)
+ continue
+
+ if xds_existing[timedim][0] > xds[timedim][-1]:
+ # prepend
+ xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
+ xds[feature], mode='p')
+ xda_new = _build_append_payload_full_chunks(
+ xda_new, 'p', nchunks)
+ combined = xda_new.combine_first(xds_existing[feature]).compute()
+ combined.chunk({timedim: nchunks}).to_zarr(fout, group=group, mode='w',
+ write_empty_chunks=True)
+ elif xds_existing[timedim][-1] < xds[timedim][0]:
+ # append
+ xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: -1}),
+ xds[feature], mode='a')
+ xda_new = _build_append_payload_full_chunks(
+ xda_new, 'a', nchunks)
+ xda_new.to_zarr(fout, group=group, mode='a',
+ append_dim=timedim)
+ elif xds_existing[timedim][0] > xds[timedim][0] and xds_existing[timedim][-1] < xds[timedim][-1]:
+ # existing datetimes are contained in new array
+ xda_new = _build_append_payload_full_chunks(
+ xds[feature], 'a', nchunks)
+ xda_new.to_zarr(fout, group=group, mode='w',
+ write_empty_chunks=True)
  else:
- try:
- xds_existing = xr.open_zarr(fout, group=group)
- except (PathNotFoundError, FileNotFoundError):
- xds[feature].to_zarr(fout, group=group, mode='a')
- continue
- if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
- xda_new = merge_arrays(xds_existing[feature], xds[feature])
- xda_new.to_zarr(fout, group=group, mode='w')
- else:
- try:
- overlap = xds_existing.datetime.where(
- xds_existing.datetime == xds.datetime)
- if overlap.size > 0:
- xds[feature].loc[dict(datetime=overlap)].to_zarr(
- fout, group=group, mode='r+', region='auto')
- xds[feature].drop_sel(datetime=overlap).to_zarr(
- fout, group=group, mode='a', append_dim="datetime")
- else:
- xds[feature].to_zarr(
- fout, group=group, append_dim='datetime')
- except Exception as e:
- msg = f"Appending {feature} to {fout} failed: {e}\n"
- msg += "Attempting to merge the two datasets."
- logger.error(msg)
- # remove duplicate datetime entries
- xda_new = merge_arrays(xds_existing[feature], xds[feature])
- xda_new.to_zarr(fout, group=group, mode='w')
+ overlap = xds_existing[timedim].where(
+ xds_existing[timedim] == xds[timedim])
+ xds[feature].loc[{timedim: overlap}].to_zarr(
+ fout, group=group, mode='r+', region='auto')
+ remainder = xds[feature].drop_sel({timedim: overlap})
+ if remainder.sizes[timedim] > 0:
+ mode = 'a'
+ if remainder[timedim][-1] < xds_existing[timedim][0]:
+ mode = 'p'
+ xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
+ xds[feature], mode=mode)
+ xda_new = _build_append_payload_full_chunks(
+ xda_new, mode, nchunks)
+ xda_new.to_zarr(fout, group=group, mode='a',
+ append_dim=timedim)
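
Editorial usage sketch of the reworked writer (not part of the wheel): the output directory and variable name are placeholders and the data are synthetic.

# Editorial sketch: write a two-day hourly series to per-feature zarr stores.
import os

import numpy as np
import pandas as pd
import xarray as xr

from tonik.xarray2zarr import xarray2zarr

times = pd.date_range("2024-01-01", periods=48, freq="h")
xds = xr.Dataset({"rsam": (("datetime",), np.random.rand(48))},
                 coords={"datetime": times})

outdir = "tonik_demo"            # placeholder output directory
os.makedirs(outdir, exist_ok=True)

# Creates tonik_demo/rsam.zarr. With get_chunks commented out in this version,
# the `chunks` argument is used directly as the chunk length in samples.
xarray2zarr(xds, outdir, mode="a", group="original", chunks=24,
            timedim="datetime")

A second call with a later time range would take the append branch and extend the store along the datetime dimension, padding the payload so its length is a whole number of chunks.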
tonik-0.1.16.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: tonik
- Version: 0.1.15
+ Version: 0.1.16
  Summary: Store time series data as HDF5 files and access them through an API.
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -18,10 +18,11 @@ Requires-Dist: matplotlib
  Requires-Dist: netcdf4>=1.6
  Requires-Dist: pandas>=2.0
  Requires-Dist: python-json-logger>=2.0
+ Requires-Dist: s3fs
  Requires-Dist: uvicorn[standard]>=0.22
  Requires-Dist: xarray[accel,io,parallel]
- Requires-Dist: zarr<3; python_version < '3.11'
- Requires-Dist: zarr>=3.0.3; python_version >= '3.11'
+ Requires-Dist: zarr[remote-tests]<3; python_version < '3.11'
+ Requires-Dist: zarr[remote-tests]>=3.0.3; python_version >= '3.11'
  Provides-Extra: dev
  Requires-Dist: httpx; extra == 'dev'
  Requires-Dist: ipykernel; extra == 'dev'
tonik-0.1.16.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+ tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
+ tonik/api.py,sha256=XDKiz1AzYNBOwYfaRxpMgqGRDAPJEE6wWJyBxuYPRLc,7751
+ tonik/grafana_annotations.py,sha256=ZU9Cy-HT4vvMfYIQzD9WboaDVOCBDv__NmXbk1qKWJo,5838
+ tonik/storage.py,sha256=vFxIrY92cSYOYOpNXHxCAjdXgkrRytaRDpudtK0glmg,10608
+ tonik/utils.py,sha256=vRFMoCU7dbfnnm5RALBR-XrpPGDFtQoeTDzxFiYf3bo,7522
+ tonik/xarray2netcdf.py,sha256=gDNT6nxnRbXPeRqZ3URW5oXY3Nfh3TCrfueE-eUrIoY,5181
+ tonik/xarray2zarr.py,sha256=SSchDqy5oyYrIG4smV8fslsUg2UPSyyQjUA5ZlP1P4I,8630
+ tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
+ tonik/package_data/whakaari_labels.json,sha256=96UZSq41yXgAJxuKivLBKlRTw-33jkjh7AGKTsDQ9Yg,3993
+ tonik-0.1.16.dist-info/METADATA,sha256=EOwmXNC5b6IJsnTLMelBZ3vL1ljkfZwhM8Hoz6iHiZQ,2191
+ tonik-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ tonik-0.1.16.dist-info/entry_points.txt,sha256=y82XyTeQddM87gCTzgSQaTlKF3VFicO4hhClHUv6j1A,127
+ tonik-0.1.16.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ tonik-0.1.16.dist-info/RECORD,,
tonik-0.1.16.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.17.1
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
tonik-0.1.16.dist-info/entry_points.txt CHANGED
@@ -1,3 +1,4 @@
  [console_scripts]
+ grafana_annotations = tonik.grafana_annotations:main
  test_data = tonik.utils:main
  tonik_api = tonik.api:main
tonik-0.1.15.dist-info/RECORD REMOVED
@@ -1,12 +0,0 @@
- tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
- tonik/api.py,sha256=XDKiz1AzYNBOwYfaRxpMgqGRDAPJEE6wWJyBxuYPRLc,7751
- tonik/storage.py,sha256=vFxIrY92cSYOYOpNXHxCAjdXgkrRytaRDpudtK0glmg,10608
- tonik/utils.py,sha256=9eSVKIbs8TIZlJCz_-B7FrvOUQCQHO3K52v4Heus-uE,6135
- tonik/xarray2netcdf.py,sha256=gDNT6nxnRbXPeRqZ3URW5oXY3Nfh3TCrfueE-eUrIoY,5181
- tonik/xarray2zarr.py,sha256=RhCnS6g3yqe8mrEXhD_4PCN0EI3QPhp5X7ui_wvb_jY,2445
- tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
- tonik-0.1.15.dist-info/METADATA,sha256=fgLopxgfF1ooNHl2DEYN3dFivpSknlfRrAL2eledYKo,2143
- tonik-0.1.15.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
- tonik-0.1.15.dist-info/entry_points.txt,sha256=mT3B4eBE8SHlAeMhFnZGor9-YkVtoWM1NVHVuypJ-uY,74
- tonik-0.1.15.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- tonik-0.1.15.dist-info/RECORD,,