tsdownsample 0.1.5__cp314-cp314-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ """tsdownsample: high performance downsampling of time series data for visualization."""
2
+
3
+ from .downsamplers import (
4
+ EveryNthDownsampler,
5
+ LTTBDownsampler,
6
+ M4Downsampler,
7
+ MinMaxDownsampler,
8
+ MinMaxLTTBDownsampler,
9
+ NaNM4Downsampler,
10
+ NaNMinMaxDownsampler,
11
+ NaNMinMaxLTTBDownsampler,
12
+ )
13
+
14
# Package metadata.
__version__ = "0.1.5"
__author__ = "Jeroen Van Der Donckt"

# Public API of the package: the downsampler classes imported from
# `.downsamplers` at the top of this module.
__all__ = [
    "EveryNthDownsampler",
    "MinMaxDownsampler",
    "M4Downsampler",
    "LTTBDownsampler",
    "MinMaxLTTBDownsampler",
    "NaNMinMaxDownsampler",
    "NaNM4Downsampler",
    "NaNMinMaxLTTBDownsampler",
]
File without changes
@@ -0,0 +1,257 @@
1
+ from typing import Union
2
+
3
+ import numpy as np
4
+
5
+ from ..downsampling_interface import AbstractDownsampler
6
+
7
+
8
+ def _get_bin_idxs(x: np.ndarray, nb_bins: int) -> np.ndarray:
9
+ """Get the equidistant indices of the bins to use for the aggregation.
10
+
11
+ Parameters
12
+ ----------
13
+ x : np.ndarray
14
+ The x values of the input data.
15
+ nb_bins : int
16
+ The number of bins.
17
+
18
+ Returns
19
+ -------
20
+ np.ndarray
21
+ The indices of the bins to use for the aggregation.
22
+ """
23
+ # Thanks to the `linspace` the data is evenly distributed over the index-range
24
+ # The searchsorted function returns the index positions
25
+ bins = np.searchsorted(x, np.linspace(x[0], x[-1], nb_bins + 1), side="right")
26
+ bins[0] = 0
27
+ bins[-1] = len(x)
28
+ return np.array(bins)
29
+
30
+
31
class LTTB_py(AbstractDownsampler):
    """Pure-Python (numpy) implementation of the LTTB downsampling algorithm.

    According to the comments in this module, this implementation is only used for
    testing.
    """

    @staticmethod
    def _argmax_area(prev_x, prev_y, avg_next_x, avg_next_y, x_bucket, y_bucket) -> int:
        """Find the bucket point that spans the largest triangle (vectorized).

        For every point in the bucket a triangle is formed with the previously
        selected point and the mean point of the next bucket.

        Parameters
        ----------
        prev_x : float
            The x value of the previously selected point.
        prev_y : float
            The y value of the previously selected point.
        avg_next_x : float
            The x mean of the next bucket.
        avg_next_y : float
            The y mean of the next bucket.
        x_bucket : np.ndarray
            All x values in the bucket.
        y_bucket : np.ndarray
            All y values in the bucket.

        Returns
        -------
        int
            The position (relative to the bucket start) of the point with the
            largest triangular area.
        """
        # Proportional to the signed triangle area of each bucket point; the
        # constant factor does not influence the argmax.
        signed_area = (
            x_bucket * (prev_y - avg_next_y)
            + y_bucket * (avg_next_x - prev_x)
            + (prev_x * avg_next_y - avg_next_x * prev_y)
        )
        return np.abs(signed_area).argmax()

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs
    ) -> np.ndarray:
        """Select `n_out` indices from the data with the LTTB procedure.

        Parameters
        ----------
        x : Union[np.ndarray, None]
            The x values; when None, ``np.arange(len(y))`` is used.
        y : np.ndarray
            The y values.
        n_out : int
            The number of indices to select.
        **kwargs
            Not used by this implementation.

        Returns
        -------
        np.ndarray
            An int64 array of length `n_out` with the selected indices. The first
            entry is 0 and the last entry is ``len(x) - 1``.
        """
        if x is None:
            # Is fine for this implementation as this is only used for testing
            x = np.arange(y.shape[0])

        # Bucket size; the first and the last data point are always retained, hence
        # the `- 2`.
        bucket_width = (y.shape[0] - 2) / (n_out - 2)
        # The integer cast must happen AFTER the array creation: passing a dtype to
        # `arange` would cast the (float) step to an int before creating the array.
        offset = np.arange(start=1, stop=y.shape[0], step=bucket_width)
        offset = offset.astype(np.int64)

        # Output array, with the fixed first and last index already filled in
        sampled_x = np.empty(n_out, dtype="int64")
        sampled_x[0] = 0
        sampled_x[-1] = x.shape[0] - 1

        # Boolean data is cast to int8 so the arithmetic below is well-defined
        if x.dtype == np.bool_:
            x = x.astype(np.int8)
        if y.dtype == np.bool_:
            y = y.astype(np.int8)

        selected = 0  # index of the most recently selected point
        for i in range(n_out - 3):
            start, stop, next_stop = offset[i], offset[i + 1], offset[i + 2]
            rel_idx = LTTB_py._argmax_area(
                prev_x=x[selected],
                prev_y=y[selected],
                avg_next_x=np.mean(x[stop:next_stop]),
                avg_next_y=y[stop:next_stop].mean(),
                x_bucket=x[start:stop],
                y_bucket=y[start:stop],
            )
            # Convert the bucket-relative position into an absolute index
            selected = rel_idx + start
            sampled_x[i + 1] = selected

        # ------------ EDGE CASE ------------
        # The last bucket has no next bucket: the last data point takes the role of
        # the "next average".
        last_start, last_stop = offset[-2], offset[-1]
        rel_idx = LTTB_py._argmax_area(
            prev_x=x[selected],
            prev_y=y[selected],
            avg_next_x=x[-1],  # last point
            avg_next_y=y[-1],
            x_bucket=x[last_start:last_stop],
            y_bucket=y[last_start:last_stop],
        )
        sampled_x[-2] = rel_idx + last_start
        return sampled_x
117
+
118
+
119
class MinMax_py(AbstractDownsampler):
    """Pure-Python binned min-max aggregation.

    For every bin (equidistant in x) the indices of the minimum and the maximum y
    value are selected with `np.nanargmin` / `np.nanargmax`.
    """

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Assert that `n_out` is even (two indices are selected per bin)."""
        assert n_out % 2 == 0, "n_out must be a multiple of 2"

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs
    ) -> np.ndarray:
        """Return the sorted, de-duplicated min & max indices of each bin.

        Parameters
        ----------
        x : Union[np.ndarray, None]
            The x values; when None, ``np.arange(len(y))`` is used.
        y : np.ndarray
            The y values.
        n_out : int
            The requested number of output indices; ``n_out // 2`` bins are used.
        **kwargs
            Not used by this implementation.

        Returns
        -------
        np.ndarray
            The selected indices, passed through `np.unique` (so duplicates are
            dropped and fewer than `n_out` indices may be returned).
        """
        if x is None:
            # Is fine for this implementation as this is only used for testing
            x = np.arange(y.shape[0])

        # Datetime-like x values are reinterpreted as int64 for the bin computation
        x_dtype = x.dtype
        is_datetime_like = np.issubdtype(x_dtype, np.datetime64) or np.issubdtype(
            x_dtype, np.timedelta64
        )
        if is_datetime_like:
            x = x.view(np.int64)

        bins = _get_bin_idxs(x, n_out // 2)

        selected = []
        for start, stop in zip(bins, bins[1:]):
            y_bin = y[start:stop]
            if len(y_bin) == 0:
                # Empty bin, nothing to select
                continue
            # argmin(bin) & argmax(bin), shifted to absolute positions
            selected.append(start + np.nanargmin(y_bin))
            selected.append(start + np.nanargmax(y_bin))
        return np.unique(selected)
150
+
151
+
152
class NaNMinMax_py(AbstractDownsampler):
    """Pure-Python binned min-max aggregation using plain argmin / argmax.

    Unlike the `np.nanargmin` / `np.nanargmax` based variant, `np.argmin` and
    `np.argmax` are used here, i.e. NaN values are not skipped.
    """

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Assert that `n_out` is even (two indices are selected per bin)."""
        assert n_out % 2 == 0, "n_out must be a multiple of 2"

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs
    ) -> np.ndarray:
        """Return the sorted min & max indices of each bin (duplicates retained).

        Parameters
        ----------
        x : Union[np.ndarray, None]
            The x values; when None, ``np.arange(len(y))`` is used.
        y : np.ndarray
            The y values.
        n_out : int
            The requested number of output indices; ``n_out // 2`` bins are used.
        **kwargs
            Not used by this implementation.

        Returns
        -------
        np.ndarray
            The selected indices in ascending order.
        """
        if x is None:
            # Is fine for this implementation as this is only used for testing
            x = np.arange(y.shape[0])

        # Datetime-like x values are reinterpreted as int64 for the bin computation
        if any(np.issubdtype(x.dtype, t) for t in (np.datetime64, np.timedelta64)):
            x = x.view(np.int64)

        bins = _get_bin_idxs(x, n_out // 2)

        selected = []
        for start, stop in zip(bins, bins[1:]):
            y_bin = y[start:stop]
            if len(y_bin) == 0:
                # Empty bin, nothing to select
                continue
            # argmin(bin) & argmax(bin), shifted to absolute positions
            selected.append(start + np.argmin(y_bin))
            selected.append(start + np.argmax(y_bin))
        selected.sort()
        return np.array(selected)
179
+
180
+
181
class M4_py(AbstractDownsampler):
    """Aggregation method which selects the 4 M-s per bin, i.e. the first index, the
    y-argmin, the y-argmax, and the last index of the bin.

    The y-argmin and y-argmax are computed with `np.nanargmin` / `np.nanargmax`.

    .. note::
        When `n_out` is 4 * the pixel width of the canvas, it should create a
        pixel-perfect visualization w.r.t. the raw data.

    """

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Assert that `n_out` is a multiple of 4 (four indices per bin)."""
        assert n_out % 4 == 0, "n_out must be a multiple of 4"

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs
    ) -> np.ndarray:
        """Return the sorted M4 indices of each bin (duplicates retained).

        Parameters
        ----------
        x : Union[np.ndarray, None]
            The x values; when None, ``np.arange(len(y))`` is used.
        y : np.ndarray
            The y values.
        n_out : int
            The requested number of output indices; ``n_out // 4`` bins are used.
        **kwargs
            Not used by this implementation.

        Returns
        -------
        np.ndarray
            The selected indices in ascending order.
        """
        if x is None:
            # Is fine for this implementation as this is only used for testing
            x = np.arange(y.shape[0])

        # Datetime-like x values are reinterpreted as int64 for the bin computation
        x_dtype = x.dtype
        is_datetime_like = np.issubdtype(x_dtype, np.datetime64) or np.issubdtype(
            x_dtype, np.timedelta64
        )
        if is_datetime_like:
            x = x.view(np.int64)

        bins = _get_bin_idxs(x, n_out // 4)

        selected = []
        for start, stop in zip(bins, bins[1:]):
            y_bin = y[start:stop]
            if len(y_bin) == 0:
                # Empty bin, nothing to select
                continue

            # min(idx), argmin(bin), argmax(bin), max(idx)
            selected.extend(
                (
                    start,
                    start + np.nanargmin(y_bin),
                    start + np.nanargmax(y_bin),
                    stop - 1,
                )
            )

        # NOTE: np.unique is deliberately not used so that all indices are retained
        selected.sort()
        return np.array(selected)
223
+
224
+
225
class NaNM4_py(AbstractDownsampler):
    """Pure-Python M4 aggregation using the plain `argmin` / `argmax` methods.

    Per bin the first index, the y-argmin, the y-argmax, and the last index are
    selected; NaN values are not skipped by the argmin / argmax computation.
    """

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Assert that `n_out` is a multiple of 4 (four indices per bin)."""
        assert n_out % 4 == 0, "n_out must be a multiple of 4"

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs
    ) -> np.ndarray:
        """Return the sorted M4 indices of each bin (duplicates retained).

        Parameters
        ----------
        x : Union[np.ndarray, None]
            The x values; when None, ``np.arange(len(y))`` is used.
        y : np.ndarray
            The y values.
        n_out : int
            The requested number of output indices; ``n_out // 4`` bins are used.
        **kwargs
            Not used by this implementation.

        Returns
        -------
        np.ndarray
            The selected indices in ascending order.
        """
        if x is None:
            # Is fine for this implementation as this is only used for testing
            x = np.arange(y.shape[0])

        # Datetime-like x values are reinterpreted as int64 for the bin computation
        if any(np.issubdtype(x.dtype, t) for t in (np.datetime64, np.timedelta64)):
            x = x.view(np.int64)

        bins = _get_bin_idxs(x, n_out // 4)

        selected = []
        for start, stop in zip(bins, bins[1:]):
            y_bin = y[start:stop]
            if len(y_bin) == 0:
                # Empty bin, nothing to select
                continue

            # min(idx), argmin(bin), argmax(bin), max(idx)
            selected.extend(
                (start, start + y_bin.argmin(), start + y_bin.argmax(), stop - 1)
            )

        # NOTE: np.unique is deliberately not used so that all indices are retained
        selected.sort()
        return np.array(selected)
@@ -0,0 +1 @@
1
+ # In this folder the compiled rust code should be placed.
@@ -0,0 +1,158 @@
1
+ import warnings
2
+ from typing import Union
3
+
4
+ import numpy as np
5
+
6
+ # ------------------ Rust Downsamplers ------------------
7
+ from tsdownsample._rust import _tsdownsample_rs # type: ignore[attr-defined]
8
+
9
+ from .downsampling_interface import (
10
+ AbstractDownsampler,
11
+ AbstractRustDownsampler,
12
+ AbstractRustNaNDownsampler,
13
+ )
14
+
15
+
16
class MinMaxDownsampler(AbstractRustDownsampler):
    """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, these
    are ignored (i.e. the NaNs are not taken into account when selecting data points).

    For each bin, the indices of the minimum and maximum values are selected.
    """

    @property
    def rust_mod(self):
        """The `minmax` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.minmax

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Run the base-class check and additionally require an even `n_out`.

        Raises
        ------
        ValueError
            If `n_out` is not even.
        """
        AbstractRustDownsampler._check_valid_n_out(n_out)
        is_odd = n_out % 2 != 0
        if is_odd:
            raise ValueError("n_out must be even")
32
+
33
+
34
class NaNMinMaxDownsampler(AbstractRustNaNDownsampler):
    """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, the
    indices of these NaNs are returned.

    For each bin, the indices of the minimum and maximum values are selected.
    """

    @property
    def rust_mod(self):
        """The `minmax` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.minmax

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Run the `AbstractRustDownsampler` check and require an even `n_out`.

        Raises
        ------
        ValueError
            If `n_out` is not even.
        """
        AbstractRustDownsampler._check_valid_n_out(n_out)
        is_odd = n_out % 2 != 0
        if is_odd:
            raise ValueError("n_out must be even")
50
+
51
+
52
class M4Downsampler(AbstractRustDownsampler):
    """Downsampler that uses the M4 algorithm. If the y data contains NaNs, these are
    ignored (i.e. the NaNs are not taken into account when selecting data points).

    For each bin, the indices of the first, last, minimum and maximum values are
    selected.
    """

    @property
    def rust_mod(self):
        """The `m4` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.m4

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Run the base-class check and require `n_out` to be a multiple of 4.

        Raises
        ------
        ValueError
            If `n_out` is not a multiple of 4.
        """
        AbstractRustDownsampler._check_valid_n_out(n_out)
        remainder = n_out % 4
        if remainder != 0:
            raise ValueError("n_out must be a multiple of 4")
69
+
70
+
71
class NaNM4Downsampler(AbstractRustNaNDownsampler):
    """Downsampler that uses the M4 algorithm. If the y data contains NaNs, the indices
    of these NaNs are returned.

    For each bin, the indices of the first, last, minimum and maximum values are
    selected.
    """

    @property
    def rust_mod(self):
        """The `m4` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.m4

    @staticmethod
    def _check_valid_n_out(n_out: int):
        """Run the `AbstractRustDownsampler` check and require a multiple of 4.

        Raises
        ------
        ValueError
            If `n_out` is not a multiple of 4.
        """
        AbstractRustDownsampler._check_valid_n_out(n_out)
        remainder = n_out % 4
        if remainder != 0:
            raise ValueError("n_out must be a multiple of 4")
88
+
89
+
90
class LTTBDownsampler(AbstractRustDownsampler):
    """Downsampler that uses the LTTB algorithm."""

    @property
    def rust_mod(self):
        """The `lttb` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.lttb
96
+
97
+
98
class MinMaxLTTBDownsampler(AbstractRustDownsampler):
    """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs,
    these are ignored (i.e. the NaNs are not taken into account when selecting data
    points).

    MinMaxLTTB paper: https://arxiv.org/abs/2305.00332
    """

    @property
    def rust_mod(self):
        """The `minmaxlttb` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.minmaxlttb

    def downsample(
        self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_
    ):
        """Downsample the data, forwarding `minmax_ratio` as the `ratio` keyword.

        Parameters
        ----------
        *args
            Positional arguments, passed on unchanged to the parent `downsample`.
        n_out : int
            The number of output indices.
        minmax_ratio : int, optional
            Must be greater than 0; passed to the parent `downsample` as `ratio`.
            By default 4.
        parallel : bool, optional
            Passed on unchanged to the parent `downsample`. By default False.
        **_
            Any additional keyword arguments are accepted and discarded.
        """
        assert minmax_ratio > 0, "minmax_ratio must be greater than 0"
        forwarded = {"n_out": n_out, "parallel": parallel, "ratio": minmax_ratio}
        return super().downsample(*args, **forwarded)
117
+
118
+
119
class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler):
    """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, the
    indices of these NaNs are returned.

    MinMaxLTTB paper: https://arxiv.org/abs/2305.00332
    """

    @property
    def rust_mod(self):
        """The `minmaxlttb` submodule of the compiled `_tsdownsample_rs` extension."""
        return _tsdownsample_rs.minmaxlttb

    def downsample(
        self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_
    ):
        """Downsample the data, forwarding `minmax_ratio` as the `ratio` keyword.

        Parameters
        ----------
        *args
            Positional arguments, passed on unchanged to the parent `downsample`.
        n_out : int
            The number of output indices.
        minmax_ratio : int, optional
            Must be greater than 0; passed to the parent `downsample` as `ratio`.
            By default 4.
        parallel : bool, optional
            Passed on unchanged to the parent `downsample`. By default False.
        **_
            Any additional keyword arguments are accepted and discarded.
        """
        assert minmax_ratio > 0, "minmax_ratio must be greater than 0"
        forwarded = {"n_out": n_out, "parallel": parallel, "ratio": minmax_ratio}
        return super().downsample(*args, **forwarded)
137
+
138
+
139
+ # ------------------ EveryNth Downsampler ------------------
140
+
141
+
142
class EveryNthDownsampler(AbstractDownsampler):
    """Downsampler that selects every nth data point"""

    def __init__(self, **kwargs):
        """Initialize the base class with `check_contiguous=False`."""
        super().__init__(check_contiguous=False, **kwargs)

    def _downsample(
        self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **_
    ) -> np.ndarray:
        """Select evenly spaced indices over the index range of `y`.

        Parameters
        ----------
        x : Union[np.ndarray, None]
            Ignored by this algorithm; a warning is emitted when it is not None.
        y : np.ndarray
            The y values; only its length is used.
        n_out : int
            The requested number of output indices.
        **_
            Any additional keyword arguments are accepted and discarded.

        Returns
        -------
        np.ndarray
            The selected indices, cast to `np.uint`.
        """
        if x is not None:
            name = self.__class__.__name__
            message = (
                f"x is passed to downsample method of {name}, but is not taken "
                "into account by the current implementation of the EveryNth algorithm."
            )
            warnings.warn(message)

        n_points = len(y)
        # The (float) step is at least 1, so no index is ever selected twice
        step = max(1, n_points / n_out)
        # Stopping slightly before `len(y)` keeps every generated position below
        # `len(y)`; the float positions are truncated by the unsigned-int cast.
        positions = np.arange(start=0, stop=n_points - 0.1, step=step)
        return positions.astype(np.uint)