tobac 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tobac/__init__.py +112 -0
- tobac/analysis/__init__.py +31 -0
- tobac/analysis/cell_analysis.py +628 -0
- tobac/analysis/feature_analysis.py +212 -0
- tobac/analysis/spatial.py +619 -0
- tobac/centerofgravity.py +226 -0
- tobac/feature_detection.py +1758 -0
- tobac/merge_split.py +324 -0
- tobac/plotting.py +2321 -0
- tobac/segmentation/__init__.py +10 -0
- tobac/segmentation/watershed_segmentation.py +1316 -0
- tobac/testing.py +1179 -0
- tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
- tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
- tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
- tobac/tests/test_analysis_spatial.py +1109 -0
- tobac/tests/test_convert.py +265 -0
- tobac/tests/test_datetime.py +216 -0
- tobac/tests/test_decorators.py +148 -0
- tobac/tests/test_feature_detection.py +1321 -0
- tobac/tests/test_generators.py +273 -0
- tobac/tests/test_import.py +24 -0
- tobac/tests/test_iris_xarray_match_utils.py +244 -0
- tobac/tests/test_merge_split.py +351 -0
- tobac/tests/test_pbc_utils.py +497 -0
- tobac/tests/test_sample_data.py +197 -0
- tobac/tests/test_testing.py +747 -0
- tobac/tests/test_tracking.py +714 -0
- tobac/tests/test_utils.py +650 -0
- tobac/tests/test_utils_bulk_statistics.py +789 -0
- tobac/tests/test_utils_coordinates.py +328 -0
- tobac/tests/test_utils_internal.py +97 -0
- tobac/tests/test_xarray_utils.py +232 -0
- tobac/tracking.py +613 -0
- tobac/utils/__init__.py +27 -0
- tobac/utils/bulk_statistics.py +360 -0
- tobac/utils/datetime.py +184 -0
- tobac/utils/decorators.py +540 -0
- tobac/utils/general.py +753 -0
- tobac/utils/generators.py +87 -0
- tobac/utils/internal/__init__.py +2 -0
- tobac/utils/internal/coordinates.py +430 -0
- tobac/utils/internal/iris_utils.py +462 -0
- tobac/utils/internal/label_props.py +82 -0
- tobac/utils/internal/xarray_utils.py +439 -0
- tobac/utils/mask.py +364 -0
- tobac/utils/periodic_boundaries.py +419 -0
- tobac/wrapper.py +244 -0
- tobac-1.6.2.dist-info/METADATA +154 -0
- tobac-1.6.2.dist-info/RECORD +53 -0
- tobac-1.6.2.dist-info/WHEEL +5 -0
- tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
- tobac-1.6.2.dist-info/top_level.txt +1 -0
tobac/utils/general.py
ADDED
@@ -0,0 +1,753 @@
"""General tobac utilities"""

from __future__ import annotations
import copy
import logging
from typing import Callable, Optional, Union
from typing_extensions import Literal
import iris
import pandas as pd
import iris.cube

from . import internal as internal_utils
from . import decorators
import numpy as np
import sklearn
import sklearn.neighbors
import datetime
import xarray as xr
import warnings


def add_coordinates(
    features: pd.DataFrame,
    variable_cube: Union[xr.DataArray, iris.cube.Cube],
    use_standard_names: Optional[bool] = None,
) -> pd.DataFrame:
    """Add coordinates from the input cube of the feature detection
    to the trajectories/features.

    :meta private:

    Parameters
    ----------
    features : pandas.DataFrame
        Trajectories/features from feature detection or linking step.

    variable_cube : iris.cube.Cube
        Input data used for the tracking with coordinate information
        to transfer to the resulting DataFrame. Needs to contain the
        coordinate 'time'.

    use_standard_names: bool
        If true, when interpolating a coordinate, it looks for a standard_name
        and uses that to name the output coordinate, to mimic iris functionality.
        If false, uses the actual name of the coordinate for the output.

    Returns
    -------
    pandas.DataFrame
        Trajectories with added coordinates.

    """

    if isinstance(variable_cube, iris.cube.Cube):
        return internal_utils.iris_utils.add_coordinates(features, variable_cube)
    if isinstance(variable_cube, xr.DataArray):
        return internal_utils.xr_utils.add_coordinates_to_features(
            features,
            variable_cube,
            use_standard_names=use_standard_names,
        )
    raise ValueError(
        "add_coordinates only supports xarray.DataArray and iris.cube.Cube"
    )


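# Editorial sketch, not part of the released module: a hypothetical end-to-end
# use of add_coordinates, assuming the public tobac.testing.make_simple_sample_data_2D
# and tobac.feature_detection_multithreshold APIs and an arbitrary threshold.
def _example_add_coordinates():
    import tobac
    import tobac.testing

    # Synthetic 2D cube with time/projection/lat/lon coordinates.
    sample_cube = tobac.testing.make_simple_sample_data_2D()
    # Detect features on the synthetic field (threshold chosen arbitrarily).
    features = tobac.feature_detection_multithreshold(
        sample_cube, dxy=1000.0, threshold=1.0
    )
    # Attach the cube's coordinates (latitude, longitude, ...) to the features.
    return add_coordinates(features, sample_cube)

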
def add_coordinates_3D(
    t: pd.DataFrame,
    variable_cube: Union[xr.DataArray, iris.cube.Cube],
    vertical_coord: Union[str, int] = None,
    vertical_axis: Union[int, None] = None,
    assume_coords_fixed_in_time: bool = True,
    use_standard_names: Optional[bool] = None,
):
    """Function adding coordinates from the tracking cube to the trajectories
    for the 3D case: time, longitude&latitude, x&y dimensions, and altitude

    Parameters
    ----------
    t: pandas DataFrame
        Input features
    variable_cube: iris.cube.Cube
        Cube (usually the one you are tracking on) at least containing the dimension of 'time'.
        Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate',
        and 'altitude' (if 3D) are the coordinates that we expect, although this function
        will happily interpolate along any dimension coordinates you give.
    vertical_coord: str or int
        Name or axis number of the vertical coordinate. If None, tries to auto-detect.
        If it is a string, it looks for the coordinate or the dimension name corresponding
        to the string. If it is an int, it assumes that it is the vertical axis.
        Note that if you only have a 2D or 3D coordinate for altitude, you must
        pass in an int.
    vertical_axis: int or None
        Axis number of the vertical.
    assume_coords_fixed_in_time: bool
        If True, it assumes that the coordinates are fixed in time, even if the
        coordinates say they vary in time. This is, by default, True, to preserve
        legacy functionality. If False, a coordinate that says it varies in time
        is taken at its word.
    use_standard_names: bool
        If true, when interpolating a coordinate, it looks for a standard_name
        and uses that to name the output coordinate, to mimic iris functionality.
        If false, uses the actual name of the coordinate for the output.

    Returns
    -------
    pandas DataFrame
        trajectories with added coordinates
    """
    if isinstance(variable_cube, iris.cube.Cube):
        return internal_utils.iris_utils.add_coordinates_3D(
            t, variable_cube, vertical_coord, vertical_axis, assume_coords_fixed_in_time
        )
    if isinstance(variable_cube, xr.DataArray):
        return internal_utils.xr_utils.add_coordinates_to_features(
            t,
            variable_cube,
            vertical_coord=vertical_coord,
            vertical_axis=vertical_axis,
            use_standard_names=use_standard_names,
        )
    raise ValueError(
        "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube"
    )


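# Editorial sketch, not part of the released module: a hypothetical call to
# add_coordinates_3D with a hand-built DataArray. The frame/vdim/hdim columns
# mirror what tobac feature detection produces; the exact column and coordinate
# requirements are those of the internal xarray utilities.
def _example_add_coordinates_3D():
    data = xr.DataArray(
        np.zeros((1, 5, 4, 4)),
        dims=("time", "z", "y", "x"),
        coords={
            "time": [np.datetime64("2000-01-01T00:00:00")],
            "altitude": ("z", np.arange(5) * 500.0),
        },
    )
    features = pd.DataFrame(
        {"frame": [0], "vdim": [2.0], "hdim_1": [1.0], "hdim_2": [2.0]}
    )
    # Interpolate the altitude coordinate to the fractional vdim position.
    return add_coordinates_3D(features, data, vertical_coord="altitude")

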
def get_bounding_box(x, buffer=1):
    """Finds the bounding box of a ndarray

    This is the smallest bounding rectangle for nonzero values as explained here:
    https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array

    Parameters
    ----------
    x : numpy.ndarray
        Array for which the bounding box is to be determined.
    buffer : int, optional
        Number to set a buffer between the nonzero values and
        the edges of the box. Default is 1.

    Returns
    -------
    bbox : list
        Dimensionwise list of the indices representing the edges
        of the bounding box.
    """

    from numpy import delete, arange, diff, nonzero, array

    mask = x == 0

    bbox = []
    all_axis = arange(x.ndim)
    # loop over dimensions
    for kdim in all_axis:
        nk_dim = delete(all_axis, kdim)
        mask_i = mask.all(axis=tuple(nk_dim))
        dmask_i = diff(mask_i)
        idx_i = nonzero(dmask_i)[0]
        # for the case where there is no value in idx_i
        if len(idx_i) == 0:
            idx_i = array([0, x.shape[kdim] - 1])
        # for the case where there is only one value in idx_i
        elif len(idx_i) == 1:
            idx_i = array([idx_i, idx_i])
        # make sure there are only two values in idx_i
        elif len(idx_i) > 2:
            idx_i = array([idx_i[0], idx_i[-1]])
        # calculate min and max values for idx_i and append them to the list
        idx_min = max(0, idx_i[0] + 1 - buffer)
        idx_max = min(x.shape[kdim] - 1, idx_i[1] + 1 + buffer)
        bbox.append([idx_min, idx_max])
    return bbox


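# Editorial sketch, not part of the released module: get_bounding_box on a
# small array with a block of nonzero values, worked through by hand.
def _example_get_bounding_box():
    arr = np.zeros((10, 10))
    arr[3:6, 4:7] = 1.0
    # Rows 3-5 and columns 4-6 are nonzero; with buffer=1 the box extends one
    # grid point beyond them on each side, giving [[2, 7], [3, 8]].
    return get_bounding_box(arr, buffer=1)

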
@decorators.xarray_to_iris()
def get_spacings(
    field_in, grid_spacing=None, time_spacing=None, average_method="arithmetic"
):
    """Determine spatial and temporal grid spacing of the
    input data.

    Parameters
    ----------
    field_in : iris.cube.Cube
        Input field where to get spacings.

    grid_spacing : float, optional
        Manually sets the grid spacing if specified.
        Default is None.

    time_spacing : float, optional
        Manually sets the time spacing if specified.
        Default is None.

    average_method : string, optional
        Defines how spacings in x- and y-direction are
        combined.

        - 'arithmetic' : standard arithmetic mean like (dx+dy)/2
        - 'geometric' : geometric mean; conserves gridbox area

        Default is 'arithmetic'.


    Returns
    -------
    dxy : float
        Grid spacing in metres.

    dt : float
        Time resolution in seconds.

    Raises
    ------
    ValueError
        If input_cube does not contain projection_x_coord and
        projection_y_coord or keyword argument grid_spacing.

    """

    from copy import deepcopy

    # set horizontal grid spacing of input data
    # If cartesian x and y coordinates are present, use these to determine dxy
    # (the horizontal grid spacing used to convert pixel distances to physical distances):
    coord_names = [coord.name() for coord in field_in.coords()]

    if (
        "projection_x_coordinate" in coord_names
        and "projection_y_coordinate" in coord_names
    ) and (grid_spacing is None):
        x_coord = deepcopy(field_in.coord("projection_x_coordinate"))
        x_coord.convert_units("metre")
        dx = np.diff(x_coord[0:2].points)[0]
        y_coord = deepcopy(field_in.coord("projection_y_coordinate"))
        y_coord.convert_units("metre")
        dy = np.diff(y_coord[0:2].points)[0]

        if average_method == "arithmetic":
            dxy = 0.5 * (np.abs(dx) + np.abs(dy))
        elif average_method == "geometric":
            dxy = np.sqrt(np.abs(dx) * np.abs(dy))

    elif grid_spacing is not None:
        dxy = grid_spacing
    else:
        raise ValueError(
            "no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing"
        )

    # set temporal grid spacing of input data
    if time_spacing is None:
        # get time resolution of input data from the first two steps of the input cube:
        time_coord = field_in.coord("time")
        dt = (
            time_coord.units.num2date(time_coord.points[1])
            - time_coord.units.num2date(time_coord.points[0])
        ).seconds
    elif time_spacing is not None:
        # use value of time_spacing for dt:
        dt = time_spacing
    else:
        raise ValueError(
            "no information about time spacing, need either input cube with time or keyword argument time_spacing"
        )
    return dxy, dt


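# Editorial sketch, not part of the released module: get_spacings on the
# synthetic cube from tobac.testing, assuming the make_simple_sample_data_2D API.
def _example_get_spacings():
    import tobac.testing

    sample_cube = tobac.testing.make_simple_sample_data_2D()
    # dxy in metres and dt in seconds, both read from the cube's coordinates.
    dxy, dt = get_spacings(sample_cube)
    return dxy, dt

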
def spectral_filtering(
    dxy, field_in, lambda_min, lambda_max, return_transfer_function=False
):
    """This function creates and applies a 2D transfer function that
    can be used as a bandpass filter to remove certain wavelengths
    of an atmospheric input field (e.g. vorticity, IVT, etc).

    Parameters
    ----------
    dxy : float
        Grid spacing in m.

    field_in: numpy.array
        2D field with input data.

    lambda_min: float
        Minimum wavelength in m.

    lambda_max: float
        Maximum wavelength in m.

    return_transfer_function: boolean, optional
        default: False. If set to True, then the 2D transfer function and
        the corresponding wavelengths are returned.

    Returns
    -------
    filtered_field: numpy.array
        Spectrally filtered 2D field of data (with same shape as input data).

    transfer_function: tuple
        Two 2D fields, where the first one corresponds to the wavelengths
        in the spectral space of the domain and the second one to the 2D
        transfer function of the bandpass filter. Only returned if
        return_transfer_function is True.
    """

    from scipy import signal
    from scipy import fft

    # check if valid value for dxy is given
    if dxy <= 0:
        raise ValueError(
            "Invalid value for dxy. Please provide the grid spacing in meter."
        )

    # get number of grid cells in x and y direction
    Ni = field_in.shape[-2]
    Nj = field_in.shape[-1]
    # wavenumber space
    m, n = np.meshgrid(np.arange(Ni), np.arange(Nj), indexing="ij")

    # if domain is square:
    if Ni == Nj:
        wavenumber = np.sqrt(m**2 + n**2)
        lambda_mn = (2 * Ni * (dxy)) / wavenumber
    else:
        # if domain is a rectangle:
        # alpha is the normalized wavenumber in wavenumber space
        alpha = np.sqrt(m**2 / Ni**2 + n**2 / Nj**2)
        # compute wavelengths for target grid in m
        lambda_mn = 2 * dxy / alpha

    ############### create a 2D bandpass filter (butterworth) #######################
    b, a = signal.iirfilter(
        2,
        [1 / lambda_max, 1 / lambda_min],
        btype="band",
        ftype="butter",
        fs=1 / dxy,
        output="ba",
    )
    w, h = signal.freqz(b, a, 1 / lambda_mn.flatten(), fs=1 / dxy)
    transfer_function = np.reshape(abs(h), lambda_mn.shape)

    # 2-dimensional discrete cosine transformation to convert data to spectral space
    spectral = fft.dctn(field_in.data)
    # multiplication of spectral coefficients with transfer function
    filtered = spectral * transfer_function
    # inverse discrete cosine transformation to go back from spectral to original space
    filtered_field = fft.idctn(filtered)

    if return_transfer_function is True:
        return (lambda_mn, transfer_function), filtered_field
    else:
        return filtered_field


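# Editorial sketch, not part of the released module: band-pass a random 2D
# field, keeping wavelengths between 100 km and 1000 km on an assumed 4 km grid.
def _example_spectral_filtering():
    rng = np.random.default_rng(0)
    field = rng.standard_normal((200, 200))
    filtered = spectral_filtering(
        4000.0, field, lambda_min=1.0e5, lambda_max=1.0e6
    )
    # The filtered field has the same shape as the input field.
    return filtered

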
def combine_tobac_feats(list_of_feats, preserve_old_feat_nums=None):
    """WARNING: This function has been deprecated and will be removed in a future
    release, please use 'combine_feature_dataframes' instead

    Function to combine a list of tobac feature detection dataframes
    into one combined dataframe that can be used for tracking
    or segmentation.

    :meta private:

    Parameters
    ----------
    list_of_feats: array-like of Pandas DataFrames
        A list of dataframes (generated, for example, by
        running feature detection on multiple nodes).
    preserve_old_feat_nums: str or None
        The column name to preserve old feature numbers in. If None, these
        old numbers will be deleted. Users may want to enable this feature
        if they have run segmentation with the separate dataframes and
        therefore old feature numbers.

    Returns
    -------
    pd.DataFrame
        One combined DataFrame."""
    import warnings

    warnings.warn(
        "This function has been deprecated and will be removed in a future release, please use 'combine_feature_dataframes' instead",
        DeprecationWarning,
    )

    return combine_feature_dataframes(
        list_of_feats, old_feature_column_name=preserve_old_feat_nums
    )


def combine_feature_dataframes(
    feature_df_list,
    renumber_features=True,
    old_feature_column_name=None,
    sort_features_by=None,
):
    """Function to combine a list of tobac feature detection dataframes
    into one combined dataframe that can be used for tracking
    or segmentation.

    Parameters
    ----------
    feature_df_list: array-like of Pandas DataFrames
        A list of dataframes (generated, for example, by
        running feature detection on multiple nodes).
    renumber_features: bool, optional (default: True)
        If true, features are renumbered with contiguous integers. If false, the
        old feature numbers will be retained, but an exception will be raised if
        there are any non-unique feature numbers. If you have non-unique feature
        numbers and want to preserve them, use old_feature_column_name to
        save the old feature numbers under a different column name.
    old_feature_column_name: str or None, optional (default: None)
        The column name to preserve old feature numbers in. If None, these
        old numbers will be deleted. Users may want to enable this feature
        if they have run segmentation with the separate dataframes and
        therefore old feature numbers.
    sort_features_by: list, str or None, optional (default: None)
        The sorting order to pass to Dataframe.sort_values for the merged
        dataframe. If None, will default to ["frame", "idx"] if
        renumber_features is True, or "feature" if renumber_features is False.

    Returns
    -------
    pd.DataFrame
        One combined DataFrame.
    """
    import pandas as pd

    # first, let's just combine these.
    combined_df = pd.concat(feature_df_list)

    if not renumber_features and np.any(
        np.bincount(combined_df["feature"] + np.nanmin(combined_df["feature"])) > 1
    ):
        error = ValueError(
            "Non-unique feature values detected. Combining feature dataframes with original feature numbers cannot be performed because duplicate feature numbers exist, please use 'renumber_features=True'. If you would like to preserve the original feature numbers, please use the 'old_feature_column_name' keyword to define a new column for these values in the returned dataframe"
        )
        # error.add_note(
        #     "Combining feature dataframes with original feature numbers cannot be performed because duplicate feature numbers exist, please use 'renumber_features=True'"
        # )
        # error.add_note(
        #     "If you would like to preserve the original feature numbers, please use the 'old_feature_column_name' keyword to define a new column for these values in the returned dataframe"
        # )
        raise error

    if sort_features_by is None:
        if renumber_features:
            sort_features_by = ["frame", "idx"]
        else:
            sort_features_by = "feature"
    # # Then, sort by time first, then by feature number
    # combined_df = combined_df.sort_values(["time", "feature"])
    # Save the old feature numbers if requested.
    if old_feature_column_name is not None:
        combined_df[old_feature_column_name] = combined_df["feature"]
    # count_per_time = combined_feats.groupby('time')['index'].count()
    original_frame_dtype = combined_df["frame"].dtype
    combined_df["frame"] = (
        combined_df["time"].rank(method="dense").astype(original_frame_dtype) - 1
    )
    combined_sorted = combined_df.sort_values(sort_features_by, ignore_index=True)
    if renumber_features:
        original_feature_dtype = combined_df["feature"].dtype
        combined_sorted["feature"] = np.arange(
            1, len(combined_sorted) + 1, dtype=original_feature_dtype
        )
    combined_sorted = combined_sorted.reset_index(drop=True)
    return combined_sorted


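# Editorial sketch, not part of the released module: merge two hypothetical
# single-feature tables; features are renumbered 1..N and frames are
# recomputed from the combined set of times.
def _example_combine_feature_dataframes():
    df_a = pd.DataFrame(
        {
            "frame": [0],
            "idx": [1],
            "feature": [1],
            "time": [datetime.datetime(2000, 1, 1, 0, 0)],
        }
    )
    df_b = pd.DataFrame(
        {
            "frame": [0],
            "idx": [1],
            "feature": [1],
            "time": [datetime.datetime(2000, 1, 1, 0, 5)],
        }
    )
    # The second table's later time puts it in frame 1 of the combined result.
    return combine_feature_dataframes([df_a, df_b], renumber_features=True)

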
@decorators.irispandas_to_xarray()
def transform_feature_points(
    features,
    new_dataset,
    latitude_name=None,
    longitude_name=None,
    altitude_name=None,
    max_time_away=None,
    max_space_away=None,
    max_vspace_away=None,
    warn_dropped_features=True,
):
    """Function to transform input feature dataset horizontal grid points to a different grid.
    The typical use case for this function is to transform detected features to perform
    segmentation on a different grid.

    The existing feature dataset must have some latitude/longitude coordinates associated
    with each feature, and the new_dataset must have latitude/longitude available with
    the same name. Note that due to xarray/iris incompatibilities, we suggest that the
    input coordinates match the standard_name from Iris.

    Parameters
    ----------
    features: pd.DataFrame
        Input feature dataframe
    new_dataset: iris.cube.Cube or xarray
        The dataset to transform the feature points onto.
    latitude_name: str
        The name of the latitude coordinate. If None, tries to auto-detect.
    longitude_name: str
        The name of the longitude coordinate. If None, tries to auto-detect.
    altitude_name: str
        The name of the altitude coordinate. If None, tries to auto-detect.
    max_time_away: datetime.timedelta
        The maximum time delta to allow between a feature and its associated
        point in the new dataset.
    max_space_away: float
        The maximum horizontal distance (in meters) to transform features to.
    max_vspace_away: float
        The maximum vertical distance (in meters) to transform features to.
    warn_dropped_features: bool
        Whether or not to print a warning message if one of the max_* options is
        going to result in features that are dropped.

    Returns
    -------
    transformed_features: pd.DataFrame
        A new feature dataframe, with the coordinates transformed to
        the new grid, suitable for use in segmentation

    """

    RADIUS_EARTH_M = 6371000
    is_3D = "vdim" in features
    if is_3D:
        vert_coord = internal_utils.find_vertical_coord_name(new_dataset, altitude_name)

    lat_coord, lon_coord = internal_utils.detect_latlon_coord_name(
        new_dataset, latitude_name=latitude_name, longitude_name=longitude_name
    )

    if lat_coord not in features or lon_coord not in features:
        raise ValueError("Cannot find latitude and/or longitude coordinate")

    lat_vals_new = new_dataset[lat_coord].values
    lon_vals_new = new_dataset[lon_coord].values

    if len(lat_vals_new.shape) != len(lon_vals_new.shape):
        raise ValueError(
            "Cannot work with lat/lon coordinates of unequal dimensionality"
        )

    # the lat/lons must be a 2D grid, so if they aren't, make them one.
    if len(lat_vals_new.shape) == 1:
        lon_vals_new, lat_vals_new = np.meshgrid(lon_vals_new, lat_vals_new)

    # we have to convert to radians because scikit-learn's haversine
    # requires that the input be in radians.
    flat_lats = np.deg2rad(lat_vals_new.ravel())
    flat_lons = np.deg2rad(lon_vals_new.ravel())

    # we have to drop NaN values.
    either_nan = np.logical_or(np.isnan(flat_lats), np.isnan(flat_lons))
    # we need to remember where these values are in the array so that we can
    # appropriately unravel them.
    loc_arr_trimmed = np.where(np.logical_not(either_nan))[0]
    flat_lats_nona = flat_lats[~either_nan]
    flat_lons_nona = flat_lons[~either_nan]
    ll_tree = sklearn.neighbors.BallTree(
        np.array([flat_lats_nona, flat_lons_nona]).T, metric="haversine"
    )

    ret_features = copy.deepcopy(features)

    # there is almost certainly room for speedup in here.
    rad_lats = np.deg2rad(features[lat_coord])
    rad_lons = np.deg2rad(features[lon_coord])
    dists, closest_pts = ll_tree.query(np.column_stack((rad_lats, rad_lons)))
    unraveled_h1, unraveled_h2 = np.unravel_index(
        loc_arr_trimmed[closest_pts[:, 0]], np.shape(lat_vals_new)
    )

    ret_features["hdim_1"] = ("index", unraveled_h1)
    ret_features["hdim_2"] = ("index", unraveled_h2)

    # now interpolate vertical, if available.
    if is_3D and max_space_away is not None and max_vspace_away is not None:
        alt_tree = sklearn.neighbors.BallTree(
            new_dataset[vert_coord].values[:, np.newaxis]
        )
        alt_dists, closest_alt_pts = alt_tree.query(
            features[vert_coord].values[:, np.newaxis]
        )
        ret_features["vdim"] = ("index", closest_alt_pts[:, 0])

        dist_cond = xr.DataArray(
            np.logical_or(
                (dists[:, 0] * RADIUS_EARTH_M) < max_space_away,
                alt_dists[:, 0] < max_vspace_away,
            ),
            dims="index",
        )
    elif max_space_away is not None:
        dist_cond = xr.DataArray(
            (dists[:, 0] * RADIUS_EARTH_M) < max_space_away, dims="index"
        )

    if max_space_away is not None or max_vspace_away is not None:
        ret_features = ret_features.where(dist_cond, drop=True)

    # force times to match, where appropriate.
    if "time" in new_dataset.coords and max_time_away is not None:
        # this is necessary due to the iris/xarray/pandas weirdness that we have.
        old_feat_times = ret_features["time"].astype("datetime64[s]")
        new_dataset_times = new_dataset["time"].astype("datetime64[s]")
        closest_times = np.min(np.abs(old_feat_times - new_dataset_times), axis=1)
        closest_time_locs = np.abs(old_feat_times - new_dataset_times).argmin(axis=1)
        # force to seconds to deal with iris not accepting ms
        ret_features["time"] = new_dataset["time"][closest_time_locs].astype(
            "datetime64[s]"
        )
        ret_features = ret_features.where(
            closest_times < np.timedelta64(max_time_away), drop=True
        )

    if warn_dropped_features:
        removed_features = np.setdiff1d(features["feature"], ret_features["feature"])
        if len(removed_features):
            warnings.warn(
                "Dropping feature numbers: " + str(removed_features.tolist()),
                UserWarning,
            )

    # make sure that feature points are converted back to int64
    ret_features["feature"] = ret_features.feature.astype(int)

    return ret_features


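# Editorial sketch, not part of the released module: move two hypothetical
# features onto a coarser 1-degree lat/lon grid, assuming the coordinate names
# "latitude" and "longitude" are auto-detected.
def _example_transform_feature_points():
    features = pd.DataFrame(
        {"feature": [1, 2], "latitude": [30.2, 31.7], "longitude": [-95.4, -94.1]}
    )
    target = xr.DataArray(
        np.zeros((10, 10)),
        dims=("y", "x"),
        coords={
            "latitude": ("y", np.linspace(25.0, 34.0, 10)),
            "longitude": ("x", np.linspace(-100.0, -91.0, 10)),
        },
    )
    # hdim_1/hdim_2 in the result index the nearest grid point of `target`.
    return transform_feature_points(features, target)

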
def standardize_track_dataset(TrackedFeatures, Mask, Projection=None):
    """Combine the feature mask dataset returned by tobac.segmentation with the
    TrackedFeatures dataset returned by tobac.linking_trackpy into one common dataset.

    CAUTION: this function is experimental. No data structures output are guaranteed to be supported in future versions of tobac.

    Also renames the variables to be more descriptive and comply with cf-tree,
    and converts the default cell parent ID to an integer table.
    Projection is an xarray DataArray.
    TODO: Add metadata attributes

    Parameters
    ----------
    TrackedFeatures : xarray.core.dataset.Dataset
        xarray dataset of tobac Track information, the xarray dataset returned by tobac.tracking.linking_trackpy
    Mask: xarray.core.dataset.Dataset
        xarray dataset of tobac segmentation mask information, the xarray dataset returned
        by tobac.segmentation.segmentation
    Projection : xarray.core.dataarray.DataArray, default = None
        array.DataArray of the original input dataset (gridded nexrad data for example).
        If using gridded nexrad data, this can be input as: data['ProjectionCoordinateSystem']
        An example of the type of information in the dataarray includes the following attributes:
        latitude_of_projection_origin :29.471900939941406
        longitude_of_projection_origin :-95.0787353515625
        _CoordinateTransformType :Projection
        _CoordinateAxes :x y z time
        _CoordinateAxesTypes :GeoX GeoY Height Time
        grid_mapping_name :azimuthal_equidistant
        semi_major_axis :6370997.0
        inverse_flattening :298.25
        longitude_of_prime_meridian :0.0
        false_easting :0.0
        false_northing :0.0

    Returns
    -------
    ds : xarray.core.dataset.Dataset
        xarray dataset of merged Track and Segmentation Mask datasets with renamed variables.
    """
    import xarray as xr

    feature_standard_names = {
        # new variable name, and long description for the NetCDF attribute
        "frame": (
            "feature_time_index",
            "positional index of the feature along the time dimension of the mask, from 0 to N-1",
        ),
        "hdim_1": (
            "feature_hdim1_coordinate",
            "position of the feature along the first horizontal dimension in grid point space; a north-south coordinate for dim order (time, y, x)."
            "The numbering is consistent with positional indexing of the coordinate, but can be"
            "fractional, to account for a centroid not aligned to the grid.",
        ),
        "hdim_2": (
            "feature_hdim2_coordinate",
            "position of the feature along the second horizontal dimension in grid point space; an east-west coordinate for dim order (time, y, x)"
            "The numbering is consistent with positional indexing of the coordinate, but can be"
            "fractional, to account for a centroid not aligned to the grid.",
        ),
        "idx": (
            "feature_id_this_frame",
            "Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",
        ),
        "num": (
            "feature_grid_cell_count",
            "Number of grid points that are within the threshold of this feature",
        ),
        "threshold_value": (
            "feature_threshold_max",
            "Maximum threshold value that this feature satisfies, from among the thresholds used in feature detection",
        ),
        "feature": (
            "feature",
            "Unique number of the feature; starts from 1 and increments by 1 to the number of features",
        ),
        "time": (
            "feature_time",
            "time of the feature, consistent with feature_time_index",
        ),
        "timestr": (
            "feature_time_str",
            "String representation of the feature time, YYYY-MM-DD HH:MM:SS",
        ),
        "projection_y_coordinate": (
            "feature_projection_y_coordinate",
            "y position of the feature in the projection given by ProjectionCoordinateSystem",
        ),
        "projection_x_coordinate": (
            "feature_projection_x_coordinate",
            "x position of the feature in the projection given by ProjectionCoordinateSystem",
        ),
        "lat": ("feature_latitude", "latitude of the feature"),
        "lon": ("feature_longitude", "longitude of the feature"),
        "ncells": (
            "feature_ncells",
            "number of grid cells for this feature (meaning uncertain)",
        ),
        "areas": ("feature_area",),
        "isolated": ("feature_isolation_flag",),
        "num_objects": ("number_of_feature_neighbors",),
        "cell": ("feature_parent_cell_id",),
        "time_cell": ("feature_parent_cell_elapsed_time",),
        "segmentation_mask": ("2d segmentation mask",),
    }
    new_feature_var_names = {
        k: feature_standard_names[k][0]
        for k in feature_standard_names.keys()
        if k in TrackedFeatures.variables.keys()
    }

    # TrackedFeatures = TrackedFeatures.drop(["cell_parent_track_id"])
    # Combine Track and Mask variables. Use the 'feature' variable as the coordinate variable instead of
    # the 'index' variable and call the dimension 'feature'
    ds = xr.merge(
        [
            TrackedFeatures.swap_dims({"index": "feature"})
            .drop("index")
            .rename_vars(new_feature_var_names),
            Mask,
        ]
    )

    # Add the projection data back in
    if Projection is not None:
        ds["ProjectionCoordinateSystem"] = Projection

    return ds
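

# Editorial sketch, not part of the released module: the call pattern for
# standardize_track_dataset with stand-in datasets. `tracks_ds` and `mask_ds`
# stand in for the outputs of tobac.linking_trackpy and tobac.segmentation
# after conversion to xarray.
def _example_standardize_track_dataset():
    tracks_ds = xr.Dataset(
        {"feature": ("index", [1, 2]), "cell": ("index", [1, 1])},
        coords={"index": [0, 1]},
    )
    mask_ds = xr.Dataset()
    # Variables are renamed (e.g. "cell" -> "feature_parent_cell_id") and the
    # "index" dimension is replaced by "feature".
    return standardize_track_dataset(tracks_ds, mask_ds)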