tobac-1.6.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tobac/__init__.py +112 -0
- tobac/analysis/__init__.py +31 -0
- tobac/analysis/cell_analysis.py +628 -0
- tobac/analysis/feature_analysis.py +212 -0
- tobac/analysis/spatial.py +619 -0
- tobac/centerofgravity.py +226 -0
- tobac/feature_detection.py +1758 -0
- tobac/merge_split.py +324 -0
- tobac/plotting.py +2321 -0
- tobac/segmentation/__init__.py +10 -0
- tobac/segmentation/watershed_segmentation.py +1316 -0
- tobac/testing.py +1179 -0
- tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
- tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
- tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
- tobac/tests/test_analysis_spatial.py +1109 -0
- tobac/tests/test_convert.py +265 -0
- tobac/tests/test_datetime.py +216 -0
- tobac/tests/test_decorators.py +148 -0
- tobac/tests/test_feature_detection.py +1321 -0
- tobac/tests/test_generators.py +273 -0
- tobac/tests/test_import.py +24 -0
- tobac/tests/test_iris_xarray_match_utils.py +244 -0
- tobac/tests/test_merge_split.py +351 -0
- tobac/tests/test_pbc_utils.py +497 -0
- tobac/tests/test_sample_data.py +197 -0
- tobac/tests/test_testing.py +747 -0
- tobac/tests/test_tracking.py +714 -0
- tobac/tests/test_utils.py +650 -0
- tobac/tests/test_utils_bulk_statistics.py +789 -0
- tobac/tests/test_utils_coordinates.py +328 -0
- tobac/tests/test_utils_internal.py +97 -0
- tobac/tests/test_xarray_utils.py +232 -0
- tobac/tracking.py +613 -0
- tobac/utils/__init__.py +27 -0
- tobac/utils/bulk_statistics.py +360 -0
- tobac/utils/datetime.py +184 -0
- tobac/utils/decorators.py +540 -0
- tobac/utils/general.py +753 -0
- tobac/utils/generators.py +87 -0
- tobac/utils/internal/__init__.py +2 -0
- tobac/utils/internal/coordinates.py +430 -0
- tobac/utils/internal/iris_utils.py +462 -0
- tobac/utils/internal/label_props.py +82 -0
- tobac/utils/internal/xarray_utils.py +439 -0
- tobac/utils/mask.py +364 -0
- tobac/utils/periodic_boundaries.py +419 -0
- tobac/wrapper.py +244 -0
- tobac-1.6.2.dist-info/METADATA +154 -0
- tobac-1.6.2.dist-info/RECORD +53 -0
- tobac-1.6.2.dist-info/WHEEL +5 -0
- tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
- tobac-1.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,619 @@
"""
Calculate spatial properties (distances, velocities, areas, volumes) of tracked objects
"""

import logging
import warnings
from itertools import combinations
from typing import Literal, Optional, Union

import numpy as np
import pandas as pd
import xarray as xr
from iris.analysis.cartography import area_weights

from tobac.utils.bulk_statistics import get_statistics_from_mask
from tobac.utils import decorators
from tobac.utils.internal.coordinates import (
    COMMON_LON_COORDS,
    find_dataframe_horizontal_coords,
    find_vertical_coord_name,
)

__all__ = (
    "haversine",
    "calculate_distance",
    "calculate_velocity",
    "calculate_velocity_individual",
    "calculate_areas_2Dlatlon",
    "calculate_area",
)


def haversine(lat1, lon1, lat2, lon2):
    """Computes the Haversine distance in kilometers.

    Calculates the Haversine distance between two points
    (based on the implementation in CIS, https://github.com/cedadev/cis).

    Parameters
    ----------
    lat1, lon1 : array of latitude, longitude
        First point or points as array in degrees.

    lat2, lon2 : array of latitude, longitude
        Second point or points as array in degrees.

    Returns
    -------
    arclen * RADIUS_EARTH : array
        Array of distance(s) between the two points (or point arrays) in
        kilometers.

    """

    RADIUS_EARTH = 6378.0
    lat1 = np.radians(lat1)
    lat2 = np.radians(lat2)
    lon1 = np.radians(lon1)
    lon2 = np.radians(lon2)
    # print(lat1,lat2,lon1,lon2)
    arclen = 2 * np.arcsin(
        np.sqrt(
            (np.sin((lat2 - lat1) / 2)) ** 2
            + np.cos(lat1) * np.cos(lat2) * (np.sin((lon2 - lon1) / 2)) ** 2
        )
    )
    return arclen * RADIUS_EARTH
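
A quick, self-contained illustration of the helper above (the coordinate values are made up for demonstration and are not part of the package):

import numpy as np
from tobac.analysis.spatial import haversine

# Distance Hamburg -> Munich, roughly 600 km (illustrative values)
print(haversine(np.array([53.55]), np.array([9.99]), np.array([48.14]), np.array([11.58])))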


def calculate_distance(
    feature_1: pd.DataFrame,
    feature_2: pd.DataFrame,
    method_distance: Optional[Literal["xy", "latlon"]] = None,
    hdim1_coord: Optional[str] = None,
    hdim2_coord: Optional[str] = None,
    return_components: bool = False,
    vertical_coord: Optional[str] = None,
    use_3d: bool = False,
) -> Union[float, pd.Series, dict]:
    """Compute the distance between two features. It is based on
    either lat/lon coordinates or x/y coordinates.

    Parameters
    ----------
    feature_1, feature_2 : pandas.DataFrame or pandas.Series
        Dataframes containing multiple features or pandas.Series
        of one feature. Need to contain either projection_x_coordinate
        and projection_y_coordinate or latitude and longitude
        coordinates.

    method_distance : {None, 'xy', 'latlon'}, optional
        Method of distance calculation. 'xy' uses the length of the
        vector between the two features, 'latlon' uses the haversine
        distance. None checks whether the required coordinates are
        present and starts with 'xy'. Default is None.

    hdim1_coord, hdim2_coord : str, optional (default: None)
        The names of the coordinates for the two horizontal dimensions to use.
        If None, tobac.utils.internal.coordinates.find_dataframe_horizontal_coords
        will be used to search for coordinate names present in both dataframes.

    return_components : bool, optional (default=False)
        Flag to control whether the distance is calculated and
        returned as its vector components. If False, only the scalar
        (absolute) value is returned. If True, the function returns a
        dictionary containing the scalar value as well as its individual
        directional components (e.g., 'dx', 'dy', 'dz').

    vertical_coord : str, optional (default=None)
        Name of the column in the feature representing the vertical (z-axis)
        coordinate. If the tracking data includes a vertical dimension, it is
        identified by checking for common names such as 'z', 'height', or
        'altitude'. If none of these are present, `vertical_coord` is set to
        None, indicating that the data should be treated as 2D.

    use_3d : bool, optional (default=False)
        If True and a vertical coordinate is available, compute full 3D
        distances. Otherwise, only 2D (horizontal) distances are used.

    Returns
    -------
    distance : float or pandas.Series or dict
        Float with the distance between the two features in meters if
        the input are two pandas.Series containing one feature,
        pandas.Series of the distances if one of the inputs contains
        multiple features.

    """

    if method_distance is None and (hdim1_coord is not None or hdim2_coord is not None):
        raise ValueError(
            "method_distance parameter must be provided if either hdim1_coord or hdim2_coord are specified"
        )

    if method_distance not in [None, "xy", "latlon"]:
        raise ValueError(
            "method_distance invalid, must be one of (None, 'xy', 'latlon')"
        )

    feature_1_coord = find_dataframe_horizontal_coords(
        feature_1,
        hdim1_coord=hdim1_coord,
        hdim2_coord=hdim2_coord,
        coord_type=method_distance,
    )
    feature_2_coord = find_dataframe_horizontal_coords(
        feature_2,
        hdim1_coord=hdim1_coord,
        hdim2_coord=hdim2_coord,
        coord_type=method_distance,
    )

    if feature_1_coord != feature_2_coord:
        raise ValueError(
            "Discovered coordinates in feature_1 and feature_2 do not match, please specify coordinates using hdim1_coord and hdim2_coord parameters"
        )

    hdim1_coord = feature_1_coord[0]
    hdim2_coord = feature_1_coord[1]
    method_distance = feature_1_coord[2]

    has_z = (
        use_3d
        and vertical_coord is not None
        and vertical_coord in feature_1
        and vertical_coord in feature_2
    )

    if method_distance == "xy":
        dy = feature_2[hdim1_coord] - feature_1[hdim1_coord]
        dx = feature_2[hdim2_coord] - feature_1[hdim2_coord]

        if has_z:
            dz = feature_2[vertical_coord] - feature_1[vertical_coord]
            distance_3d = np.sqrt(dx**2 + dy**2 + dz**2)
            result = {"distance_3d": distance_3d, "dx": dx, "dy": dy, "dz": dz}
            return result if return_components else distance_3d
        else:
            # Horizontal distance
            distance = np.sqrt(dx**2 + dy**2)
            result = {"distance": distance, "dx": dx, "dy": dy}
            return result if return_components else distance

    elif method_distance == "latlon":
        # Check if order of coords is correct, and swap if mismatched:
        if hdim1_coord.lower() in COMMON_LON_COORDS:
            hdim1_coord, hdim2_coord = hdim2_coord, hdim1_coord

        distance = 1000 * haversine(
            feature_1[hdim1_coord],
            feature_1[hdim2_coord],
            feature_2[hdim1_coord],
            feature_2[hdim2_coord],
        )

        RADIUS_EARTH = 6378.0
        lat1_r, lat2_r = np.radians(feature_1[hdim1_coord]), np.radians(
            feature_2[hdim1_coord]
        )
        lon1_r, lon2_r = np.radians(feature_1[hdim2_coord]), np.radians(
            feature_2[hdim2_coord]
        )
        dlat = lat2_r - lat1_r
        dlon = lon2_r - lon1_r
        dx = RADIUS_EARTH * dlon * np.cos((lat1_r + lat2_r) / 2) * 1000
        dy = RADIUS_EARTH * dlat * 1000

        if has_z:
            dz = feature_2[vertical_coord] - feature_1[vertical_coord]
            distance_3d = np.sqrt(distance**2 + dz**2)
            result = {"distance_3d": distance_3d, "dx": dx, "dy": dy, "dz": dz}
            return result if return_components else distance_3d
        else:
            result = {"distance": distance, "dx": dx, "dy": dy}
            return result if return_components else distance
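
A minimal sketch of calling calculate_distance on two single-feature rows. The column values are invented, and it is assumed that the standard tobac projection coordinate names are picked up by the coordinate search:

import pandas as pd
from tobac.analysis.spatial import calculate_distance

# Two hypothetical features on an x/y grid (values in meters)
feature_a = pd.Series({"projection_x_coordinate": 0.0, "projection_y_coordinate": 0.0})
feature_b = pd.Series({"projection_x_coordinate": 3000.0, "projection_y_coordinate": 4000.0})

# Scalar distance (5000 m here), or a dict with 'distance', 'dx', 'dy' if return_components=True
print(calculate_distance(feature_a, feature_b, method_distance="xy"))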


def calculate_velocity_individual(
    feature_old,
    feature_new,
    method_distance=None,
    return_components=False,
    vertical_coord=None,
    use_3d: bool = False,
):
    """Calculate the mean velocity of a feature between two timeframes.

    Parameters
    ----------
    feature_old : pandas.Series
        pandas.Series of a feature at a certain timeframe. Needs to
        contain a 'time' column and either projection_x_coordinate
        and projection_y_coordinate or latitude and longitude coordinates.

    feature_new : pandas.Series
        pandas.Series of the same feature at a later timeframe. Needs
        to contain a 'time' column and either projection_x_coordinate
        and projection_y_coordinate or latitude and longitude coordinates.

    method_distance : {None, 'xy', 'latlon'}, optional
        Method of distance calculation, used to calculate the velocity.
        'xy' uses the length of the vector between the two features,
        'latlon' uses the haversine distance. None checks whether the
        required coordinates are present and starts with 'xy'.
        Default is None.

    return_components : bool, optional (default=False)
        Flag to control whether the velocity is calculated and
        returned as its vector components. If False, only the scalar
        (absolute) value is returned. If True, the function returns a
        dictionary containing the scalar value as well as its individual
        directional components (e.g., 'vx', 'vy', 'vz' for velocity).

    vertical_coord : str, optional (default=None)
        Name of the column in the feature representing the vertical (z-axis)
        coordinate. If the tracking data includes a vertical dimension, it is
        identified by checking for common names such as 'z', 'height', or
        'altitude'. If none of these are present, `vertical_coord` is set to
        None, indicating that the data should be treated as 2D.

    use_3d : bool, optional (default=False)
        If True and a vertical coordinate is available, compute full 3D
        distances and velocities. Otherwise, only 2D (horizontal) distances
        are used.

    Returns
    -------
    velocity : float
        Value of the approximate velocity.

    """

    distance_result = calculate_distance(
        feature_old,
        feature_new,
        method_distance=method_distance,
        return_components=return_components,
        vertical_coord=vertical_coord,
        use_3d=use_3d,
    )
    diff_time = (feature_new["time"] - feature_old["time"]).total_seconds()

    if not np.isfinite(diff_time) or diff_time == 0:
        msg = (
            f"Velocity calculation skipped: Δt={diff_time} s "
            f"between feature_old (time={feature_old['time']}) and feature_new (time={feature_new['time']})."
        )
        warnings.warn(msg, RuntimeWarning)

        if return_components and isinstance(distance_result, dict):
            out = {}
            if "distance_3d" in distance_result:
                out["v_3d"] = np.nan
            if "distance" in distance_result:
                out["v"] = np.nan
            for key in ("dx", "dy", "dz"):
                if key in distance_result:
                    out["v" + key[1:]] = np.nan
            return out
        return np.nan

    if return_components and isinstance(distance_result, dict):
        velocity = {}
        if "distance_3d" in distance_result:
            velocity["v_3d"] = distance_result["distance_3d"] / diff_time
        if "distance" in distance_result:
            velocity["v"] = distance_result["distance"] / diff_time
        for key in ["dx", "dy", "dz"]:
            if key in distance_result:
                velocity["v" + key[1:]] = distance_result[key] / diff_time
        return velocity
    else:
        return distance_result / diff_time


def calculate_velocity(
    track, method_distance=None, return_components=False, use_3d: bool = False
):
    """Calculate the velocities of a set of linked features.

    Parameters
    ----------
    track : pandas.DataFrame
        Dataframe of linked features, containing the columns 'cell',
        'time' and either 'projection_x_coordinate' and
        'projection_y_coordinate' or 'latitude' and 'longitude'.

    method_distance : {None, 'xy', 'latlon'}, optional
        Method of distance calculation, used to calculate the
        velocity. 'xy' uses the length of the vector between the
        two features, 'latlon' uses the haversine distance. None
        checks whether the required coordinates are present and
        starts with 'xy'. Default is None.

    return_components : bool, optional (default=False)
        Flag to control whether the velocity is calculated and
        returned as its vector components. If False, only the scalar
        (absolute) value is returned. If True, the function returns a
        dictionary containing the scalar value as well as its individual
        directional components (e.g., 'vx', 'vy', 'vz' for velocity).

    use_3d : bool, optional (default=False)
        If True and a vertical coordinate is available, compute full 3D
        distances and velocities. Otherwise, only 2D (horizontal) distances
        are used.

    Returns
    -------
    track : pandas.DataFrame
        DataFrame from the input, with an additional column 'v',
        containing the value of the velocity for every feature at
        every possible timestep.
    """

    # Check if data is 3d
    for cand in ["z", "height", "altitude"]:
        if cand in track:
            vertical_coord = cand
            break
    else:
        vertical_coord = None

    for cell_i, track_i in track.groupby("cell"):
        index = track_i.index.values
        for i, index_i in enumerate(index[:-1]):
            velocity = calculate_velocity_individual(
                track_i.loc[index[i]],
                track_i.loc[index[i + 1]],
                method_distance=method_distance,
                return_components=return_components,
                vertical_coord=vertical_coord,
                use_3d=use_3d,
            )
            if return_components and isinstance(velocity, dict):
                for key, value in velocity.items():
                    track.at[index_i, key] = value
            else:
                key = "v_3d" if (use_3d and vertical_coord is not None) else "v"
                track.at[index_i, key] = velocity

    return track
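
A small illustrative run of calculate_velocity on a hand-made two-step track; in practice the input DataFrame comes from tobac's tracking step:

import pandas as pd
from tobac.analysis.spatial import calculate_velocity

# Hypothetical two-timestep track of a single cell on an x/y grid (meters)
track = pd.DataFrame(
    {
        "cell": [1, 1],
        "time": pd.to_datetime(["2000-01-01 00:00", "2000-01-01 00:05"]),
        "projection_x_coordinate": [0.0, 3000.0],
        "projection_y_coordinate": [0.0, 4000.0],
    }
)

# Adds a 'v' column; here 5000 m over 300 s gives about 16.7 m/s for the first row
track = calculate_velocity(track, method_distance="xy")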


def calculate_nearestneighbordistance(features, method_distance=None):
    """Calculate the distance between a feature and the nearest other
    feature in the same timeframe.

    Parameters
    ----------
    features : pandas.DataFrame
        DataFrame of the features whose nearest neighbor distance is to
        be calculated. Needs to contain either projection_x_coordinate
        and projection_y_coordinate or latitude and longitude coordinates.

    method_distance : {None, 'xy', 'latlon'}, optional
        Method of distance calculation. 'xy' uses the length of the vector
        between the two features, 'latlon' uses the haversine distance.
        None checks whether the required coordinates are present and starts
        with 'xy'. Default is None.

    Returns
    -------
    features : pandas.DataFrame
        DataFrame of the features with a new column 'min_distance',
        containing the calculated minimal distance to other features.

    """

    features["min_distance"] = np.nan
    for time_i, features_i in features.groupby("time"):
        logging.debug(str(time_i))
        indeces = combinations(features_i.index.values, 2)
        # Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area)
        distances = []
        for index_1, index_2 in indeces:
            if index_1 is not index_2:
                distance = calculate_distance(
                    features_i.loc[index_1],
                    features_i.loc[index_2],
                    method_distance=method_distance,
                )
                distances.append(
                    pd.DataFrame(
                        {"index_1": index_1, "index_2": index_2, "distance": distance},
                        index=[0],
                    )
                )
        if any([x is not None for x in distances]):
            distances = pd.concat(distances, ignore_index=True)
            for i in features_i.index:
                min_distance = distances.loc[
                    (distances["index_1"] == i) | (distances["index_2"] == i),
                    "distance",
                ].min()
                features.at[i, "min_distance"] = min_distance
    return features
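
An illustrative call on a made-up feature table with two features in the same time step (columns and values are assumptions, not package test data):

import pandas as pd
from tobac.analysis.spatial import calculate_nearestneighbordistance

# Hypothetical features at one time step, 1 km apart on an x/y grid
features = pd.DataFrame(
    {
        "time": pd.to_datetime(["2000-01-01 00:00"] * 2),
        "projection_x_coordinate": [0.0, 1000.0],
        "projection_y_coordinate": [0.0, 0.0],
    }
)

# Adds a 'min_distance' column (1000 m for both rows here)
features = calculate_nearestneighbordistance(features, method_distance="xy")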


def calculate_areas_2Dlatlon(_2Dlat_coord, _2Dlon_coord):
    """Calculate an array of cell areas when given two 2D arrays
    of latitude and longitude values.

    NOTE: This currently assumes that the lat/lon grid is orthogonal,
    which is not strictly true! It's close enough for most cases, but
    should be updated in future to use the cross product of the
    distances to the neighbouring cells. This will require the use
    of a more advanced calculation. I would advise using pyproj
    at some point in the future to solve this issue and replace
    haversine distance.

    Parameters
    ----------
    _2Dlat_coord : AuxCoord
        Iris auxiliary coordinate containing a 2d grid of latitudes
        for each point.

    _2Dlon_coord : AuxCoord
        Iris auxiliary coordinate containing a 2d grid of longitudes
        for each point.

    Returns
    -------
    area : ndarray
        A numpy array approximating the area of each cell.

    """

    hdist1 = (
        haversine(
            _2Dlat_coord.points[:-1],
            _2Dlon_coord.points[:-1],
            _2Dlat_coord.points[1:],
            _2Dlon_coord.points[1:],
        )
        * 1000
    )

    dists1 = np.zeros(_2Dlat_coord.points.shape)
    dists1[0] = hdist1[0]
    dists1[-1] = hdist1[-1]
    dists1[1:-1] = (hdist1[0:-1] + hdist1[1:]) * 0.5

    hdist2 = (
        haversine(
            _2Dlat_coord.points[:, :-1],
            _2Dlon_coord.points[:, :-1],
            _2Dlat_coord.points[:, 1:],
            _2Dlon_coord.points[:, 1:],
        )
        * 1000
    )

    dists2 = np.zeros(_2Dlat_coord.points.shape)
    dists2[:, 0] = hdist2[:, 0]
    dists2[:, -1] = hdist2[:, -1]
    dists2[:, 1:-1] = (hdist2[:, 0:-1] + hdist2[:, 1:]) * 0.5

    area = dists1 * dists2

    return area
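
A minimal sketch of the 2D lat/lon area approximation above, using a tiny synthetic grid (the 0.1-degree spacing and location are arbitrary):

import numpy as np
from iris.coords import AuxCoord
from tobac.analysis.spatial import calculate_areas_2Dlatlon

# Synthetic 3x3 grid with 0.1 degree spacing around 50N, 10E
lats, lons = np.meshgrid(np.linspace(50.0, 50.2, 3), np.linspace(10.0, 10.2, 3), indexing="ij")
lat_coord = AuxCoord(lats, standard_name="latitude", units="degrees")
lon_coord = AuxCoord(lons, standard_name="longitude", units="degrees")

# Roughly 11 km x 7 km cells at this latitude, i.e. areas of order 8e7 m^2
areas = calculate_areas_2Dlatlon(lat_coord, lon_coord)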


@decorators.xarray_to_iris()
def calculate_area(features, mask, method_area=None, vertical_coord=None):
    """Calculate the area of the segments for each feature.

    Parameters
    ----------
    features : pandas.DataFrame
        DataFrame of the features whose area is to be calculated.

    mask : iris.cube.Cube
        Cube containing mask (int for tracked volumes, 0 everywhere
        else). Needs to contain either projection_x_coordinate and
        projection_y_coordinate or latitude and longitude
        coordinates.

    method_area : {None, 'xy', 'latlon'}, optional
        Flag determining how the area is calculated. 'xy' uses the
        areas of the individual pixels, 'latlon' uses the
        area_weights method of iris.analysis.cartography, None
        checks whether the required coordinates are present and
        starts with 'xy'. Default is None.

    vertical_coord : None | str, optional (default: None)
        Name of the vertical coordinate. If None, tries to auto-detect.
        It looks for the coordinate or the dimension name corresponding
        to the string.

    Returns
    -------
    features : pandas.DataFrame
        DataFrame of the features with a new column 'area',
        containing the calculated areas.

    Raises
    ------
    ValueError
        If neither latitude/longitude nor
        projection_x_coordinate/projection_y_coordinate are
        present in mask_coords.

        If latitude/longitude coordinates are 2D.

        If latitude/longitude shapes are not supported.

        If method is undefined, i.e. method is neither None,
        'xy' nor 'latlon'.

    """

    features["area"] = np.nan

    # Get the first time step of mask to remove time dimension of calculated areas
    mask_slice = next(mask.slices_over("time"))
    is_3d = len(mask_slice.core_data().shape) == 3
    if is_3d:
        vertical_coord_name = find_vertical_coord_name(mask_slice, vertical_coord)
        # Need to get var_name as xarray uses this to label dims
        collapse_dim = mask_slice.coords(vertical_coord_name)[0].var_name
    else:
        collapse_dim = None

    mask_coords = [coord.name() for coord in mask_slice.coords()]
    if method_area is None:
        if ("projection_x_coordinate" in mask_coords) and (
            "projection_y_coordinate" in mask_coords
        ):
            method_area = "xy"
        elif ("latitude" in mask_coords) and ("longitude" in mask_coords):
            method_area = "latlon"
        else:
            raise ValueError(
                "either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances"
            )
    # logging.debug("calculating area using method " + method_area)
    if method_area == "xy":
        if not (
            mask_slice.coord("projection_x_coordinate").has_bounds()
            and mask_slice.coord("projection_y_coordinate").has_bounds()
        ):
            mask_slice.coord("projection_x_coordinate").guess_bounds()
            mask_slice.coord("projection_y_coordinate").guess_bounds()
        area = np.outer(
            np.diff(mask_slice.coord("projection_y_coordinate").bounds, axis=1),
            np.diff(mask_slice.coord("projection_x_coordinate").bounds, axis=1),
        )
    elif method_area == "latlon":
        if (mask_slice.coord("latitude").ndim == 1) and (
            mask_slice.coord("longitude").ndim == 1
        ):
            if not (
                mask_slice.coord("latitude").has_bounds()
                and mask_slice.coord("longitude").has_bounds()
            ):
                mask_slice.coord("latitude").guess_bounds()
                mask_slice.coord("longitude").guess_bounds()
            area = area_weights(mask_slice, normalize=False)
        elif (
            mask_slice.coord("latitude").ndim == 2
            and mask_slice.coord("longitude").ndim == 2
        ):
            area = calculate_areas_2Dlatlon(
                mask_slice.coord("latitude"), mask_slice.coord("longitude")
            )
        else:
            raise ValueError("latitude/longitude coordinate shape not supported")
    else:
        raise ValueError("method undefined")

    # Area needs to be a dataarray for get_statistics_from_mask, but otherwise dims/coords don't actually matter
    area = xr.DataArray(area, dims=("a", "b"))

    features = get_statistics_from_mask(
        features,
        mask,
        area,
        statistic={"area": np.sum},
        default=np.nan,
        collapse_dim=collapse_dim,
    )

    return features
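
As a self-contained illustration of the 'xy' branch of calculate_area (pixel areas from coordinate bounds), a small sketch with a synthetic 1 km grid; in a real workflow one would simply call calculate_area(features, mask) on feature-detection and segmentation output:

import numpy as np
from iris.coords import DimCoord

# Synthetic projection coordinates with 1000 m spacing
y_coord = DimCoord(np.arange(0.0, 5000.0, 1000.0), standard_name="projection_y_coordinate", units="m")
x_coord = DimCoord(np.arange(0.0, 4000.0, 1000.0), standard_name="projection_x_coordinate", units="m")
y_coord.guess_bounds()
x_coord.guess_bounds()

# Same pixel-area construction as the 'xy' branch above: 1e6 m^2 per pixel here
area = np.outer(np.diff(y_coord.bounds, axis=1), np.diff(x_coord.bounds, axis=1))
print(area.shape, area[0, 0])  # (5, 4) 1000000.0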