tobac-1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
tobac/utils/general.py ADDED
@@ -0,0 +1,753 @@
+"""General tobac utilities"""
+
+from __future__ import annotations
+import copy
+import logging
+from typing import Callable, Optional, Union
+from typing_extensions import Literal
+import iris
+import pandas as pd
+import iris.cube
+
+from . import internal as internal_utils
+from . import decorators
+import numpy as np
+import sklearn
+import sklearn.neighbors
+import datetime
+import xarray as xr
+import warnings
+
+
+def add_coordinates(
+    features: pd.DataFrame,
+    variable_cube: Union[xr.DataArray, iris.cube.Cube],
+    use_standard_names: Optional[bool] = None,
+) -> pd.DataFrame:
+    """Add coordinates from the input cube of the feature detection
+    to the trajectories/features.
+
+    :meta private:
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        Trajectories/features from feature detection or linking step.
+
+    variable_cube : iris.cube.Cube or xarray.DataArray
+        Input data used for the tracking with coordinate information
+        to transfer to the resulting DataFrame. Needs to contain the
+        coordinate 'time'.
+
+    use_standard_names : bool
+        If True, look for a standard_name when interpolating a coordinate and
+        use it to name the output coordinate, mimicking iris functionality.
+        If False, use the actual name of the coordinate in the output.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Trajectories with added coordinates.
+
+    """
+
+    if isinstance(variable_cube, iris.cube.Cube):
+        return internal_utils.iris_utils.add_coordinates(features, variable_cube)
+    if isinstance(variable_cube, xr.DataArray):
+        return internal_utils.xr_utils.add_coordinates_to_features(
+            features,
+            variable_cube,
+            use_standard_names=use_standard_names,
+        )
+    raise ValueError(
+        "add_coordinates only supports xarray.DataArray and iris.cube.Cube"
+    )
+
+
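A minimal usage sketch (hypothetical objects: `field` is the xarray DataArray that feature detection ran on, carrying a 'time' coordinate, and `features` is the DataFrame it returned):

    from tobac.utils.general import add_coordinates

    # interpolate the DataArray's auxiliary coordinates (latitude, longitude, ...)
    # to the feature positions; output column naming follows use_standard_names
    features_with_coords = add_coordinates(features, field, use_standard_names=True)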
+def add_coordinates_3D(
+    t: pd.DataFrame,
+    variable_cube: Union[xr.DataArray, iris.cube.Cube],
+    vertical_coord: Union[str, int] = None,
+    vertical_axis: Union[int, None] = None,
+    assume_coords_fixed_in_time: bool = True,
+    use_standard_names: Optional[bool] = None,
+):
+    """Function adding coordinates from the tracking cube to the trajectories
+    for the 3D case: time, longitude & latitude, x & y dimensions, and altitude.
+
+    Parameters
+    ----------
+    t : pandas DataFrame
+        Input features
+    variable_cube : iris.cube.Cube
+        Cube (usually the one you are tracking on) containing at least the
+        dimension 'time'. Typically, 'longitude', 'latitude',
+        'x_projection_coordinate', 'y_projection_coordinate', and 'altitude'
+        (if 3D) are the coordinates that we expect, although this function
+        will happily interpolate along any dimension coordinates you give.
+    vertical_coord : str or int
+        Name or axis number of the vertical coordinate. If None, tries to auto-detect.
+        If it is a string, it looks for the coordinate or the dimension name corresponding
+        to the string. If it is an int, it assumes that it is the vertical axis.
+        Note that if you only have a 2D or 3D coordinate for altitude, you must
+        pass in an int.
+    vertical_axis : int or None
+        Axis number of the vertical.
+    assume_coords_fixed_in_time : bool
+        If True, assume that the coordinates are fixed in time, even if the
+        coordinates say they vary in time. This is, by default, True, to preserve
+        legacy functionality. If False, a coordinate that says it varies in time
+        is taken at its word.
+    use_standard_names : bool
+        If True, look for a standard_name when interpolating a coordinate and
+        use it to name the output coordinate, mimicking iris functionality.
+        If False, use the actual name of the coordinate in the output.
+
+    Returns
+    -------
+    pandas DataFrame
+        trajectories with added coordinates
+    """
+    if isinstance(variable_cube, iris.cube.Cube):
+        return internal_utils.iris_utils.add_coordinates_3D(
+            t, variable_cube, vertical_coord, vertical_axis, assume_coords_fixed_in_time
+        )
+    if isinstance(variable_cube, xr.DataArray):
+        return internal_utils.xr_utils.add_coordinates_to_features(
+            t,
+            variable_cube,
+            vertical_coord=vertical_coord,
+            vertical_axis=vertical_axis,
+            use_standard_names=use_standard_names,
+        )
+    raise ValueError(
+        "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube"
+    )
+
+
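And the 3D counterpart, here pinning the vertical coordinate by name (a sketch; `features_3d` and `field_3d` are hypothetical 3D feature-detection outputs with an 'altitude' coordinate):

    from tobac.utils.general import add_coordinates_3D

    features_3d_with_coords = add_coordinates_3D(
        features_3d, field_3d, vertical_coord="altitude"
    )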
+def get_bounding_box(x, buffer=1):
+    """Find the bounding box of an ndarray.
+
+    This is the smallest bounding rectangle for nonzero values as explained here:
+    https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Array for which the bounding box is to be determined.
+    buffer : int, optional
+        Number to set a buffer between the nonzero values and
+        the edges of the box. Default is 1.
+
+    Returns
+    -------
+    bbox : list
+        Dimensionwise list of the indices representing the edges
+        of the bounding box.
+    """
+
+    from numpy import delete, arange, diff, nonzero, array
+
+    mask = x == 0
+
+    bbox = []
+    all_axis = arange(x.ndim)
+    # loop over dimensions
+    for kdim in all_axis:
+        nk_dim = delete(all_axis, kdim)
+        mask_i = mask.all(axis=tuple(nk_dim))
+        dmask_i = diff(mask_i)
+        idx_i = nonzero(dmask_i)[0]
+        # for case where there is no value in idx_i
+        if len(idx_i) == 0:
+            idx_i = array([0, x.shape[kdim] - 1])
+        # for case where there is only one value in idx_i
+        elif len(idx_i) == 1:
+            idx_i = array([idx_i, idx_i])
+        # make sure there are two values in idx_i
+        elif len(idx_i) > 2:
+            idx_i = array([idx_i[0], idx_i[-1]])
+        # calculate min and max values for idx_i and append them to list
+        idx_min = max(0, idx_i[0] + 1 - buffer)
+        idx_max = min(x.shape[kdim] - 1, idx_i[1] + 1 + buffer)
+        bbox.append([idx_min, idx_max])
+    return bbox
+
+
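A quick self-contained check of the behaviour (expected values follow from the code above, not from separate documentation):

    import numpy as np
    from tobac.utils.general import get_bounding_box

    arr = np.zeros((8, 8))
    arr[3:5, 2:6] = 1          # nonzero block: rows 3-4, columns 2-5
    bbox = get_bounding_box(arr)
    # -> [[2, 6], [1, 7]]: with the default buffer of 1, lower bounds are
    # inclusive and upper bounds act like exclusive slice ends, so the box
    # covers rows 2..5 and columns 1..6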
+@decorators.xarray_to_iris()
+def get_spacings(
+    field_in, grid_spacing=None, time_spacing=None, average_method="arithmetic"
+):
+    """Determine spatial and temporal grid spacing of the
+    input data.
+
+    Parameters
+    ----------
+    field_in : iris.cube.Cube
+        Input field where to get spacings.
+
+    grid_spacing : float, optional
+        Manually sets the grid spacing if specified.
+        Default is None.
+
+    time_spacing : float, optional
+        Manually sets the time spacing if specified.
+        Default is None.
+
+    average_method : string, optional
+        Defines how spacings in x- and y-direction are
+        combined.
+
+        - 'arithmetic' : standard arithmetic mean like (dx+dy)/2
+        - 'geometric' : geometric mean; conserves gridbox area
+
+        Default is 'arithmetic'.
+
+    Returns
+    -------
+    dxy : float
+        Grid spacing in metres.
+
+    dt : float
+        Time resolution in seconds.
+
+    Raises
+    ------
+    ValueError
+        If input_cube does not contain projection_x_coord and
+        projection_y_coord or keyword argument grid_spacing.
+
+    """
+
+    from copy import deepcopy
+
+    # set horizontal grid spacing of input data
+    # If cartesian x and y coordinates are present, use these to determine dxy
+    # (the horizontal grid spacing used to convert pixel distances to physical distances):
+    coord_names = [coord.name() for coord in field_in.coords()]
+
+    if (
+        "projection_x_coordinate" in coord_names
+        and "projection_y_coordinate" in coord_names
+    ) and (grid_spacing is None):
+        x_coord = deepcopy(field_in.coord("projection_x_coordinate"))
+        x_coord.convert_units("metre")
+        dx = np.diff(x_coord[0:2].points)[0]
+        y_coord = deepcopy(field_in.coord("projection_y_coordinate"))
+        y_coord.convert_units("metre")
+        dy = np.diff(y_coord[0:2].points)[0]
+
+        if average_method == "arithmetic":
+            dxy = 0.5 * (np.abs(dx) + np.abs(dy))
+        elif average_method == "geometric":
+            dxy = np.sqrt(np.abs(dx) * np.abs(dy))
+        else:
+            raise ValueError("average_method must be 'arithmetic' or 'geometric'")
+
+    elif grid_spacing is not None:
+        dxy = grid_spacing
+    else:
+        raise ValueError(
+            "no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing"
+        )
+
+    # set temporal grid spacing of input data
+    if time_spacing is None:
+        # get time resolution of input data from the first two steps of the input cube:
+        time_coord = field_in.coord("time")
+        dt = (
+            time_coord.units.num2date(time_coord.points[1])
+            - time_coord.units.num2date(time_coord.points[0])
+        ).seconds
+    else:
+        # use value of time_spacing for dt:
+        dt = time_spacing
+    return dxy, dt
+
+
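A worked sketch on a synthetic cube (coordinate names are the ones this function looks for; the numbers illustrate the two averaging choices: dx = 1000 m and dy = 500 m give an arithmetic mean of 750 m and a geometric mean of sqrt(1000 * 500) ≈ 707 m):

    import numpy as np
    import iris.coords
    import iris.cube
    from tobac.utils.general import get_spacings

    t = iris.coords.DimCoord(
        np.arange(3) * 600.0, standard_name="time",
        units="seconds since 2000-01-01 00:00:00",
    )
    y = iris.coords.DimCoord(
        np.arange(5) * 500.0, standard_name="projection_y_coordinate", units="m"
    )
    x = iris.coords.DimCoord(
        np.arange(4) * 1000.0, standard_name="projection_x_coordinate", units="m"
    )
    cube = iris.cube.Cube(
        np.zeros((3, 5, 4)), dim_coords_and_dims=[(t, 0), (y, 1), (x, 2)]
    )

    dxy, dt = get_spacings(cube)                               # dxy = 750.0, dt = 600
    dxy_g, _ = get_spacings(cube, average_method="geometric")  # dxy_g ≈ 707.1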
+def spectral_filtering(
+    dxy, field_in, lambda_min, lambda_max, return_transfer_function=False
+):
+    """This function creates and applies a 2D transfer function that
+    can be used as a bandpass filter to remove certain wavelengths
+    of an atmospheric input field (e.g. vorticity, IVT, etc).
+
+    Parameters
+    ----------
+    dxy : float
+        Grid spacing in m.
+
+    field_in : numpy.array
+        2D field with input data.
+
+    lambda_min : float
+        Minimum wavelength in m.
+
+    lambda_max : float
+        Maximum wavelength in m.
+
+    return_transfer_function : boolean, optional
+        default: False. If set to True, then the 2D transfer function and
+        the corresponding wavelengths are returned in addition to the
+        filtered field.
+
+    Returns
+    -------
+    filtered_field : numpy.array
+        Spectrally filtered 2D field of data (with same shape as input data).
+
+    transfer_function : tuple
+        Two 2D fields, where the first one corresponds to the wavelengths
+        in the spectral space of the domain and the second one to the 2D
+        transfer function of the bandpass filter. Only returned if
+        return_transfer_function is True; in that case the function returns
+        ((wavelengths, transfer_function), filtered_field).
+    """
+
+    from scipy import signal
+    from scipy import fft
+
+    # check if valid value for dxy is given
+    if dxy <= 0:
+        raise ValueError(
+            "Invalid value for dxy. Please provide the grid spacing in meter."
+        )
+
+    # get number of grid cells in x and y direction
+    Ni = field_in.shape[-2]
+    Nj = field_in.shape[-1]
+    # wavenumber space
+    m, n = np.meshgrid(np.arange(Ni), np.arange(Nj), indexing="ij")
+
+    # if domain is square:
+    if Ni == Nj:
+        wavenumber = np.sqrt(m**2 + n**2)
+        lambda_mn = (2 * Ni * (dxy)) / wavenumber
+    else:
+        # if domain is a rectangle:
+        # alpha is the normalized wavenumber in wavenumber space
+        alpha = np.sqrt(m**2 / Ni**2 + n**2 / Nj**2)
+        # compute wavelengths for target grid in m
+        lambda_mn = 2 * dxy / alpha
+
+    ############### create a 2D bandpass filter (butterworth) #######################
+    b, a = signal.iirfilter(
+        2,
+        [1 / lambda_max, 1 / lambda_min],
+        btype="band",
+        ftype="butter",
+        fs=1 / dxy,
+        output="ba",
+    )
+    w, h = signal.freqz(b, a, 1 / lambda_mn.flatten(), fs=1 / dxy)
+    transfer_function = np.reshape(abs(h), lambda_mn.shape)
+
+    # 2-dimensional discrete cosine transformation to convert data to spectral space
+    spectral = fft.dctn(field_in.data)
+    # multiplication of spectral coefficients with transfer function
+    filtered = spectral * transfer_function
+    # inverse discrete cosine transformation to go back from spectral to original space
+    filtered_field = fft.idctn(filtered)
+
+    if return_transfer_function is True:
+        return (lambda_mn, transfer_function), filtered_field
+    else:
+        return filtered_field
+
+
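A self-contained sketch of exercising the filter on synthetic data (wavelengths chosen arbitrarily for illustration; on a 1 km grid, a band of 50-500 km should remove a 10 km wave and keep a 200 km wave):

    import numpy as np
    from tobac.utils.general import spectral_filtering

    dxy = 1000.0                                  # 1 km grid spacing
    x = np.arange(256) * dxy
    xx, yy = np.meshgrid(x, x, indexing="ij")

    # superpose a short (10 km) and a long (200 km) wave
    field = np.sin(2 * np.pi * xx / 10e3) + np.sin(2 * np.pi * xx / 200e3)

    filtered = spectral_filtering(dxy, field, lambda_min=50e3, lambda_max=500e3)
    (wavelengths, transfer), filtered2 = spectral_filtering(
        dxy, field, 50e3, 500e3, return_transfer_function=True
    )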
+def combine_tobac_feats(list_of_feats, preserve_old_feat_nums=None):
+    """WARNING: This function has been deprecated and will be removed in a future
+    release, please use 'combine_feature_dataframes' instead
+
+    Function to combine a list of tobac feature detection dataframes
+    into one combined dataframe that can be used for tracking
+    or segmentation.
+
+    :meta private:
+
+    Parameters
+    ----------
+    list_of_feats: array-like of Pandas DataFrames
+        A list of dataframes (generated, for example, by
+        running feature detection on multiple nodes).
+    preserve_old_feat_nums: str or None
+        The column name to preserve old feature numbers in. If None, these
+        old numbers will be deleted. Users may want to enable this feature
+        if they have run segmentation with the separate dataframes and
+        therefore old feature numbers.
+
+    Returns
+    -------
+    pd.DataFrame
+        One combined DataFrame."""
+
+    warnings.warn(
+        "This function has been deprecated and will be removed in a future release, please use 'combine_feature_dataframes' instead",
+        DeprecationWarning,
+    )
+
+    return combine_feature_dataframes(
+        list_of_feats, old_feature_column_name=preserve_old_feat_nums
+    )
+
+
+def combine_feature_dataframes(
+    feature_df_list,
+    renumber_features=True,
+    old_feature_column_name=None,
+    sort_features_by=None,
+):
+    """Function to combine a list of tobac feature detection dataframes
+    into one combined dataframe that can be used for tracking
+    or segmentation.
+
+    Parameters
+    ----------
+    feature_df_list: array-like of Pandas DataFrames
+        A list of dataframes (generated, for example, by
+        running feature detection on multiple nodes).
+    renumber_features: bool, optional (default: True)
+        If True, features are renumbered with contiguous integers. If False, the
+        old feature numbers will be retained, but an exception will be raised if
+        there are any non-unique feature numbers. If you have non-unique feature
+        numbers and want to preserve them, use old_feature_column_name to
+        save the old feature numbers under a different column name.
+    old_feature_column_name: str or None, optional (default: None)
+        The column name to preserve old feature numbers in. If None, these
+        old numbers will be deleted. Users may want to enable this feature
+        if they have run segmentation with the separate dataframes and
+        therefore old feature numbers.
+    sort_features_by: list, str or None, optional (default: None)
+        The sorting order to pass to Dataframe.sort_values for the merged
+        dataframe. If None, will default to ["frame", "idx"] if
+        renumber_features is True, or "feature" if renumber_features is False.
+
+    Returns
+    -------
+    pd.DataFrame
+        One combined DataFrame.
+    """
+
+    # first, let's just combine these.
+    combined_df = pd.concat(feature_df_list)
+
+    if not renumber_features and np.any(
+        np.bincount(combined_df["feature"] + np.nanmin(combined_df["feature"])) > 1
+    ):
+        error = ValueError(
+            "Non-unique feature values detected. Combining feature dataframes with original feature numbers cannot be performed because duplicate feature numbers exist, please use 'renumber_features=True'. If you would like to preserve the original feature numbers, please use the 'old_feature_column_name' keyword to define a new column for these values in the returned dataframe"
+        )
+        # error.add_note(
+        #     "Combining feature dataframes with original feature numbers cannot be performed because duplicate feature numbers exist, please use 'renumber_features=True'"
+        # )
+        # error.add_note(
+        #     "If you would like to preserve the original feature numbers, please use the 'old_feature_column_name' keyword to define a new column for these values in the returned dataframe"
+        # )
+        raise error
+
+    if sort_features_by is None:
+        if renumber_features:
+            sort_features_by = ["frame", "idx"]
+        else:
+            sort_features_by = "feature"
+    # # Then, sort by time first, then by feature number
+    # combined_df = combined_df.sort_values(["time", "feature"])
+    # Save the old feature numbers if requested.
+    if old_feature_column_name is not None:
+        combined_df[old_feature_column_name] = combined_df["feature"]
+    # count_per_time = combined_feats.groupby('time')['index'].count()
+    original_frame_dtype = combined_df["frame"].dtype
+    combined_df["frame"] = (
+        combined_df["time"].rank(method="dense").astype(original_frame_dtype) - 1
+    )
+    combined_sorted = combined_df.sort_values(sort_features_by, ignore_index=True)
+    if renumber_features:
+        original_feature_dtype = combined_df["feature"].dtype
+        combined_sorted["feature"] = np.arange(
+            1, len(combined_sorted) + 1, dtype=original_feature_dtype
+        )
+    combined_sorted = combined_sorted.reset_index(drop=True)
+    return combined_sorted
+
+
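A minimal sketch with two hypothetical feature dataframes carrying only the columns this function touches ("frame", "idx", "feature", "time"); frames are recomputed from the combined times and features are renumbered contiguously:

    import numpy as np
    import pandas as pd
    from tobac.utils.general import combine_feature_dataframes

    t0 = np.datetime64("2000-01-01 00:00")
    feats_a = pd.DataFrame(
        {"frame": [0, 0], "idx": [1, 2], "feature": [1, 2], "time": [t0, t0]}
    )
    feats_b = pd.DataFrame(
        {"frame": [0], "idx": [1], "feature": [1],
         "time": [t0 + np.timedelta64(5, "m")]}
    )
    combined = combine_feature_dataframes([feats_a, feats_b])
    print(combined["feature"].tolist())  # [1, 2, 3]
    print(combined["frame"].tolist())    # [0, 0, 1]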
+@decorators.irispandas_to_xarray()
+def transform_feature_points(
+    features,
+    new_dataset,
+    latitude_name=None,
+    longitude_name=None,
+    altitude_name=None,
+    max_time_away=None,
+    max_space_away=None,
+    max_vspace_away=None,
+    warn_dropped_features=True,
+):
+    """Function to transform input feature dataset horizontal grid points to a different grid.
+    The typical use case for this function is to transform detected features to perform
+    segmentation on a different grid.
+
+    The existing feature dataset must have some latitude/longitude coordinates associated
+    with each feature, and the new_dataset must have latitude/longitude available with
+    the same name. Note that due to xarray/iris incompatibilities, we suggest that the
+    input coordinates match the standard_name from Iris.
+
+    Parameters
+    ----------
+    features: pd.DataFrame
+        Input feature dataframe
+    new_dataset: iris.cube.Cube or xarray Dataset
+        The dataset to transform the feature points to.
+    latitude_name: str
+        The name of the latitude coordinate. If None, tries to auto-detect.
+    longitude_name: str
+        The name of the longitude coordinate. If None, tries to auto-detect.
+    altitude_name: str
+        The name of the altitude coordinate. If None, tries to auto-detect.
+    max_time_away: datetime.timedelta
+        The maximum time difference allowed when matching features to times in
+        the new dataset; features further away in time are dropped.
+    max_space_away: float
+        The maximum horizontal distance (in meters) to transform features to.
+    max_vspace_away: float
+        The maximum vertical distance (in meters) to transform features to.
+    warn_dropped_features: bool
+        Whether or not to print a warning message if one of the max_* options is
+        going to result in features that are dropped.
+
+    Returns
+    -------
+    transformed_features: pd.DataFrame
+        A new feature dataframe, with the coordinates transformed to
+        the new grid, suitable for use in segmentation
+
+    """
+
+    RADIUS_EARTH_M = 6371000
+    is_3D = "vdim" in features
+    if is_3D:
+        vert_coord = internal_utils.find_vertical_coord_name(new_dataset, altitude_name)
+
+    lat_coord, lon_coord = internal_utils.detect_latlon_coord_name(
+        new_dataset, latitude_name=latitude_name, longitude_name=longitude_name
+    )
+
+    if lat_coord not in features or lon_coord not in features:
+        raise ValueError("Cannot find latitude and/or longitude coordinate")
+
+    lat_vals_new = new_dataset[lat_coord].values
+    lon_vals_new = new_dataset[lon_coord].values
+
+    if len(lat_vals_new.shape) != len(lon_vals_new.shape):
+        raise ValueError(
+            "Cannot work with lat/lon coordinates of unequal dimensionality"
+        )
+
+    # the lat/lons must be a 2D grid, so if they aren't, make them one.
+    if len(lat_vals_new.shape) == 1:
+        lon_vals_new, lat_vals_new = np.meshgrid(lon_vals_new, lat_vals_new)
+
+    # we have to convert to radians because scikit-learn's haversine
+    # requires that the input be in radians.
+    flat_lats = np.deg2rad(lat_vals_new.ravel())
+    flat_lons = np.deg2rad(lon_vals_new.ravel())
+
+    # we have to drop NaN values.
+    either_nan = np.logical_or(np.isnan(flat_lats), np.isnan(flat_lons))
+    # we need to remember where these values are in the array so that we can
+    # appropriately unravel them.
+    loc_arr_trimmed = np.where(np.logical_not(either_nan))[0]
+    flat_lats_nona = flat_lats[~either_nan]
+    flat_lons_nona = flat_lons[~either_nan]
+    ll_tree = sklearn.neighbors.BallTree(
+        np.array([flat_lats_nona, flat_lons_nona]).T, metric="haversine"
+    )
+
+    ret_features = copy.deepcopy(features)
+
+    # there is almost certainly room for speedup in here.
+    rad_lats = np.deg2rad(features[lat_coord])
+    rad_lons = np.deg2rad(features[lon_coord])
+    dists, closest_pts = ll_tree.query(np.column_stack((rad_lats, rad_lons)))
+    unraveled_h1, unraveled_h2 = np.unravel_index(
+        loc_arr_trimmed[closest_pts[:, 0]], np.shape(lat_vals_new)
+    )
+
+    ret_features["hdim_1"] = ("index", unraveled_h1)
+    ret_features["hdim_2"] = ("index", unraveled_h2)
+
+    # now interpolate vertical, if available.
+    if is_3D and max_space_away is not None and max_vspace_away is not None:
+        alt_tree = sklearn.neighbors.BallTree(
+            new_dataset[vert_coord].values[:, np.newaxis]
+        )
+        alt_dists, closest_alt_pts = alt_tree.query(
+            features[vert_coord].values[:, np.newaxis]
+        )
+        ret_features["vdim"] = ("index", closest_alt_pts[:, 0])
+
+        dist_cond = xr.DataArray(
+            np.logical_or(
+                (dists[:, 0] * RADIUS_EARTH_M) < max_space_away,
+                alt_dists[:, 0] < max_vspace_away,
+            ),
+            dims="index",
+        )
+    elif max_space_away is not None:
+        dist_cond = xr.DataArray(
+            (dists[:, 0] * RADIUS_EARTH_M) < max_space_away, dims="index"
+        )
+
+    if max_space_away is not None or max_vspace_away is not None:
+        ret_features = ret_features.where(dist_cond, drop=True)
+
+    # force times to match, where appropriate.
+    if "time" in new_dataset.coords and max_time_away is not None:
+        # this is necessary due to the iris/xarray/pandas weirdness that we have.
+        old_feat_times = ret_features["time"].astype("datetime64[s]")
+        new_dataset_times = new_dataset["time"].astype("datetime64[s]")
+        closest_times = np.min(np.abs(old_feat_times - new_dataset_times), axis=1)
+        closest_time_locs = np.abs(old_feat_times - new_dataset_times).argmin(axis=1)
+        # force to seconds to deal with iris not accepting ms
+        ret_features["time"] = new_dataset["time"][closest_time_locs].astype(
+            "datetime64[s]"
+        )
+        ret_features = ret_features.where(
+            closest_times < np.timedelta64(max_time_away), drop=True
+        )
+
+    if warn_dropped_features:
+        removed_features = np.setdiff1d(features["feature"], ret_features["feature"])
+        if len(removed_features):
+            warnings.warn(
+                "Dropping feature numbers: " + str(removed_features.tolist()),
+                UserWarning,
+            )
+
+    # make sure that feature points are converted back to int64
+    ret_features["feature"] = ret_features.feature.astype(int)
+
+    return ret_features
+
+
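A hedged usage sketch, assuming `features` came from feature detection on one grid and `radar_grid` is a hypothetical xarray Dataset for a second grid with latitude/longitude coordinates:

    import datetime
    from tobac.utils.general import transform_feature_points

    transformed = transform_feature_points(
        features,
        radar_grid,
        max_time_away=datetime.timedelta(minutes=5),  # drop features >5 min from a grid time
        max_space_away=20_000,                        # drop features >20 km from a grid point
    )
    # `transformed` now carries hdim_1/hdim_2 indices into radar_grid,
    # suitable for running tobac segmentation on that grid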
+def standardize_track_dataset(TrackedFeatures, Mask, Projection=None):
+    """Combine the feature mask dataset returned by tobac.segmentation with the
+    TrackedFeatures dataset returned by tobac.linking_trackpy into a common dataset.
+
+    CAUTION: this function is experimental. No data structures output are guaranteed to be supported in future versions of tobac.
+
+    Also renames the variables to be more descriptive and to comply with cf-tree,
+    converts the default cell parent ID to an integer table, and adds a cell
+    dimension to reflect the cell parent IDs. Projection is an xarray DataArray.
+
+    TODO: Add metadata attributes
+
+    Parameters
+    ----------
+    TrackedFeatures : xarray.core.dataset.Dataset
+        xarray dataset of tobac Track information, the xarray dataset returned by tobac.tracking.linking_trackpy
+    Mask: xarray.core.dataset.Dataset
+        xarray dataset of tobac segmentation mask information, the xarray dataset returned
+        by tobac.segmentation.segmentation
+    Projection : xarray.core.dataarray.DataArray, default = None
+        array.DataArray of the original input dataset (gridded nexrad data for example).
+        If using gridded nexrad data, this can be input as: data['ProjectionCoordinateSystem']
+        An example of the type of information in the dataarray includes the following attributes:
+        latitude_of_projection_origin :29.471900939941406
+        longitude_of_projection_origin :-95.0787353515625
+        _CoordinateTransformType :Projection
+        _CoordinateAxes :x y z time
+        _CoordinateAxesTypes :GeoX GeoY Height Time
+        grid_mapping_name :azimuthal_equidistant
+        semi_major_axis :6370997.0
+        inverse_flattening :298.25
+        longitude_of_prime_meridian :0.0
+        false_easting :0.0
+        false_northing :0.0
+
+    Returns
+    -------
+    ds : xarray.core.dataset.Dataset
+        xarray dataset of merged Track and Segmentation Mask datasets with renamed variables.
+    """
+
+    feature_standard_names = {
+        # new variable name, and long description for the NetCDF attribute
+        "frame": (
+            "feature_time_index",
+            "positional index of the feature along the time dimension of the mask, from 0 to N-1",
+        ),
+        "hdim_1": (
+            "feature_hdim1_coordinate",
+            "position of the feature along the first horizontal dimension in grid point space; a north-south coordinate for dim order (time, y, x). "
+            "The numbering is consistent with positional indexing of the coordinate, but can be "
+            "fractional, to account for a centroid not aligned to the grid.",
+        ),
+        "hdim_2": (
+            "feature_hdim2_coordinate",
+            "position of the feature along the second horizontal dimension in grid point space; an east-west coordinate for dim order (time, y, x). "
+            "The numbering is consistent with positional indexing of the coordinate, but can be "
+            "fractional, to account for a centroid not aligned to the grid.",
+        ),
+        "idx": (
+            "feature_id_this_frame",
+            "Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",
+        ),
+        "num": (
+            "feature_grid_cell_count",
+            "Number of grid points that are within the threshold of this feature",
+        ),
+        "threshold_value": (
+            "feature_threshold_max",
+            "Maximum threshold value reached by the feature",
+        ),
+        "feature": (
+            "feature",
+            "Unique number of the feature; starts from 1 and increments by 1 to the number of features",
+        ),
+        "time": (
+            "feature_time",
+            "time of the feature, consistent with feature_time_index",
+        ),
+        "timestr": (
+            "feature_time_str",
+            "String representation of the feature time, YYYY-MM-DD HH:MM:SS",
+        ),
+        "projection_y_coordinate": (
+            "feature_projection_y_coordinate",
+            "y position of the feature in the projection given by ProjectionCoordinateSystem",
+        ),
+        "projection_x_coordinate": (
+            "feature_projection_x_coordinate",
+            "x position of the feature in the projection given by ProjectionCoordinateSystem",
+        ),
+        "lat": ("feature_latitude", "latitude of the feature"),
+        "lon": ("feature_longitude", "longitude of the feature"),
+        "ncells": (
+            "feature_ncells",
+            "number of grid cells for this feature (meaning uncertain)",
+        ),
+        "areas": ("feature_area",),
+        "isolated": ("feature_isolation_flag",),
+        "num_objects": ("number_of_feature_neighbors",),
+        "cell": ("feature_parent_cell_id",),
+        "time_cell": ("feature_parent_cell_elapsed_time",),
+        "segmentation_mask": ("2d segmentation mask",),
+    }
+    new_feature_var_names = {
+        k: feature_standard_names[k][0]
+        for k in feature_standard_names.keys()
+        if k in TrackedFeatures.variables.keys()
+    }
+
+    # TrackedFeatures = TrackedFeatures.drop(["cell_parent_track_id"])
+    # Combine Track and Mask variables. Use the 'feature' variable as the coordinate variable instead of
+    # the 'index' variable and call the dimension 'feature'
+    ds = xr.merge(
+        [
+            TrackedFeatures.swap_dims({"index": "feature"})
+            .drop("index")
+            .rename_vars(new_feature_var_names),
+            Mask,
+        ]
+    )
+
+    # Add the projection data back in
+    if Projection is not None:
+        ds["ProjectionCoordinateSystem"] = Projection
+
+    return ds
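A hedged sketch of where this experimental helper fits, assuming `track_ds` (tracks as an xarray Dataset with an "index" dimension), `mask_ds` (the segmentation mask Dataset), and `grid` (the original gridded input carrying the projection variable, as in the docstring example) are hypothetical objects already in hand:

    from tobac.utils.general import standardize_track_dataset

    ds = standardize_track_dataset(
        track_ds, mask_ds, Projection=grid["ProjectionCoordinateSystem"]
    )
    ds.to_netcdf("tracks_standardized.nc")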