tobac 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,439 @@
1
+ """Internal tobac utilities for xarray datasets/dataarrays"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ from typing import Union
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import xarray as xr
11
+ from . import coordinates as tb_utils_gi
12
+
13
+
14
def find_axis_from_dim_coord(
    in_da: xr.DataArray, dim_coord_name: str
) -> Union[int, None]:
    """Find the axis number of a named dimension or coordinate.

    The name is looked up both as a dimension and as a coordinate of
    ``in_da``. An axis is only returned when the two lookups do not
    contradict each other and the coordinate (if present) lies along
    exactly one axis.

    Parameters
    ----------
    in_da: xarray.DataArray
        Input variable array
    dim_coord_name: str
        coordinate or dimension to look for

    Returns
    -------
    axis_number: int
        the number of the axis of the given coordinate

    Raises
    ------
    ValueError
        If the name matches neither a dimension nor a coordinate with at
        least one axis, or if it cannot be resolved to a single axis (for
        example a coordinate spanning several dimensions, or a coordinate
        lying along a different axis than the dimension of the same name).
    """
    # A failed lookup is not an error yet: the name may still resolve through
    # the other lookup.
    try:
        dim_axis = find_axis_from_dim(in_da, dim_coord_name)
    except ValueError:
        dim_axis = None

    try:
        coord_axes = find_axis_from_coord(in_da, dim_coord_name)
    except ValueError:
        coord_axes = tuple()

    if len(coord_axes) == 0:
        if dim_axis is None:
            raise ValueError("Coordinate/Dimension " + dim_coord_name + " not found.")
        # Only the dimension lookup succeeded.
        return dim_axis

    # The coordinate lies along exactly one axis and the dimension lookup
    # either failed or agrees with it.
    if len(coord_axes) == 1 and (dim_axis is None or dim_axis == coord_axes[0]):
        return coord_axes[0]

    # The name exists, but it is ambiguous; say so instead of claiming that
    # it was not found.
    raise ValueError(
        "Coordinate/Dimension "
        + dim_coord_name
        + " cannot be resolved to a single axis."
    )
60
+
61
+
62
def find_axis_from_dim(in_da: xr.DataArray, dim_name: str) -> Union[int, None]:
    """
    Look up the axis number of a named dimension of a DataArray

    Parameters
    ----------
    in_da: xarray.DataArray
        Input DataArray to find the axis number from
    dim_name: str
        Name of the dimension

    Returns
    -------
    int
        position of ``dim_name`` within ``in_da.dims``

    Raises
    ------
    ValueError
        raises ValueError if dim_name matches multiple dimensions, or if no
        dimension of that name exists
    """
    # Collect the position of every dimension that matches the requested name.
    matching_axes = [
        axis for axis, dim in enumerate(in_da.dims) if dim in (dim_name,)
    ]

    if not matching_axes:
        raise ValueError("Dimension not found. ")
    if len(matching_axes) > 1:
        raise ValueError(
            "More than one matching dimension. Need to specify which axis number or rename "
            "your dimensions."
        )
    return matching_axes[0]
100
+
101
+
102
def find_axis_from_coord(in_da: xr.DataArray, coord_name: str) -> tuple[int]:
    """
    Look up the axis number(s) that a named coordinate of a DataArray lies along

    Parameters
    ----------
    in_da: xarray.DataArray
        Input DataArray to find the axis number from
    coord_name: str
        Name of the coordinate

    Returns
    -------
    tuple of int
        axis number(s), one per dimension of the coordinate, in the order of
        the coordinate's own dimensions

    Raises
    ------
    ValueError
        raises ValueError if ``in_da`` has no coordinate called ``coord_name``;
        a ValueError raised by ``find_axis_from_dim`` for one of the
        coordinate's dimensions is passed on unchanged
    """
    available_coords = in_da.coords
    if coord_name not in set(available_coords):
        raise ValueError("No matching coords")

    # Translate every dimension of the coordinate into its axis number.
    axes = []
    for coord_dim in available_coords[coord_name].dims:
        axis = find_axis_from_dim(in_da, coord_dim)
        if axis is not None:
            axes.append(axis)
    return tuple(axes)
137
+
138
+
139
def find_vertical_coord_name(
    variable_da: xr.DataArray,
    vertical_coord: Union[str, None] = None,
) -> str:
    """Function to find the name of the vertical coordinate of a DataArray

    Parameters
    ----------
    variable_da: xarray.DataArray
        Input variable array, containing a vertical coordinate.
    vertical_coord: str
        Vertical coordinate name. If None or "auto", this function tries to
        auto-detect it from the names in ``COMMON_VERT_COORDS``.

    Returns
    -------
    str
        the vertical coordinate name

    Raises
    ------
    ValueError
        Raised if auto-detection finds none of the common vertical coordinate
        names, or if the requested ``vertical_coord`` is not a coordinate of
        ``variable_da``.
    """

    list_coord_names = variable_da.coords

    if vertical_coord is None or vertical_coord == "auto":
        # Walk the known names in their declared order instead of picking an
        # arbitrary element of a set intersection: set iteration order for
        # strings can differ between interpreter runs, which made the result
        # unpredictable when several candidates were present.
        # NOTE(review): assumes COMMON_VERT_COORDS is an ordered sequence
        # (list/tuple) - confirm in the coordinates module.
        for candidate in tb_utils_gi.COMMON_VERT_COORDS:
            if candidate in list_coord_names:
                return candidate
        coord_names_err = str(tuple(tb_utils_gi.COMMON_VERT_COORDS))
        raise ValueError(
            "Cube lacks suitable automatic vertical coordinate " + coord_names_err
        )

    if vertical_coord in list_coord_names:
        return vertical_coord
    raise ValueError("Please specify vertical coordinate found in cube")
179
+
180
+
181
def find_hdim_axes_3d(
    field_in: xr.DataArray,
    vertical_coord: Union[str, None] = None,
    vertical_axis: Union[int, None] = None,
    time_dim_coord_name: str = "time",
    return_vertical_axis: bool = False,
) -> Union[tuple[int, int], tuple[int, int, int]]:
    """Finds what the hdim axes are given a 3D (including z) or
    4D (including z and time) dataset.

    Parameters
    ----------
    field_in: xarray.DataArray
        Input field, can be 3D or 4D
    vertical_coord: str or None
        The name of the vertical coord, or None, which will attempt to find
        the vertical coordinate name
    vertical_axis: int or None
        The axis number of the vertical coordinate, or None. Negative values
        count from the last axis. Note that only one of vertical_axis or
        vertical_coord can be set (``vertical_coord="auto"`` is tolerated
        alongside vertical_axis, in which case vertical_axis is used).
    time_dim_coord_name: str
        Name of the time dimension/coordinate
    return_vertical_axis: bool
        True if you want to also return the vertical axis number as the last value

    Returns
    -------
    (hdim_1_axis, hdim_2_axis): (int, int)
        The axes for hdim_1 and hdim_2, i.e. every axis that is neither the
        time axis nor the vertical axis. If ``return_vertical_axis`` is True
        the vertical axis number (or None if the field has no vertical axis)
        is appended as the last value.

    Raises
    ------
    ValueError
        If both vertical_coord (other than "auto") and vertical_axis are
        given, if vertical_axis is out of bounds for ``field_in``, or if no
        vertical axis can be determined for a field that needs one.
    """

    if (
        vertical_coord is not None
        and vertical_axis is not None
        and vertical_coord != "auto"
    ):
        raise ValueError("Cannot set both vertical_coord and vertical_axis.")

    try:
        time_axis = find_axis_from_dim_coord(field_in, time_dim_coord_name)
    except ValueError:
        # time axis not found
        time_axis = None

    if vertical_axis is not None:
        # The axis was specified directly. Normalise it to a non-negative
        # number: the axes are compared against np.arange(ndim) below, so a
        # negative or out-of-range value would never match and the vertical
        # axis would wrongly be reported as a horizontal one.
        n_axes = field_in.ndim
        if not -n_axes <= vertical_axis < n_axes:
            raise ValueError(
                f"vertical_axis {vertical_axis} is out of bounds for a field "
                f"with {n_axes} dimensions."
            )
        vertical_coord_axis = vertical_axis % n_axes
    else:
        try:
            vertical_coord_name = find_vertical_coord_name(
                field_in, vertical_coord=vertical_coord
            )
            vertical_coord_axis = find_axis_from_dim_coord(
                field_in, vertical_coord_name
            )
        except ValueError:
            # Either no vertical coordinate exists, or it does not map onto
            # exactly one axis.
            vertical_coord_axis = None

        if vertical_coord_axis is None:
            # if we don't have a vertical coordinate, and we are 3D or lower
            # that is okay.
            lacks_vertical_by_design = (
                field_in.ndim == 3 and time_axis is not None
            ) or field_in.ndim < 3
            if not lacks_vertical_by_design:
                raise ValueError("No suitable vertical coordinate found")

    # Once we know the vertical coordinate, we can resolve the
    # horizontal coordinates: everything that is neither time nor vertical.
    all_axes = np.arange(0, field_in.ndim)
    is_time_or_vertical = np.isin(all_axes, [time_axis, vertical_coord_axis])
    output_vals = tuple(all_axes[np.logical_not(is_time_or_vertical)])
    if return_vertical_axis:
        output_vals = (*output_vals, vertical_coord_axis)
    return output_vals
259
+
260
+
261
def add_coordinates_to_features(
    feature_df: pd.DataFrame,
    variable_da: xr.DataArray,
    vertical_coord: Union[str, None] = None,
    vertical_axis: Union[int, None] = None,
    use_standard_names: bool = True,
    interp_dims_without_coords: bool = False,
) -> pd.DataFrame:
    """Function to populate the interpolated coordinates to feature

    The positions stored in the ``hdim_1``, ``hdim_2``, ``frame`` (and, for
    3D data, ``vdim``) columns of ``feature_df`` are used as fractional array
    indices at which every coordinate of ``variable_da`` is evaluated. The
    input dataframe is not modified; a copy with the extra columns is
    returned.

    Parameters
    ----------
    feature_df: pandas DataFrame
        Feature dataframe
    variable_da: xarray.DataArray
        DataArray (usually the one you are tracking on) at least containing the dimension of 'time'.
        Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate',
        and 'altitude' (if 3D) are the coordinates that we expect, although this function
        will happily interpolate along any coordinates you give.
    vertical_coord: str
        Name of the vertical coordinate. If None, tries to auto-detect.
        If it is a string, it looks for the coordinate or the dimension name corresponding
        to the string. Note that if you only have a 2D or 3D coordinate for altitude,
        you must pass the axis number through ``vertical_axis`` instead.
    vertical_axis: int or None
        Axis number of the vertical.
    use_standard_names: bool
        If true, when interpolating a coordinate, it looks for a standard_name
        and uses that to name the output coordinate, to mimic iris functionality.
        If false, uses the actual name of the coordinate to output.
    interp_dims_without_coords: bool
        If True, interpolates dimensions without coordinates
        If False, skips dimensions without coordinates

    Returns
    -------
    Dataframe with coordinates added: a 'time' column, a 'timestr' column
    (formatted as ``%Y-%m-%d %H:%M:%S``) and one column per coordinate.

    Raises
    ------
    ValueError
        If ``variable_da`` has no 'time' dimension (raised by
        ``find_axis_from_dim``), has too many or too few dimensions, or
        already uses one of the temporary dimension names reserved by this
        function.
    """
    # make a copy to avoid editing in place.
    return_feat_df = copy.deepcopy(feature_df)

    time_dim_name: str = "time"
    # first, we must find the names of the dimensions corresponding to the numbered
    # dimensions.

    ndims: int = variable_da.ndim

    time_dim_number = find_axis_from_dim(variable_da, time_dim_name)

    is_3d = (time_dim_number is not None and ndims == 4) or (
        time_dim_number is None and ndims == 3
    )
    if is_3d:
        hdim1_axis, hdim2_axis, vertical_axis = find_hdim_axes_3d(
            variable_da,
            vertical_coord,
            vertical_axis,
            time_dim_coord_name=time_dim_name,
            return_vertical_axis=True,
        )
        if vertical_axis is None:
            # Fall back to resolving the vertical axis from its name.
            vertical_axis = find_axis_from_dim_coord(
                variable_da, find_vertical_coord_name(variable_da, vertical_coord)
            )
    else:  # 2D
        if ndims == 2:
            hdim1_axis = 0
            hdim2_axis = 1
        elif ndims == 3 and time_dim_number is not None:
            possible_dims = [0, 1, 2]
            possible_dims.pop(time_dim_number)
            hdim1_axis, hdim2_axis = possible_dims
        else:
            raise ValueError("DataArray has too many or too few dimensions")

    # If the dimensions share a name with the coordinates and those coordinates do not match
    # with the i, j, k-style indices, you cannot `interp` along those i, j, k indices.
    # so, instead, we rename the dimensions to reserved temporary names so that we can
    # run interpolation.

    hdim1_name_original = variable_da.dims[hdim1_axis]
    hdim2_name_original = variable_da.dims[hdim2_axis]

    # reserved names for the temporary dimensions that are based on i, j, k values
    hdim1_name_new = "__temp_hdim1_name"
    hdim2_name_new = "__temp_hdim2_name"
    vdim_name_new = "__temp_vdim_name"
    time_name_new = "__temp_time_name"

    # All four temporary names must be free, including the time one.
    reserved_dim_names = (hdim1_name_new, hdim2_name_new, vdim_name_new, time_name_new)
    if any(reserved in variable_da.dims for reserved in reserved_dim_names):
        raise ValueError(
            "Cannot have dimensions named {0}, {1}, {2}, or {3}".format(
                *reserved_dim_names
            )
        )

    def _feature_indexer(column):
        # Positions of all features along one axis, labelled by feature index
        # so that every indexer shares the same "features" dimension.
        return xr.DataArray(
            return_feat_df[column].values,
            dims="features",
            coords={"features": return_feat_df.index},
        )

    dim_new_names = {
        hdim1_name_original: hdim1_name_new,
        hdim2_name_original: hdim2_name_new,
        time_dim_name: time_name_new,
    }
    dim_interp_coords = {
        hdim1_name_new: _feature_indexer("hdim_1"),
        hdim2_name_new: _feature_indexer("hdim_2"),
        time_name_new: _feature_indexer("frame"),
    }

    if is_3d:
        vdim_name_original = variable_da.dims[vertical_axis]
        dim_interp_coords[vdim_name_new] = _feature_indexer("vdim")
        dim_new_names[vdim_name_original] = vdim_name_new

    # Give the dimensions their temporary names; the coordinates keep their
    # own names, so interpolating along the temporary dimensions works on
    # plain array positions.
    renamed_dim_da = variable_da.swap_dims(dim_new_names)

    # Time is looked up by frame number rather than interpolated.
    frame_times = variable_da[time_dim_name].values[return_feat_df["frame"]]
    return_feat_df[time_dim_name] = frame_times
    return_feat_df[time_dim_name + "str"] = [
        pd.to_datetime(str(x)).strftime("%Y-%m-%d %H:%M:%S") for x in frame_times
    ]

    for interp_coord in renamed_dim_da.coords:
        # skip time coordinate because we dealt with that already
        if interp_coord == time_dim_name:
            continue

        if interp_coord not in variable_da.coords and not interp_dims_without_coords:
            continue

        interp_coord_name = interp_coord
        # if we have standard names and are using them, rename our coordinates.
        if use_standard_names:
            try:
                interp_coord_name = renamed_dim_da[interp_coord].attrs["standard_name"]
            except KeyError:
                pass

        coord_da = renamed_dim_da[interp_coord]
        if coord_da.dtype.kind in "uifc":
            # Interpolate over the coordinate
            return_feat_df[interp_coord_name] = coord_da.interp(
                coords={dim: dim_interp_coords[dim] for dim in coord_da.dims}
            )
        else:
            # If non-numeric, we should instead just index the nearest values:
            return_feat_df[interp_coord_name] = coord_da.isel(
                **{
                    dim: np.round(dim_interp_coords[dim]).astype(int)
                    for dim in coord_da.dims
                }
            )
    return return_feat_df