tobac-1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,87 @@
+ """Custom generators used for iterators required by tobac"""
+
+ import datetime
+ from typing import Generator, Optional, Tuple, Union
+
+ import cftime
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ import tobac.utils.datetime as datetime_utils
+
+
+ def field_and_features_over_time(
+     field: xr.DataArray,
+     features: pd.DataFrame,
+     time_var_name: str = "time",
+     time_padding: Optional[datetime.timedelta] = None,
+ ) -> Generator[
+     Tuple[
+         int,
+         Union[datetime.datetime, np.datetime64, cftime.datetime],
+         xr.DataArray,
+         pd.DataFrame,
+     ],
+     None,
+     None,
+ ]:
+ """Generator that iterates over time through a paired field dataarray and a
30
+ features dataframe. time_padding parameter allows a tolerance to be set for
31
+ matching time stamps in the datarray and dataframe
32
+
33
+ Parameters
34
+ ----------
35
+ field : xr.DataArray
36
+ The field to iterate over
37
+ features : pd.DataFrame
38
+ The features dataframe to iterate through
39
+ time_var_name : str, optional (default: "time")
40
+ The name of the time dimension in field and the time column in features,
41
+ by default "time"
42
+ time_padding : datetime.timedelta, optional (default: None)
43
+ The tolerance for matching features at the same time as each time step
44
+ in the field dataframe, by default None
45
+
46
+ Yields
47
+ ------
48
+ Generator[tuple[int, Union[datetime.datetime, np.datetime64, cftime.datetime], xr.DataArray, pd.DataFrame], None, None]
49
+ A generator that returns the iteration index, the time, the slice of
50
+ field at that time the slice of features with times within the time
51
+ padding tolerance of the time step
52
+ """
53
+     if time_var_name not in field.coords:
+         raise ValueError(f"{time_var_name} not present in input field coordinates")
+
+     if time_var_name not in features.columns:
+         raise ValueError(f"{time_var_name} not present in input feature columns")
+
+     all_times = pd.Series(
+         datetime_utils.match_datetime_format(
+             features[time_var_name], field.coords[time_var_name]
+         ),
+         index=features.index,
+     )
+     for time_iteration_number, time_iteration_value in enumerate(
+         field.coords[time_var_name]
+     ):
+         field_at_time = field.isel({time_var_name: time_iteration_number})
+         if time_padding is not None:
+             # datetime64 values at nanosecond precision return an int from .item(),
+             # so convert the padding to a timedelta64 for the comparison
+             if isinstance(time_iteration_value.values.item(), int):
+                 min_time = (
+                     time_iteration_value.values
+                     - pd.Timedelta(time_padding).to_timedelta64()
+                 )
+                 max_time = (
+                     time_iteration_value.values
+                     + pd.Timedelta(time_padding).to_timedelta64()
+                 )
+             else:
+                 min_time = time_iteration_value.values - time_padding
+                 max_time = time_iteration_value.values + time_padding
+             features_i = features.loc[all_times.between(min_time, max_time)]
+         else:
+             features_i = features.loc[all_times == time_iteration_value.values]
+
+         yield time_iteration_number, time_iteration_value, field_at_time, features_i
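For reference, a minimal usage sketch of field_and_features_over_time (not part of the wheel contents; the toy field, features dataframe and the one-minute padding below are invented for illustration, and it assumes the internal match_datetime_format helper simply aligns the two datetime64 representations):

import datetime

import numpy as np
import pandas as pd
import xarray as xr

from tobac.utils.generators import field_and_features_over_time

# Toy field with three time steps, five minutes apart
times = pd.date_range("2000-01-01", periods=3, freq="5min")
field = xr.DataArray(
    np.zeros((3, 4, 4)),
    coords={"time": times},
    dims=("time", "y", "x"),
)

# Two toy features; the second is time stamped 30 s after the second field time step
features = pd.DataFrame(
    {
        "feature": [1, 2],
        "time": [times[0], times[1] + pd.Timedelta("30s")],
    }
)

# With a one-minute padding the offset feature is still matched to its time step;
# with time_padding=None only exact time stamp matches are yielded
for i, t, field_i, features_i in field_and_features_over_time(
    field, features, time_padding=datetime.timedelta(minutes=1)
):
    print(i, t.values, len(features_i))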
@@ -0,0 +1,2 @@
+ from .label_props import *
+ from .coordinates import *
@@ -0,0 +1,430 @@
+ """Internal tobac utilities"""
+
+ from __future__ import annotations
+ from typing import List, Literal, Optional, Tuple, Union, Callable
+
+ import numpy as np
+ import skimage.measure
+ import xarray as xr
+ import iris
+ import iris.cube
+ import pandas as pd
+ import warnings
+
+ from tobac.utils.decorators import irispandas_to_xarray, njit_if_available
+ from . import iris_utils
+ from . import xarray_utils as xr_utils
+
+ # list of common vertical coordinates to search for in various functions
+ COMMON_VERT_COORDS: list[str] = [
+     "z",
+     "model_level_number",
+     "altitude",
+     "geopotential_height",
+ ]
+
+ COMMON_X_COORDS: list[str] = [
+     "x",
+     "projection_x_coordinate",
+ ]
+
+ COMMON_Y_COORDS: list[str] = [
+     "y",
+     "projection_y_coordinate",
+ ]
+
+ COMMON_LAT_COORDS: list[str] = [
+     "latitude",
+     "lat",
+ ]
+
+ COMMON_LON_COORDS: list[str] = [
+     "longitude",
+     "lon",
+     "long",
+ ]
+
+
+ def _warn_auto_coordinate():
+     """
+     Internal function to warn on the use of `auto` as a default coordinate.
+     """
+     warnings.warn(
+         '"auto" as a coordinate is deprecated. Use None instead.',
+         DeprecationWarning,
+     )
+
+
+ def find_vertical_coord_name(
+     variable_cube: Union[iris.cube.Cube, xr.DataArray],
+     vertical_coord: Union[str, None] = None,
+ ) -> str:
+     """Function to find the vertical coordinate in the iris cube or xarray DataArray
+
+     Parameters
+     ----------
+     variable_cube: iris.cube.Cube or xarray.DataArray
+         Input variable cube, containing a vertical coordinate.
+     vertical_coord: str
+         Vertical coordinate name. If None, this function tries to auto-detect.
+
+     Returns
+     -------
+     str
+         the vertical coordinate name
+
+     Raises
+     ------
+     ValueError
+         Raised if the vertical coordinate isn't found in the cube.
+     """
+
+     if vertical_coord == "auto":
+         _warn_auto_coordinate()
+
+     if isinstance(variable_cube, iris.cube.Cube):
+         return iris_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord)
+     if isinstance(variable_cube, xr.DataArray):
+         return xr_utils.find_vertical_coord_name(variable_cube, vertical_coord)
+
+     raise ValueError("variable_cube must be xr.DataArray or iris.cube.Cube")
+
+
+ def find_coord_in_dataframe(
+     variable_dataframe: Union[pd.DataFrame, pd.Series],
+     coord: Optional[str] = None,
+     defaults: Optional[List[str]] = None,
+ ) -> str:
+     """Find a coordinate in the columns of a dataframe matching either a specific
+     coordinate name or a list of default values
+
+     Parameters
+     ----------
+     variable_dataframe : pd.DataFrame or pd.Series
+         Input dataframe or series
+     coord : Optional[str], optional
+         Coordinate name to search for, by default None
+     defaults : Optional[List[str]], optional
+         Default list of coordinates to search for if no coordinate name is provided
+         by the coord parameter, by default None
+
+     Returns
+     -------
+     str
+         The coordinate name in the columns of the input dataframe
+
+     Raises
+     ------
+     ValueError
+         If the coordinate specified by the coord parameter is not present in the columns of the input dataframe
+     ValueError
+         If multiple coordinates in the defaults parameter are present in the columns of the input dataframe
+     ValueError
+         If no coordinates in the defaults parameter are present in the columns of the input dataframe
+     ValueError
+         If neither the coord nor the defaults parameter is set
+     """
+     if isinstance(variable_dataframe, pd.DataFrame):
+         columns = variable_dataframe.columns
+     elif isinstance(variable_dataframe, pd.Series):
+         columns = variable_dataframe.index
+     else:
+         raise ValueError("Input variable_dataframe is neither a dataframe nor a series")
+
+     if coord is not None:
+         if coord in columns:
+             return coord
+         else:
+             raise ValueError(f"Coordinate {coord} is not present in the dataframe")
+
+     if defaults is not None:
+         intersect_id = np.intersect1d(
+             columns.str.lower(), defaults, return_indices=True
+         )[1]
+         if len(intersect_id) == 1:
+             return columns[intersect_id[0]]
+         elif len(intersect_id) > 1:
+             raise ValueError(
+                 "Multiple matching coord names found, please specify coordinate using coord parameter"
+             )
+         raise ValueError(
+             f"No coordinate found matching defaults {defaults}, please specify coordinate using coord parameter"
+         )
+
+     raise ValueError("One of coord or defaults parameter must be set")
+
+
+ def find_dataframe_vertical_coord(
+     variable_dataframe: pd.DataFrame, vertical_coord: Union[str, None] = None
+ ) -> str:
+     """Function to find the vertical coordinate in the dataframe
+
+     Parameters
+     ----------
+     variable_dataframe: pandas.DataFrame
+         Input dataframe, containing a vertical coordinate.
+     vertical_coord: str
+         Vertical coordinate name. If None, this function tries to auto-detect.
+
+     Returns
+     -------
+     str
+         the vertical coordinate name
+
+     Raises
+     ------
+     ValueError
+         Raised if the vertical coordinate isn't found in the dataframe.
+     """
+
+     if vertical_coord == "auto":
+         _warn_auto_coordinate()
+         vertical_coord = None
+
+     return find_coord_in_dataframe(
+         variable_dataframe, coord=vertical_coord, defaults=COMMON_VERT_COORDS
+     )
+
+
+ def find_dataframe_horizontal_coords(
+     variable_dataframe: pd.DataFrame,
+     hdim1_coord: Optional[str] = None,
+     hdim2_coord: Optional[str] = None,
+     coord_type: Optional[Literal["xy", "latlon"]] = None,
+ ) -> Tuple[str, str, str]:
+     """Function to find the coordinates for the horizontal dimensions in a dataframe,
+     either in Cartesian (xy) or lat/lon space. If both Cartesian and lat/lon coordinates
+     exist, the Cartesian coords will take priority
+
+     Parameters
+     ----------
+     variable_dataframe : pd.DataFrame
+         Input dataframe
+     hdim1_coord : Optional[str], optional
+         First horizontal coordinate name, by default None
+     hdim2_coord : Optional[str], optional
+         Second horizontal coordinate name, by default None
+     coord_type : Optional[Literal["xy", "latlon"]], optional
+         The coordinate type to search for, either 'xy' or 'latlon'; must be set if
+         providing either the hdim1_coord or hdim2_coord parameters, by default None
+
+     Returns
+     -------
+     Tuple[str, str, str]
+         First horizontal coordinate name, second horizontal coordinate name, and the coordinate type
+
+     Raises
+     ------
+     ValueError
+         If coord_type is not set when either hdim1_coord or hdim2_coord is specified
+     ValueError
+         If no coordinates are found using the defaults for either xy or latlon
+     """
+     hdim_1_auto = hdim1_coord is None
+     hdim_2_auto = hdim2_coord is None
+
+     if coord_type is None and (not hdim_1_auto or not hdim_2_auto):
+         raise ValueError(
+             "Coord type parameter must be set if either hdim1_coord or hdim2_coord parameters are specified"
+         )
+
+     if coord_type in ["xy", None]:
+         try:
+             hdim1_coord_out = find_coord_in_dataframe(
+                 variable_dataframe, coord=hdim1_coord, defaults=COMMON_Y_COORDS
+             )
+         except ValueError as e:
+             if not hdim_1_auto:
+                 raise e
+             hdim1_coord_out = None
+
+         try:
+             hdim2_coord_out = find_coord_in_dataframe(
+                 variable_dataframe, coord=hdim2_coord, defaults=COMMON_X_COORDS
+             )
+         except ValueError as e:
+             if not hdim_2_auto:
+                 raise e
+             hdim2_coord_out = None
+
+         if hdim1_coord_out is not None and hdim2_coord_out is not None:
+             return hdim1_coord_out, hdim2_coord_out, "xy"
+         else:
+             # Reset output coords to None to ensure we don't match an xy coord in one dimension with latlon in another
+             hdim1_coord_out = None
+             hdim2_coord_out = None
+
+     if coord_type in ["latlon", None]:
+         try:
+             hdim1_coord_out = find_coord_in_dataframe(
+                 variable_dataframe, coord=hdim1_coord, defaults=COMMON_LAT_COORDS
+             )
+             coord_type = "latlon"
+         except ValueError as e:
+             if not hdim_1_auto:
+                 raise e
+             hdim1_coord_out = None
+
+         try:
+             hdim2_coord_out = find_coord_in_dataframe(
+                 variable_dataframe, coord=hdim2_coord, defaults=COMMON_LON_COORDS
+             )
+             coord_type = "latlon"
+         except ValueError as e:
+             if not hdim_2_auto:
+                 raise e
+             hdim2_coord_out = None
+
+         if hdim1_coord_out is not None and hdim2_coord_out is not None:
+             return hdim1_coord_out, hdim2_coord_out, "latlon"
+
+     raise ValueError(
+         "No coordinates found matching defaults, please specify coordinate using hdim1_coord and hdim2_coord parameters"
+     )
+
+
+ @njit_if_available
+ def calc_distance_coords(coords_1: np.array, coords_2: np.array) -> float:
+     """Function to calculate the distance between cartesian
+     coordinate set 1 and coordinate set 2.
+
+     Parameters
+     ----------
+     coords_1: 2D or 3D array-like
+         Set of coordinates passed in from trackpy of either (vdim, hdim_1, hdim_2)
+         coordinates or (hdim_1, hdim_2) coordinates.
+     coords_2: 2D or 3D array-like
+         Similar to coords_1, but for the second set of coordinates
+
+     Returns
+     -------
+     float
+         Distance between coords_1 and coords_2 in cartesian space.
+     """
+
+     is_3D = len(coords_1) == 3
+
+     if not is_3D:
+         # Let's make the accounting easier.
+         coords_1 = np.array((0, coords_1[0], coords_1[1]))
+         coords_2 = np.array((0, coords_2[0], coords_2[1]))
+
+     deltas = coords_1 - coords_2
+     return np.sqrt(np.sum(deltas**2))
+
+
+ def find_hdim_axes_3D(
+     field_in: Union[iris.cube.Cube, xr.DataArray],
+     vertical_coord: Union[str, None] = None,
+     vertical_axis: Union[int, None] = None,
+ ) -> tuple[int, int]:
+     """Finds what the hdim axes are given a 3D (including z) or
+     4D (including z and time) dataset.
+
+     Parameters
+     ----------
+     field_in: iris cube or xarray dataarray
+         Input field, can be 3D or 4D
+     vertical_coord: str
+         The name of the vertical coord, or None, which will attempt to find
+         the vertical coordinate name
+     vertical_axis: int or None
+         The axis number of the vertical coordinate, or None. Note
+         that only one of vertical_axis or vertical_coord can be set.
+
+     Returns
+     -------
+     (hdim_1_axis, hdim_2_axis): (int, int)
+         The axes for hdim_1 and hdim_2
+
+     """
+
+     if vertical_coord == "auto":
+         _warn_auto_coordinate()
+
+     if vertical_coord is not None and vertical_axis is not None:
+         if vertical_coord != "auto":
+             raise ValueError("Cannot set both vertical_coord and vertical_axis.")
+
+     if isinstance(field_in, iris.cube.Cube):
+         return iris_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis)
+     elif isinstance(field_in, xr.DataArray):
+         return xr_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis)
+     else:
+         raise ValueError("Unknown data type: " + type(field_in).__name__)
+
+
+ def find_axis_from_coord(
+     variable_arr: Union[iris.cube.Cube, xr.DataArray], coord_name: str
+ ) -> int:
+     """Finds the axis number in an xarray or iris cube given a coordinate or dimension name.
+
+     Parameters
+     ----------
+     variable_arr: iris.cube.Cube or xarray.DataArray
+         Input variable cube
+     coord_name: str
+         coordinate or dimension to look for
+
+     Returns
+     -------
+     axis_number: int
+         the number of the axis of the given coordinate, or None if the coordinate
+         is not found in the variable or not a dimensional coordinate
+     """
+
+     if isinstance(variable_arr, iris.cube.Cube):
+         return iris_utils.find_axis_from_coord(variable_arr, coord_name)
+     elif isinstance(variable_arr, xr.DataArray):
+         return xr_utils.find_axis_from_dim_coord(variable_arr, coord_name)
+     else:
+         raise ValueError("variable_arr must be Iris Cube or Xarray DataArray")
+
+
+ @irispandas_to_xarray()
+ def detect_latlon_coord_name(
+     in_dataset: Union[xr.DataArray, iris.cube.Cube],
+     latitude_name: Union[str, None] = None,
+     longitude_name: Union[str, None] = None,
+ ) -> tuple[str, str]:
+     """Function to detect the name of latitude/longitude coordinates
+
+     Parameters
+     ----------
+     in_dataset: iris.cube.Cube or xarray.DataArray
+         Input dataset to detect names from
+     latitude_name: str
+         The name of the latitude coordinate. If None, tries to auto-detect.
+     longitude_name: str
+         The name of the longitude coordinate. If None, tries to auto-detect.
+
+     Returns
+     -------
+     lat_name, lon_name: tuple(str)
+         the detected names of the latitude and longitude coordinates. If a
+         coordinate is not detected, returns None for that coordinate.
+
+     """
+
+     if latitude_name == "auto" or longitude_name == "auto":
+         _warn_auto_coordinate()
+
+     out_lat = None
+     out_lon = None
+     test_lat_names = ["lat", "latitude"]
+     test_lon_names = ["lon", "long", "longitude"]
+     if latitude_name is not None and latitude_name != "auto":
+         if latitude_name in in_dataset.coords:
+             out_lat = latitude_name
+     else:
+         for test_lat_name in test_lat_names:
+             if test_lat_name in in_dataset.coords:
+                 out_lat = test_lat_name
+                 break
+     if longitude_name is not None and longitude_name != "auto":
+         if longitude_name in in_dataset.coords:
+             out_lon = longitude_name
+     else:
+         for test_lon_name in test_lon_names:
+             if test_lon_name in in_dataset.coords:
+                 out_lon = test_lon_name
+                 break
+     return out_lat, out_lon
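For reference, a sketch of how the dataframe coordinate helpers in this module resolve column names (not part of the wheel; the feature dataframe below is invented, and the expected results follow from the default coordinate lists defined above):

import pandas as pd

from tobac.utils.internal.coordinates import (
    find_dataframe_horizontal_coords,
    find_dataframe_vertical_coord,
)

features = pd.DataFrame(
    {
        "feature": [1, 2],
        "projection_y_coordinate": [1000.0, 2000.0],
        "projection_x_coordinate": [500.0, 1500.0],
        "latitude": [48.1, 48.2],
        "longitude": [11.5, 11.6],
        "altitude": [250.0, 750.0],
    }
)

# Cartesian coordinates take priority when both xy and lat/lon columns are present:
# should give ("projection_y_coordinate", "projection_x_coordinate", "xy")
print(find_dataframe_horizontal_coords(features))

# Restricting the search to lat/lon should give ("latitude", "longitude", "latlon")
print(find_dataframe_horizontal_coords(features, coord_type="latlon"))

# "altitude" is the only column matching COMMON_VERT_COORDS, so it should be returned
print(find_dataframe_vertical_coord(features))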
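calc_distance_coords computes a plain Euclidean distance, and 2D (hdim_1, hdim_2) inputs are padded with a zero vertical component before the same calculation. A minimal 3D sketch (not part of the wheel; the coordinate values are invented):

import numpy as np

from tobac.utils.internal.coordinates import calc_distance_coords

# 3D (vdim, hdim_1, hdim_2) points: sqrt(1**2 + 2**2 + 2**2) = 3.0
print(calc_distance_coords(np.array([0.0, 0.0, 0.0]), np.array([1.0, 2.0, 2.0])))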