tobac-1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
tobac/merge_split.py ADDED
@@ -0,0 +1,324 @@
+ """
+ Tobac merge and split
+ This submodule is a post-processing step to address tracked cells which merge/split.
+ The first iteration of this module combines cells that are merging but have received
+ a new cell id (and are considered a new cell) once merged. In general, this submodule labels merged/split cells
+ with a TRACK number in addition to their CELL number.
+
+ """
+
+ from __future__ import annotations
+ import logging
+ from typing import Optional
+ from typing_extensions import Literal
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+ import scipy.sparse
+ from sklearn.neighbors import BallTree
+
+ from tobac.utils.periodic_boundaries import build_distance_function
+ from tobac.utils import internal as internal_utils
+
+
+ def merge_split_MEST(
+     tracks: pd.DataFrame,
+     dxy: float,
+     dz: Optional[float] = None,
+     distance: Optional[float] = None,
+     frame_len: int = 5,
+     cell_number_unassigned: int = -1,
+     vertical_coord: Optional[str] = None,
+     PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
+     min_h1: Optional[int] = None,
+     max_h1: Optional[int] = None,
+     min_h2: Optional[int] = None,
+     max_h2: Optional[int] = None,
+ ) -> xr.Dataset:
+     """
+     Search for merging/splitting cells in tobac tracking data using a minimum
+     Euclidean spanning tree, and combine the merged cells into unique tracks.
+
+     Parameters
+     ----------
+     tracks : pandas.core.frame.DataFrame
+         Pandas dataframe of tobac Track information
+
+     dxy : float
+         The x/y grid spacing of the data.
+         Should be in meters.
+
+     dz : float, optional
+         Constant vertical grid spacing (m), default None. If None, the vertical
+         coord will be inferred automatically or from a specified coord given by
+         the vertical_coord parameter. An exception is raised if both dz and
+         vertical_coord are provided.
+
+     distance : float, optional
+         Distance threshold determining how close two features must be in order
+         to consider merge/splitting. Default is 25x the x/y grid spacing of the
+         data, given in dxy. The distance should be in units of meters.
+
+     frame_len : int, optional
+         Threshold for the maximum number of frames that can separate the end of
+         a cell and the start of a related cell, by default 5 frames.
+
+     cell_number_unassigned: int, optional
+         Value given to unassigned/non-tracked cells by tracking, by default -1.
+
+     vertical_coord: str, optional
+         Name of the vertical coordinate, default None. The vertical coordinate
+         used must have values in meters. If None, tries to auto-detect, or uses
+         constant vertical grid spacing if dz is specified. An exception is
+         raised if both dz and vertical_coord are provided.
+
+     PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both'), optional
+         Sets whether to use periodic boundaries, and if so in which directions.
+         'none' means that we do not have periodic boundaries
+         'hdim_1' means that we are periodic along hdim1
+         'hdim_2' means that we are periodic along hdim2
+         'both' means that we are periodic along both horizontal dimensions
+
+     min_h1: int, optional
+         Minimum real point in hdim_1, for use with periodic boundaries.
+
+     max_h1: int, optional
+         Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size of
+         hdim_1.
+
+     min_h2: int, optional
+         Minimum real point in hdim_2, for use with periodic boundaries.
+
+     max_h2: int, optional
+         Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size of
+         hdim_2.
+
+     Returns
+     -------
+     d : xarray.core.dataset.Dataset
+         xarray dataset of tobac merge/split cells with parent and child designations.
+
+         Parent/child variables include:
+         - cell_parent_track_id: The associated track id for each cell. All cells
+           that have merged or split will have the same parent track id. If a cell
+           never merges/splits, only one cell will have a particular track id.
+         - feature_parent_cell_id: The associated parent cell id for each feature.
+           All features in a given cell will have the same cell id. This is the
+           original TRACK cell_id.
+         - feature_parent_track_id: The associated parent track id for each
+           feature. This is not the same as the cell id number.
+         - track_child_cell_count: The number of child cells for each track id.
+         - cell_child_feature_count: The total number of features for each cell.
+
+     Example usage:
+         d = merge_split_MEST(Track)
+         ds = tobac.utils.standardize_track_dataset(Track, refl_mask)
+         both_ds = xr.merge([ds, d], compat='override')
+         both_ds = tobac.utils.compress_all(both_ds)
+         both_ds.to_netcdf(os.path.join(savedir, 'Track_features_merges.nc'))
+
+     """
+
+     track_groups = tracks[tracks["cell"] != cell_number_unassigned].groupby("cell")
+     first = track_groups.first()
+     last = track_groups.last()
+
+     if distance is None:
+         distance = dxy * 25.0
+
+     # As optional coordinate names are not yet implemented, set to defaults here:
+     y_coordinate_name = "hdim_1"
+     x_coordinate_name = "hdim_2"
+
+     # Check if we are 3D.
+     is_3D = "vdim" in tracks
+     if is_3D:
+         if dz is None:
+             # Find vertical coord name
+             z_coordinate_name = internal_utils.find_dataframe_vertical_coord(
+                 variable_dataframe=tracks, vertical_coord=vertical_coord
+             )
+             dz = 1
+         else:
+             # Use dz, raise error if both are set
+             if vertical_coord is None:
+                 z_coordinate_name = "vdim"
+             else:
+                 raise ValueError(
+                     "dz and vertical_coord both set, vertical"
+                     " spacing is ambiguous. Set one to None."
+                 )
+
+     # Calculate feature locations in cartesian coordinates
+     if is_3D:
+         cell_start_locations = np.stack(
+             [
+                 first[var].values
+                 for var in [z_coordinate_name, y_coordinate_name, x_coordinate_name]
+             ],
+             axis=-1,
+         )
+         cell_start_locations[:, 0] *= dz
+         cell_start_locations[:, 1:] *= dxy
+         cell_end_locations = np.stack(
+             [
+                 last[var].values
+                 for var in [z_coordinate_name, y_coordinate_name, x_coordinate_name]
+             ],
+             axis=-1,
+         )
+         cell_end_locations[:, 0] *= dz
+         cell_end_locations[:, 1:] *= dxy
+     else:
+         cell_start_locations = (
+             np.stack(
+                 [first[var].values for var in [y_coordinate_name, x_coordinate_name]],
+                 axis=-1,
+             )
+             * dxy
+         )
+         cell_end_locations = (
+             np.stack(
+                 [last[var].values for var in [y_coordinate_name, x_coordinate_name]],
+                 axis=-1,
+             )
+             * dxy
+         )
+
+     if PBC_flag in ["hdim_1", "hdim_2", "both"]:
+         # Note that we multiply by dxy to get the distances in spatial coordinates
+         dist_func = build_distance_function(
+             min_h1 * dxy if min_h1 is not None else None,
+             max_h1 * dxy if max_h1 is not None else None,
+             min_h2 * dxy if min_h2 is not None else None,
+             max_h2 * dxy if max_h2 is not None else None,
+             PBC_flag,
+             is_3D,
+         )
+         cell_start_tree = BallTree(
+             cell_start_locations, metric="pyfunc", func=dist_func
+         )
+
+     else:
+         cell_start_tree = BallTree(cell_start_locations, metric="euclidean")
+
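+     # For each cell's end location, find all cell start locations within the distance threshold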
+     neighbours, distances = cell_start_tree.query_radius(
+         cell_end_locations, r=distance, return_distance=True
+     )
+
+     # Input data to the graph which will perform the spanning tree.
+     nodes = np.repeat(
+         np.arange(len(neighbours), dtype=int), [len(n) for n in neighbours]
+     )
+     neighbours = np.concatenate(neighbours)
+     weights = np.concatenate(distances)
+
+     # Remove edges where the frame gap is greater than frame_len, and also remove connections to the same cell
+     wh_frame_len = (
+         np.abs(first["frame"].values[nodes] - last["frame"].values[neighbours])
+         <= frame_len
+     )
+     wh_valid_edge = np.logical_and(wh_frame_len, nodes != neighbours)
+     start_node_cells = first.index.values[nodes[wh_valid_edge]].astype(np.int32)
+     end_node_cells = last.index.values[neighbours[wh_valid_edge]].astype(np.int32)
+
+     cell_id = np.unique(tracks.cell.values)
+     cell_id = cell_id[cell_id != cell_number_unassigned].astype(int)
+     max_cell = np.max(cell_id)
+
+     if len(start_node_cells):
+         # We need to add a small value to the dists to prevent 0-length edges
+         cell_graph = scipy.sparse.coo_array(
+             (weights[wh_valid_edge] + 0.01, (start_node_cells, end_node_cells)),
+             shape=(max_cell + 1, max_cell + 1),
+         )
+         cell_graph = scipy.sparse.csgraph.minimum_spanning_tree(
+             cell_graph, overwrite=True
+         )
+         # Find remaining start/end nodes after calculating minimum spanning tree
+         start_node_cells, end_node_cells = cell_graph.nonzero()
+
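+         # Cells connected through the spanning tree belong to the same merge/split track;
+         # label each connected component with a track id, renumbered to start at 1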
+         cell_parent_track_id = scipy.sparse.csgraph.connected_components(cell_graph)[1][
+             cell_id
+         ]
+         cell_parent_track_id = (
+             np.unique(cell_parent_track_id, return_inverse=True)[1] + 1
+         )
+     else:
+         cell_parent_track_id = np.arange(cell_id.size, dtype=int) + 1
+
+     track_dim = "track"
+     cell_dim = "cell"
+     feature_dim = "feature"
+
+     cell_parent_track_id = xr.DataArray(
+         cell_parent_track_id, dims=(cell_dim,), coords={cell_dim: cell_id}
+     )
+     logging.debug("found cell parent track ids")
+
+     track_id = np.unique(cell_parent_track_id)
+     logging.debug("found track ids")
+
+     # This version includes all the features, regardless of whether they are used in cells or not.
+     feature_id = tracks.feature.values.astype(int)
+     logging.debug("found feature ids")
+
+     feature_parent_cell_id = tracks.cell.values.astype(int)
+     feature_parent_cell_id = xr.DataArray(
+         feature_parent_cell_id,
+         dims=(feature_dim,),
+         coords={feature_dim: feature_id},
+     )
+     logging.debug("found feature parent cell ids")
+
+     wh_feature_in_cell = (feature_parent_cell_id != cell_number_unassigned).values
+     feature_parent_track_id = np.full(wh_feature_in_cell.shape, cell_number_unassigned)
+     feature_parent_track_id[wh_feature_in_cell] = cell_parent_track_id.loc[
+         feature_parent_cell_id[wh_feature_in_cell]
+     ].values
+     feature_parent_track_id = xr.DataArray(
+         feature_parent_track_id,
+         dims=(feature_dim,),
+         coords={feature_dim: feature_id},
+     )
+
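+     # Count the number of child cells per track and the number of child features per cell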
+     track_child_cell_count = (
+         cell_parent_track_id.groupby(cell_parent_track_id).reduce(np.size).values
+     )
+     track_child_cell_count = xr.DataArray(
+         track_child_cell_count,
+         dims=(track_dim,),
+         coords={track_dim: track_id},
+     )
+
+     cell_child_feature_count = (
+         feature_parent_cell_id[wh_feature_in_cell]
+         .groupby(feature_parent_cell_id[wh_feature_in_cell])
+         .reduce(np.size)
+         .values
+     )
+     cell_child_feature_count = xr.DataArray(
+         cell_child_feature_count, dims=(cell_dim), coords={cell_dim: cell_id}
+     )
+
+     cell_starts_with_split = np.isin(cell_id, start_node_cells)
+     cell_starts_with_split = xr.DataArray(
+         cell_starts_with_split, dims=(cell_dim), coords={cell_dim: cell_id}
+     )
+
+     cell_ends_with_merge = np.isin(cell_id, end_node_cells)
+     cell_ends_with_merge = xr.DataArray(
+         cell_ends_with_merge, dims=(cell_dim), coords={cell_dim: cell_id}
+     )
+
+     merge_split_ds = xr.Dataset(
+         data_vars={
+             "cell_parent_track_id": cell_parent_track_id,
+             "feature_parent_cell_id": feature_parent_cell_id,
+             "feature_parent_track_id": feature_parent_track_id,
+             "track_child_cell_count": track_child_cell_count,
+             "cell_child_feature_count": cell_child_feature_count,
+             "cell_starts_with_split": cell_starts_with_split,
+             "cell_ends_with_merge": cell_ends_with_merge,
+         },
+         coords={feature_dim: feature_id, cell_dim: cell_id, track_dim: track_id},
+     )
+
+     return merge_split_ds
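
A minimal usage sketch of merge_split_MEST from the file above, assuming tracks_df is a tobac tracking DataFrame (with feature, cell and frame columns); the variable names and parameter values here are illustrative only:

    from tobac.merge_split import merge_split_MEST

    # tracks_df: output of tobac's tracking/linking step (hypothetical variable name)
    # dxy: horizontal grid spacing in meters; distance: merge/split search radius in meters
    merge_split_ds = merge_split_MEST(tracks_df, dxy=500.0, distance=25000.0)

    # Each cell maps to a parent track; each feature inherits a track id through its cell
    cell_to_track = merge_split_ds["cell_parent_track_id"]
    feature_to_track = merge_split_ds["feature_parent_track_id"]

    # Save the merge/split relationships alongside the tracking output
    merge_split_ds.to_netcdf("Track_features_merges.nc")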