tobac 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
tobac/segmentation/watershed_segmentation.py
@@ -0,0 +1,1316 @@
1
+ """Provide segmentation techniques.
2
+
3
+ Segmentation techniques are used to associate areas or volumes to each
4
+ identified feature. The segmentation is implemented using watershedding
5
+ techniques from the field of image processing with a fixed threshold
6
+ value. This value has to be set specifically for every type of input
7
+ data and application. The segmentation can be performed for both
8
+ two-dimensional and three-dimensional data. At each timestep, a marker
9
+ is set at the position (weighted mean center) of each feature identified
10
+ in the detection step in an array otherwise filled with zeros. In case
11
+ of the three-dimensional watershedding, all cells in the column above
12
+ the weighted mean center position of the identified features fulfilling
13
+ the threshold condition are set to the respective marker. The algorithm
14
+ then fills the area (2D) or volume (3D) based on the input field
15
+ starting from these markers until reaching the threshold. If two or more
16
+ features are directly connected, the border runs along the
17
+ watershed line between the two regions. This procedure creates a mask
18
+ that has the same form as the input data, with the corresponding integer
19
+ number at all grid points that belong to a feature and zero elsewhere. This
20
+ mask can be conveniently and efficiently used to select the volume of each
21
+ feature at a specific time step for further analysis or visualization.
22
+
23
+ References
24
+ ----------
25
+ .. Heikenfeld, M., Marinescu, P. J., Christensen, M.,
26
+ Watson-Parris, D., Senf, F., van den Heever, S. C.
27
+ & Stier, P. (2019). tobac 1.2: towards a flexible
28
+ framework for tracking and analysis of clouds in
29
+ diverse datasets. Geoscientific Model Development,
30
+ 12(11), 4551-4570.
31
+ """
32
+
33
+ from __future__ import annotations
34
+ import copy
35
+ import logging
36
+ import datetime
37
+ import warnings
38
+
39
+ import iris.cube
40
+ import xarray as xr
41
+ from typing_extensions import Literal
42
+ from typing import Union, Callable, Optional
43
+
44
+ import skimage
45
+ import numpy as np
46
+ import pandas as pd
47
+
48
+ from tobac.utils import periodic_boundaries as pbc_utils
49
+ from tobac.utils import internal as internal_utils
50
+ from tobac.utils import get_statistics
51
+ from tobac.utils import decorators
52
+ from tobac.utils.generators import field_and_features_over_time
53
+
54
+
55
+ def add_markers(
56
+ features: pd.DataFrame,
57
+ marker_arr: np.array,
58
+ seed_3D_flag: Literal["column", "box"],
59
+ seed_3D_size: Union[int, tuple[int]] = 5,
60
+ level: Union[None, slice] = None,
61
+ PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
62
+ ) -> np.array:
63
+ """Adds markers for watershedding using the `features` dataframe
64
+ to the marker_arr.
65
+
66
+ Parameters
67
+ ----------
68
+ features: pandas.DataFrame
69
+ Features for one point in time to add as markers.
70
+ marker_arr: 2D or 3D array-like
71
+ Array to add the markers to. Assumes a (z, y, x) configuration.
72
+ seed_3D_flag: str('column', 'box')
73
+ Seed 3D field at feature positions with either the full column
74
+ or a box of user-set size
75
+ seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
76
+ This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
77
+ integer (units of number of pixels), the seed box is identical in all dimensions.
78
+ If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
79
+ Note: we strongly recommend the use of odd numbers for this. If you give
80
+ an even number, your seed box will be biased and not centered
81
+ around the feature.
82
+ Note: if two seed boxes overlap, the feature that is seeded will be the
83
+ closer feature.
84
+ level: slice or None
85
+ If `seed_3D_flag` is 'column', the levels at which to seed the
86
+ cells for the watershedding algorithm. If None, seeds all levels.
87
+ PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
88
+ Sets whether to use periodic boundaries, and if so in which directions.
89
+ 'none' means that we do not have periodic boundaries
90
+ 'hdim_1' means that we are periodic along hdim1
91
+ 'hdim_2' means that we are periodic along hdim2
92
+ 'both' means that we are periodic along both horizontal dimensions
93
+
94
+ Returns
95
+ -------
96
+ 2D or 3D array-like (same type as `marker_arr`)
97
+ The marker array
98
+ """
99
+ if seed_3D_flag not in ["column", "box"]:
100
+ raise ValueError('seed_3D_flag must be either "column" or "box"')
101
+
102
+ # What marker number is the background? Assumed 0.
103
+ bg_marker = 0
104
+
105
+ if level is None:
106
+ level = slice(None)
107
+
108
+ if len(marker_arr.shape) == 3:
109
+ is_3D = True
110
+ z_len = marker_arr.shape[0]
111
+ h1_len = marker_arr.shape[1]
112
+ h2_len = marker_arr.shape[2]
113
+
114
+ else:
115
+ is_3D = False
116
+ z_len = 0
117
+ h1_len = marker_arr.shape[0]
118
+ h2_len = marker_arr.shape[1]
119
+ # expand to a 3D array to make things easier.
120
+ marker_arr = marker_arr[np.newaxis, :, :]
121
+
122
+ if seed_3D_flag == "column":
123
+ for _, row in features.iterrows():
124
+ # Offset marker locations by 0.5 to find nearest pixel
125
+ marker_arr[
126
+ level,
127
+ int(row["hdim_1"] + 0.5) % h1_len,
128
+ int(row["hdim_2"] + 0.5) % h2_len,
129
+ ] = row["feature"]
130
+
131
+ elif seed_3D_flag == "box":
132
+ # Get the size of the seed box from the input parameter
133
+ try:
134
+ if is_3D:
135
+ seed_z = seed_3D_size[0]
136
+ start_num = 1
137
+ else:
138
+ start_num = 0
139
+ seed_h1 = seed_3D_size[start_num]
140
+ seed_h2 = seed_3D_size[start_num + 1]
141
+ except TypeError:
142
+ # Not iterable, assume int.
143
+ seed_z = seed_3D_size
144
+ seed_h1 = seed_3D_size
145
+ seed_h2 = seed_3D_size
146
+
147
+ for _, row in features.iterrows():
148
+ if is_3D:
149
+ # If we have a 3D input and we need to do box seeding
150
+ # we need to have 3D features.
151
+ try:
152
+ row["vdim"]
153
+ except KeyError:
154
+ raise ValueError(
155
+ "For Box seeding on 3D segmentation,"
156
+ " you must have a 3D input source."
157
+ )
158
+
159
+ # Because we don't support PBCs on the vertical axis,
160
+ # this is simple: just go in the seed_z/2 points around the
161
+ # vdim of the feature, up to the limits of the array.
162
+ if is_3D:
163
+ z_seed_start = int(np.max([0, np.ceil(row["vdim"] - seed_z / 2)]))
164
+ z_seed_end = int(np.min([z_len, np.ceil(row["vdim"] + seed_z / 2)]))
165
+ else:
166
+ z_seed_start = 0
167
+ z_seed_end = 1
168
+ # For the horizontal dimensions, it's more complicated if we have
169
+ # PBCs.
170
+ hdim_1_min = int(np.ceil(row["hdim_1"] - seed_h1 / 2))
171
+ hdim_1_max = int(np.ceil(row["hdim_1"] + seed_h1 / 2))
172
+ hdim_2_min = int(np.ceil(row["hdim_2"] - seed_h2 / 2))
173
+ hdim_2_max = int(np.ceil(row["hdim_2"] + seed_h2 / 2))
174
+
175
+ all_seed_boxes = pbc_utils.get_pbc_coordinates(
176
+ h1_min=0,
177
+ h1_max=h1_len,
178
+ h2_min=0,
179
+ h2_max=h2_len,
180
+ h1_start_coord=hdim_1_min,
181
+ h1_end_coord=hdim_1_max,
182
+ h2_start_coord=hdim_2_min,
183
+ h2_end_coord=hdim_2_max,
184
+ PBC_flag=PBC_flag,
185
+ )
186
+ # Build distance function ahead of time, 3D always true as we then reduce
187
+ dist_func = pbc_utils.build_distance_function(
188
+ 0, h1_len, 0, h2_len, PBC_flag, True
189
+ )
190
+ for seed_box in all_seed_boxes:
191
+ # Need to see if there are any other points seeded
192
+ # in this seed box first.
193
+ curr_box_markers = marker_arr[
194
+ z_seed_start:z_seed_end,
195
+ seed_box[0] : seed_box[1],
196
+ seed_box[2] : seed_box[3],
197
+ ]
198
+ all_feats_in_box = np.unique(curr_box_markers)
199
+ if np.any(curr_box_markers != bg_marker):
200
+ # If we have non-background points already seeded,
201
+ # we need to find the best way to seed them.
202
+ # Currently seeding with the closest point.
203
+ # Loop through all points in the box
204
+ with np.nditer(curr_box_markers, flags=["multi_index"]) as it:
205
+ for curr_box_pt in it:
206
+ # Get its global index so that we can calculate
207
+ # distance and set the array.
208
+ local_index = it.multi_index
209
+ global_index = (
210
+ local_index[0] + z_seed_start,
211
+ local_index[1] + seed_box[0],
212
+ local_index[2] + seed_box[2],
213
+ )
214
+
215
+ # If it's a background marker, we can just set it
216
+ # with the feature we're working on.
217
+ if curr_box_pt == bg_marker:
218
+ marker_arr[global_index] = row["feature"]
219
+ continue
220
+ # it has another feature in it. Calculate the distance
221
+ # from its currently assigned feature and from the new feature.
222
+ if is_3D:
223
+ curr_coord = np.array(
224
+ (row["vdim"], row["hdim_1"], row["hdim_2"])
225
+ )
226
+ else:
227
+ curr_coord = np.array((0, row["hdim_1"], row["hdim_2"]))
228
+
229
+ dist_from_curr_pt = dist_func(
230
+ np.array(global_index), curr_coord
231
+ )
232
+
233
+ # This is technically an O(N^2) operation, but
234
+ # hopefully performance isn't too bad as this should
235
+ # be rare.
236
+ orig_row = features[
237
+ features["feature"] == curr_box_pt
238
+ ].iloc[0]
239
+ if is_3D:
240
+ orig_coord = np.array(
241
+ (
242
+ orig_row["vdim"],
243
+ orig_row["hdim_1"],
244
+ orig_row["hdim_2"],
245
+ )
246
+ )
247
+ else:
248
+ orig_coord = np.array(
249
+ (0, orig_row["hdim_1"], orig_row["hdim_2"])
250
+ )
251
+ dist_from_orig_pt = dist_func(
252
+ np.array(global_index), orig_coord
253
+ )
254
+ # The current point center is further away
255
+ # than the original point center, so do nothing
256
+ if dist_from_curr_pt > dist_from_orig_pt:
257
+ continue
258
+ else:
259
+ # the current point center is closer.
260
+ marker_arr[global_index] = row["feature"]
261
+ # completely unseeded region so far.
262
+ else:
263
+ marker_arr[
264
+ z_seed_start:z_seed_end,
265
+ seed_box[0] : seed_box[1],
266
+ seed_box[2] : seed_box[3],
267
+ ] = row["feature"]
268
+
269
+ # If we aren't 3D, drop the dummy vertical axis again.
270
+ if not is_3D:
271
+ marker_arr = marker_arr[0, :, :]
272
+
273
+ return marker_arr
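As an illustration of the two seeding modes, a minimal sketch of calling `add_markers` directly with a small, hypothetical feature dataframe (positions and ids are arbitrary) might look like this:

import numpy as np
import pandas as pd
from tobac.segmentation.watershed_segmentation import add_markers

# Two hypothetical features at pixel positions (hdim_1, hdim_2) = (10, 12) and (40, 7)
features = pd.DataFrame(
    {"feature": [1, 2], "hdim_1": [10.0, 40.0], "hdim_2": [12.0, 7.0]}
)

# 'column' seeding on a 2D array: each feature marks its nearest pixel
markers = add_markers(features, np.zeros((50, 50), dtype=np.int32), "column")
# markers[10, 12] == 1 and markers[40, 7] == 2, zero elsewhere

# 'box' seeding: each feature marks a 3x3 box centred on its position
markers_box = add_markers(
    features, np.zeros((50, 50), dtype=np.int32), "box", seed_3D_size=3
)

An odd `seed_3D_size` keeps the box centred on the feature, as recommended in the docstring above.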
274
+
275
+
276
+ def segmentation_3D(
277
+ features,
278
+ field,
279
+ dxy,
280
+ threshold=3e-3,
281
+ target="maximum",
282
+ level=None,
283
+ method="watershed",
284
+ max_distance=None,
285
+ PBC_flag="none",
286
+ seed_3D_flag="column",
287
+ statistic=None,
288
+ ):
289
+ """Wrapper for the segmentation()-function."""
290
+
291
+ return segmentation(
292
+ features,
293
+ field,
294
+ dxy,
295
+ threshold=threshold,
296
+ target=target,
297
+ level=level,
298
+ method=method,
299
+ max_distance=max_distance,
300
+ PBC_flag=PBC_flag,
301
+ seed_3D_flag=seed_3D_flag,
302
+ statistic=statistic,
303
+ )
304
+
305
+
306
+ def segmentation_2D(
307
+ features,
308
+ field,
309
+ dxy,
310
+ threshold=3e-3,
311
+ target="maximum",
312
+ level=None,
313
+ method="watershed",
314
+ max_distance=None,
315
+ PBC_flag="none",
316
+ seed_3D_flag="column",
317
+ statistic=None,
318
+ ):
319
+ """Wrapper for the segmentation()-function."""
320
+ return segmentation(
321
+ features,
322
+ field,
323
+ dxy,
324
+ threshold=threshold,
325
+ target=target,
326
+ level=level,
327
+ method=method,
328
+ max_distance=max_distance,
329
+ PBC_flag=PBC_flag,
330
+ seed_3D_flag=seed_3D_flag,
331
+ statistic=statistic,
332
+ )
333
+
334
+
335
+ @decorators.iris_to_xarray()
336
+ def segmentation_timestep(
337
+ field_in: xr.DataArray,
338
+ features_in: pd.DataFrame,
339
+ dxy: float,
340
+ threshold: float = 3e-3,
341
+ target: Literal["maximum", "minimum"] = "maximum",
342
+ level: Union[None, slice] = None,
343
+ method: Literal["watershed"] = "watershed",
344
+ max_distance: Union[None, float] = None,
345
+ vertical_coord: Union[str, None] = None,
346
+ PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
347
+ seed_3D_flag: Literal["column", "box"] = "column",
348
+ seed_3D_size: Union[int, tuple[int]] = 5,
349
+ segment_number_below_threshold: int = 0,
350
+ segment_number_unassigned: int = 0,
351
+ statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
352
+ ) -> tuple[iris.cube.Cube, pd.DataFrame]:
353
+ """Perform watershedding for an individual time step of the data. Works
354
+ for both 2D and 3D data.
355
+
356
+ Parameters
357
+ ----------
358
+ field_in : xr.DataArray
359
+ Input field to perform the watershedding on (2D or 3D for one
360
+ specific point in time).
361
+
362
+ features_in : pandas.DataFrame
363
+ Features for one specific point in time.
364
+
365
+ dxy : float
366
+ Grid spacing of the input data in metres
367
+
368
+ threshold : float, optional
369
+ Threshold for the watershedding field to be used for the mask. The watershedding is exclusive of the threshold value, i.e. values greater (less) than the threshold are included in the target region, while values equal to the threshold value are excluded.
370
+ Default is 3e-3.
371
+
372
+ target : {'maximum', 'minimum'}, optional
373
+ Flag to determine if tracking is targeting minima or maxima in
374
+ the data to determine from which direction to approach the threshold
375
+ value. Default is 'maximum'.
376
+
377
+ level : slice of iris.cube.Cube, optional
378
+ Levels at which to seed the cells for the watershedding
379
+ algorithm. Default is None.
380
+
381
+ method : {'watershed'}, optional
382
+ Flag determining the algorithm to use (currently watershedding
383
+ implemented).
384
+
385
+ max_distance : float, optional
386
+ Maximum distance from a marker allowed to be classified as
387
+ belonging to that cell in meters. Default is None.
388
+
389
+ vertical_coord : str, optional
390
+ Vertical coordinate in 3D input data. If None, input is checked for
391
+ one of {'z', 'model_level_number', 'altitude','geopotential_height'}
392
+ as a likely coordinate name
393
+
394
+ PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
395
+ Sets whether to use periodic boundaries, and if so in which directions.
396
+ 'none' means that we do not have periodic boundaries
397
+ 'hdim_1' means that we are periodic along hdim1
398
+ 'hdim_2' means that we are periodic along hdim2
399
+ 'both' means that we are periodic along both horizontal dimensions
400
+ seed_3D_flag: str('column', 'box')
401
+ Seed 3D field at feature positions with either the full column (default)
402
+ or a box of user-set size
403
+ seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
404
+ This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
405
+ integer (units of number of pixels), the seed box is identical in all dimensions.
406
+ If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
407
+ Note: we strongly recommend the use of odd numbers for this. If you give
408
+ an even number, your seed box will be biased and not centered
409
+ around the feature.
410
+ Note: if two seed boxes overlap, the feature that is seeded will be the
411
+ closer feature.
412
+ segment_number_below_threshold: int
413
+ The marker used to indicate that a segmentation point is below the threshold.
414
+ segment_number_unassigned: int
415
+ The marker used to indicate that a segmentation point is above the threshold but unsegmented.
416
+ This can be the same as `segment_number_below_threshold`, but can also be set separately.
417
+ statistic: dict, optional
418
+ Default is None. Dictionary with callable function(s) to apply over the data points assigned to each feature and the name of the statistic to appear in the output dataframe, e.g. {'mean': np.mean}.
419
+
420
+ Returns
421
+ -------
422
+ segmentation_out : xarray.DataArray
423
+ Mask, 0 outside and integer numbers according to track
424
+ inside the objects.
425
+
426
+ features_out : pandas.DataFrame
427
+ Feature dataframe including the number of cells (2D or 3D) in
428
+ the segmented area/volume of the feature at the timestep.
429
+
430
+ Raises
431
+ ------
432
+ ValueError
433
+ If target is neither 'maximum' nor 'minimum'.
434
+
435
+ If vertical_coord is not in {'auto', 'z', 'model_level_number',
436
+ 'altitude', 'geopotential_height'}.
437
+
438
+ If there is more than one coordinate name.
439
+
440
+ If the spatial dimension is neither 2 nor 3.
441
+
442
+ If method is not 'watershed'.
443
+
444
+ """
445
+
446
+ # The location of watershed within skimage submodules changes with v0.19, but I've kept both for backward compatibility for now
447
+ try:
448
+ from skimage.segmentation import watershed
449
+ except ImportError:
450
+ from skimage.morphology import watershed
451
+ # from skimage.segmentation import random_walker
452
+ from scipy.ndimage import distance_transform_edt
453
+ from copy import deepcopy
454
+
455
+ if max_distance is not None and PBC_flag in ["hdim_1", "hdim_2", "both"]:
456
+ raise NotImplementedError("max_distance not yet implemented for PBCs")
457
+
458
+ # How many dimensions are we using?
459
+ if field_in.ndim == 2:
460
+ hdim_1_axis = 0
461
+ hdim_2_axis = 1
462
+ vertical_coord_axis = None
463
+ elif field_in.ndim == 3:
464
+ vertical_axis = internal_utils.find_vertical_coord_name(
465
+ field_in, vertical_coord=vertical_coord
466
+ )
467
+ vertical_coord_axis = internal_utils.find_axis_from_coord(
468
+ field_in, vertical_axis
469
+ )
470
+ # Once we know the vertical coordinate, we can resolve the
471
+ # horizontal coordinates
472
+ # To make things easier, we will transpose the axes
473
+ # so that they are consistent.
474
+
475
+ hdim_1_axis, hdim_2_axis = internal_utils.find_hdim_axes_3D(
476
+ field_in, vertical_axis=vertical_coord_axis
477
+ )
478
+ else:
479
+ raise ValueError(
480
+ "Segmentation routine only possible with 2 or 3 spatial dimensions"
481
+ )
482
+
483
+ if segment_number_below_threshold > 0 or segment_number_unassigned > 0:
484
+ raise ValueError("Below/above threshold markers must be <=0")
485
+
486
+ # copy feature dataframe for output
487
+ features_out = deepcopy(features_in)
488
+ # Create cube of the same dimensions and coordinates as input data to store mask:
489
+ segmentation_out = xr.zeros_like(field_in, dtype=int)
490
+ segmentation_out = segmentation_out.rename("segmentation_mask")
491
+
492
+ # Get raw array from input data:
493
+ data = field_in.values
494
+ is_3D_seg = len(data.shape) == 3
495
+ # To make things easier, we will transpose the axes
496
+ # so that they are consistent: z, hdim_1, hdim_2
497
+ # We only need to do this for 3D.
498
+ transposed_data = False
499
+ if is_3D_seg:
500
+ if vertical_coord_axis == 1:
501
+ data = np.transpose(data, axes=(1, 0, 2))
502
+ transposed_data = True
503
+ elif vertical_coord_axis == 2:
504
+ data = np.transpose(data, axes=(2, 0, 1))
505
+ transposed_data = True
506
+
507
+ # Set level at which to create "Seed" for each feature in the case of 3D watershedding:
508
+ # If None, use all levels (later reduced to the ones fulfilling the threshold conditions)
509
+ if level is None:
510
+ level = slice(None)
511
+
512
+ # transform max_distance in metres to distance in pixels:
513
+ if max_distance is not None:
514
+ max_distance_pixel = np.ceil(max_distance / dxy)
515
+
516
+ # mask data outside region above/below threshold and invert data if tracking maxima:
517
+ if target == "maximum":
518
+ unmasked = data > threshold
519
+ data_segmentation = -1 * data
520
+ elif target == "minimum":
521
+ unmasked = data < threshold
522
+ data_segmentation = data
523
+ else:
524
+ raise ValueError("unknown type of target")
525
+
526
+ # set markers at the positions of the features:
527
+ markers = np.zeros(unmasked.shape).astype(np.int32)
528
+ markers = add_markers(
529
+ features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag
530
+ )
531
+ # set markers in cells not fulfilling threshold condition to zero:
532
+ markers[~unmasked] = 0
533
+ # marker_vals = np.unique(markers)
534
+
535
+ # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
536
+ data_segmentation = np.array(data_segmentation)
537
+ unmasked = np.array(unmasked)
538
+
539
+ # perform segmentation:
540
+ if method == "watershed":
541
+ segmentation_mask = watershed(
542
+ np.array(data_segmentation), markers.astype(np.int32), mask=unmasked
543
+ )
544
+ else:
545
+ raise ValueError("unknown method, must be watershed")
546
+
547
+ # remove everything from the individual masks that is more than max_distance_pixel away from the markers
548
+ if max_distance is not None:
549
+ D = distance_transform_edt((markers == 0))
550
+ segmentation_mask[
551
+ np.bitwise_and(segmentation_mask > 0, D > max_distance_pixel)
552
+ ] = 0
553
+
554
+ # mask all segmentation_mask points below threshold as -1
555
+ # to differentiate from those unmasked points NOT filled by watershedding
556
+ # TODO: allow user to specify
557
+ points_below_threshold_val = -1
558
+ segmentation_mask[~unmasked] = points_below_threshold_val
559
+
560
+ hdim1_min = 0
561
+ hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1
562
+ hdim2_min = 0
563
+ hdim2_max = segmentation_mask.shape[hdim_2_axis] - 1
564
+
565
+ # all options that involve dealing with periodic boundaries
566
+ pbc_options = ["hdim_1", "hdim_2", "both"]
567
+ # Only run this if we need to deal with PBCs
568
+ if PBC_flag in pbc_options:
569
+ if not is_3D_seg:
570
+ # let's expand segmentation_mask to a (1, y, x) array to make calculations etc. easier.
571
+ segmentation_mask = segmentation_mask[np.newaxis, :, :]
572
+ unmasked = unmasked[np.newaxis, :, :]
573
+ data_segmentation = data_segmentation[np.newaxis, :, :]
574
+ vertical_coord_axis = 0
575
+ hdim_1_axis = 1
576
+ hdim_2_axis = 2
577
+
578
+ seg_mask_unseeded = np.zeros(segmentation_mask.shape)
579
+
580
+ # Return all indices where segmentation field == 0
581
+ # meaning unfilled but above threshold
582
+ # TODO: is there a way to do this without np.where?
583
+ vdim_unf, hdim1_unf, hdim2_unf = np.where(segmentation_mask == 0)
584
+ seg_mask_unseeded[vdim_unf, hdim1_unf, hdim2_unf] = 1
585
+
586
+ # create labeled field of unfilled, unseeded features
587
+ labels_unseeded, label_num = skimage.measure.label(
588
+ seg_mask_unseeded, return_num=True
589
+ )
590
+
591
+ markers_2 = np.zeros(data_segmentation.shape, dtype=np.int32)
592
+
593
+ # PBC marker seeding approach
594
+ # loop through lateral boundary points, then check if fillable region (labels_unseeded > 0) and seed
595
+ # then check if point on other side of boundary is > 0 in segmentation_mask and
596
+ # adjust where needed
597
+ """
598
+ "First pass" at seeding features across the boundaries. This first pass will bring in
599
+ eligible (meaning values that are higher than threshold) but not previously watershedded
600
+ points across the boundary by seeding them with the appropriate feature across the boundary.
601
+
602
+ Later, we will run the second pass or "buddy box" approach that handles cases where points across the boundary
603
+ have been watershedded already.
604
+ """
605
+ if PBC_flag == "hdim_1" or PBC_flag == "both":
606
+ check_add_unseeded_across_bdrys(
607
+ "hdim_1",
608
+ segmentation_mask,
609
+ labels_unseeded,
610
+ hdim1_min,
611
+ hdim1_max,
612
+ markers_2,
613
+ )
614
+ if PBC_flag == "hdim_2" or PBC_flag == "both":
615
+ check_add_unseeded_across_bdrys(
616
+ "hdim_2",
617
+ segmentation_mask,
618
+ labels_unseeded,
619
+ hdim2_min,
620
+ hdim2_max,
621
+ markers_2,
622
+ )
623
+
624
+ # Deal with the opposite corner only
625
+ if PBC_flag == "both":
626
+ # TODO: This seems quite slow, is there scope for further speedup?
627
+ for vdim_ind in range(0, segmentation_mask.shape[0]):
628
+ for hdim1_ind in [hdim1_min, hdim1_max]:
629
+ for hdim2_ind in [hdim2_min, hdim2_max]:
630
+ # If this point is unseeded and unlabeled
631
+ if labels_unseeded[vdim_ind, hdim1_ind, hdim2_ind] == 0:
632
+ continue
633
+
634
+ # Find the opposite point in hdim1 space
635
+ hdim1_opposite_corner = (
636
+ hdim1_min if hdim1_ind == hdim1_max else hdim1_max
637
+ )
638
+ hdim2_opposite_corner = (
639
+ hdim2_min if hdim2_ind == hdim2_max else hdim2_max
640
+ )
641
+ if (
642
+ segmentation_mask[
643
+ vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner
644
+ ]
645
+ <= 0
646
+ ):
647
+ continue
648
+
649
+ markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[
650
+ vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner
651
+ ]
652
+
653
+ markers_2[~unmasked] = 0
654
+
655
+ if method == "watershed":
656
+ segmentation_mask_2 = watershed(
657
+ data_segmentation, markers_2.astype(np.int32), mask=unmasked
658
+ )
659
+ else:
660
+ raise ValueError("unknown method, must be watershed")
661
+
662
+ # Sum up original mask and secondary PBC-mask for full PBC segmentation
663
+ segmentation_mask_3 = segmentation_mask + segmentation_mask_2
664
+
665
+ # Secondary seeding complete, now blending periodic boundaries
666
+ # keep segmentation mask fields for now so we can save these all later
667
+ # for demos of changes, otherwise, could add deletion for memory efficiency, e.g.
668
+
669
+ # del segmentation_mask
670
+ # del segmentation_mask_2
671
+ # gc.collect()
672
+
673
+ # update mask coord regions
674
+
675
+ """
676
+ Now, start the second round of watershedding- the "buddy box" approach.
677
+ 'buddies' array contains features of interest and any neighbors that are across the boundary or
678
+ otherwise have lateral and/or diagonal physical contact with that label.
679
+ The "buddy box" is also used for multiple crossings of the boundaries with segmented features.
680
+ """
681
+
682
+ # TODO: this is a very inelegant way of handling this problem. We should wrap up the pure
683
+ # segmentation routines and simply call them again here with the same parameters.
684
+ reg_props_dict = internal_utils.get_label_props_in_dict(segmentation_mask_3)
685
+
686
+ if len(reg_props_dict) != 0:
687
+ (
688
+ curr_reg_inds,
689
+ z_reg_inds,
690
+ y_reg_inds,
691
+ x_reg_inds,
692
+ ) = internal_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
693
+
694
+ wall_labels = np.array([])
695
+
696
+ w_wall = np.unique(segmentation_mask_3[:, :, 0])
697
+ wall_labels = np.append(wall_labels, w_wall)
698
+
699
+ s_wall = np.unique(segmentation_mask_3[:, 0, :])
700
+ wall_labels = np.append(wall_labels, s_wall)
701
+
702
+ wall_labels = np.unique(wall_labels)
703
+ wall_labels = wall_labels[(wall_labels) > 0].astype(int)
704
+
705
+ # Loop through all segmentation mask labels on the wall
706
+ for cur_idx in wall_labels:
707
+ vdim_indices = z_reg_inds[cur_idx]
708
+ hdim1_indices = y_reg_inds[cur_idx]
709
+ hdim2_indices = x_reg_inds[cur_idx]
710
+
711
+ # start buddies array with feature of interest
712
+ buddies = np.array([cur_idx], dtype=int)
713
+ # Loop through all points in the segmentation mask that we're interested in
714
+ for label_z, label_y, label_x in zip(
715
+ vdim_indices, hdim1_indices, hdim2_indices
716
+ ):
717
+ # check if this is the special case of being a corner point.
718
+ # if it's doubly periodic AND on both x and y boundaries, it's a corner point
719
+ # and we have to look at the other corner.
720
+ # here, we will only look at the corner point and let the below deal with x/y only.
721
+ if PBC_flag == "both" and (
722
+ np.any(label_y == [hdim1_min, hdim1_max])
723
+ and np.any(label_x == [hdim2_min, hdim2_max])
724
+ ):
725
+ # adjust x and y points to the other side
726
+ y_val_alt = pbc_utils.adjust_pbc_point(
727
+ label_y, hdim1_min, hdim1_max
728
+ )
729
+ x_val_alt = pbc_utils.adjust_pbc_point(
730
+ label_x, hdim2_min, hdim2_max
731
+ )
732
+ label_on_corner = segmentation_mask_3[label_z, y_val_alt, x_val_alt]
733
+
734
+ if label_on_corner >= 0:
735
+ # add opposite-corner buddy if it exists
736
+ buddies = np.append(buddies, label_on_corner)
737
+
738
+ # on the hdim1 boundary and periodic on hdim1
739
+ if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any(
740
+ label_y == [hdim1_min, hdim1_max]
741
+ ):
742
+ y_val_alt = pbc_utils.adjust_pbc_point(
743
+ label_y, hdim1_min, hdim1_max
744
+ )
745
+
746
+ # get the label value on the opposite side
747
+ label_alt = segmentation_mask_3[label_z, y_val_alt, label_x]
748
+
749
+ # if it's labeled and not already been dealt with
750
+ if label_alt >= 0:
751
+ # add above/below buddy if it exists
752
+ buddies = np.append(buddies, label_alt)
753
+
754
+ if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any(
755
+ label_x == [hdim2_min, hdim2_max]
756
+ ):
757
+ x_val_alt = pbc_utils.adjust_pbc_point(
758
+ label_x, hdim2_min, hdim2_max
759
+ )
760
+
761
+ # get the seg value on the opposite side
762
+ label_alt = segmentation_mask_3[label_z, label_y, x_val_alt]
763
+
764
+ # if it's labeled and not already been dealt with
765
+ if label_alt >= 0:
766
+ # add left/right buddy if it exists
767
+ buddies = np.append(buddies, label_alt)
768
+
769
+ buddies = np.unique(buddies)
770
+
771
+ if np.all(buddies == cur_idx):
772
+ continue
773
+ else:
774
+ inter_buddies, feat_inds, buddy_inds = np.intersect1d(
775
+ features_in.feature.values[:], buddies, return_indices=True
776
+ )
777
+
778
+ # Get features that are needed for the buddy box
779
+ buddy_features = deepcopy(features_in.iloc[feat_inds])
780
+
781
+ # create arrays to contain points of all buddies
782
+ # and their transpositions/transformations
783
+ # for use in Buddy Box space
784
+
785
+ # z,y,x points in the grid domain with no transformations
786
+ # NOTE: when I think about it, not sure if these are really needed
787
+ # as we use the y_a1/x_a1 points for the data transposition
788
+ # to the buddy box rather than these and their z2/y2/x2 counterparts
789
+ buddy_z = np.array([], dtype=int)
790
+ buddy_y = np.array([], dtype=int)
791
+ buddy_x = np.array([], dtype=int)
792
+
793
+ # z,y,x points from the grid domain WHICH MAY OR MAY NOT BE TRANSFORMED
794
+ # so as to be continuous/contiguous across a grid boundary for that dimension
795
+ # (e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501])
796
+ buddy_z2 = np.array([], dtype=int)
797
+ buddy_y2 = np.array([], dtype=int)
798
+ buddy_x2 = np.array([], dtype=int)
799
+
800
+ # These are just for feature positions and are in z2/y2/x2 space
801
+ # (may or may not be within real grid domain)
802
+ # so that when the buddy box is constructed, seeding is done properly
803
+ # in the buddy box space
804
+
805
+ # NOTE: We may not need this, as we already do this editing the buddy_features df
806
+ # and an iterrows call through this is what's used to actually seed the buddy box
807
+
808
+ buddy_looper = 0
809
+
810
+ # loop thru buddies
811
+ for buddy in buddies:
812
+ if buddy == 0:
813
+ continue
814
+ # isolate feature from set of buddies
815
+ buddy_feat = features_in[features_in["feature"] == buddy].iloc[0]
816
+
817
+ # transform buddy feature position if needed for positioning in z2/y2/x2 space
818
+ # MAY be redundant with what is done just below here
819
+ yf2 = pbc_utils.transfm_pbc_point(
820
+ int(buddy_feat.hdim_1), hdim1_min, hdim1_max
821
+ )
822
+ xf2 = pbc_utils.transfm_pbc_point(
823
+ int(buddy_feat.hdim_2), hdim2_min, hdim2_max
824
+ )
825
+
826
+ # edit value in buddy_features dataframe
827
+ buddy_features.hdim_1.values[buddy_looper] = (
828
+ pbc_utils.transfm_pbc_point(
829
+ float(buddy_feat.hdim_1), hdim1_min, hdim1_max
830
+ )
831
+ )
832
+ buddy_features.hdim_2.values[buddy_looper] = (
833
+ pbc_utils.transfm_pbc_point(
834
+ float(buddy_feat.hdim_2), hdim2_min, hdim2_max
835
+ )
836
+ )
837
+
838
+ buddy_looper = buddy_looper + 1
839
+ # Create 1:1 map through actual domain points and continuous/contiguous points
840
+ # used to identify buddy box dimension lengths for its construction
841
+ for z, y, x in zip(
842
+ z_reg_inds[buddy], y_reg_inds[buddy], x_reg_inds[buddy]
843
+ ):
844
+ buddy_z = np.append(buddy_z, z)
845
+ buddy_y = np.append(buddy_y, y)
846
+ buddy_x = np.append(buddy_x, x)
847
+
848
+ y2 = pbc_utils.transfm_pbc_point(y, hdim1_min, hdim1_max)
849
+ x2 = pbc_utils.transfm_pbc_point(x, hdim2_min, hdim2_max)
850
+
851
+ buddy_z2 = np.append(buddy_z2, z)
852
+ buddy_y2 = np.append(buddy_y2, y2)
853
+ buddy_x2 = np.append(buddy_x2, x2)
854
+
855
+ # Buddy Box!
856
+ # Identify mins and maxes of Buddy Box continuous points range
857
+ # so that box of correct size can be constructed
858
+ bbox_zstart = int(np.min(buddy_z2))
859
+ bbox_ystart = int(np.min(buddy_y2))
860
+ bbox_xstart = int(np.min(buddy_x2))
861
+ bbox_zend = int(np.max(buddy_z2) + 1)
862
+ bbox_yend = int(np.max(buddy_y2) + 1)
863
+ bbox_xend = int(np.max(buddy_x2) + 1)
864
+
865
+ bbox_zsize = bbox_zend - bbox_zstart
866
+ bbox_ysize = bbox_yend - bbox_ystart
867
+ bbox_xsize = bbox_xend - bbox_xstart
868
+
869
+ # Creation of actual Buddy Box space for transposition
870
+ # of data in domain and re-seeding with Buddy feature markers
871
+ buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize))
872
+
873
+ # need to loop thru ALL z,y,x inds in buddy box
874
+ # not just the ones that have nonzero seg mask values
875
+
876
+ # "_a1" points are re-transformations from the continuous buddy box points
877
+ # back to original grid/domain space to ensure that the correct data are
878
+ # copied to the proper Buddy Box locations
879
+ for z in range(bbox_zstart, bbox_zend):
880
+ for y in range(bbox_ystart, bbox_yend):
881
+ for x in range(bbox_xstart, bbox_xend):
882
+ z_a1 = z
883
+ if y > hdim1_max:
884
+ y_a1 = y - (hdim1_max + 1)
885
+ else:
886
+ y_a1 = y
887
+
888
+ if x > hdim2_max:
889
+ x_a1 = x - (hdim2_max + 1)
890
+ else:
891
+ x_a1 = x
892
+ if is_3D_seg:
893
+ buddy_rgn[
894
+ z - bbox_zstart, y - bbox_ystart, x - bbox_xstart
895
+ ] = field_in.data[z_a1, y_a1, x_a1]
896
+ else:
897
+ buddy_rgn[
898
+ z - bbox_zstart, y - bbox_ystart, x - bbox_xstart
899
+ ] = field_in.data[y_a1, x_a1]
900
+
901
+ # Update buddy_features feature positions to correspond to buddy box space
902
+ # rather than domain space or continuous/contiguous point space
903
+ if "vdim" not in buddy_features:
904
+ buddy_features["vdim"] = np.zeros(len(buddy_features), dtype=int)
905
+ for buddy_looper in range(0, len(buddy_features)):
906
+ buddy_features.vdim.values[buddy_looper] = (
907
+ buddy_features.vdim.values[buddy_looper] - bbox_zstart
908
+ )
909
+
910
+ buddy_features.hdim_1.values[buddy_looper] = (
911
+ buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
912
+ )
913
+ buddy_features.hdim_2.values[buddy_looper] = (
914
+ buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
915
+ )
916
+
917
+ # Create dask array from input data:
918
+ buddy_data = buddy_rgn
919
+
920
+ # All of the below is the same overarching segmentation procedure as in the original
921
+ # segmentation approach until the line which states
922
+ # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")"
923
+ # It's just performed on the buddy box and its data rather than our full domain
924
+
925
+ # mask data outside region above/below threshold and invert data if tracking maxima:
926
+ if target == "maximum":
927
+ unmasked_buddies = buddy_data > threshold
928
+ buddy_segmentation = -1 * buddy_data
929
+ elif target == "minimum":
930
+ unmasked_buddies = buddy_data < threshold
931
+ buddy_segmentation = buddy_data
932
+ else:
933
+ raise ValueError("unknown type of target")
934
+
935
+ # set markers at the positions of the features:
936
+ buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32)
937
+ # Buddy boxes are always without PBCs
938
+ buddy_markers = add_markers(
939
+ buddy_features,
940
+ buddy_markers,
941
+ seed_3D_flag,
942
+ seed_3D_size,
943
+ level,
944
+ PBC_flag="none",
945
+ )
946
+
947
+ # set markers in cells not fulfilling threshold condition to zero:
948
+ buddy_markers[~unmasked_buddies] = 0
949
+
950
+ marker_vals = np.unique(buddy_markers)
951
+
952
+ # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
953
+ buddy_segmentation = np.array(buddy_segmentation)
954
+ unmasked_buddies = np.array(unmasked_buddies)
955
+
956
+ # perform segmentation:
957
+ if method == "watershed":
958
+ segmentation_mask_4 = watershed(
959
+ np.array(buddy_segmentation),
960
+ buddy_markers.astype(np.int32),
961
+ mask=unmasked_buddies,
962
+ )
963
+
964
+ else:
965
+ raise ValueError("unknown method, must be watershed")
966
+
967
+ # remove everything from the individual masks that is more than max_distance_pixel away from the markers
968
+
969
+ # mask all segmentation_mask points below threshold as -1
970
+ # to differentiate from those unmasked points NOT filled by watershedding
971
+ segmentation_mask_4[~unmasked_buddies] = -1
972
+
973
+ # transform segmentation_mask_4 data back to mask created after PBC first-pass ("segmentation_mask_3")
974
+
975
+ # loop through buddy box inds and analogous seg mask inds
976
+ for z_val in range(bbox_zstart, bbox_zend):
977
+ z_seg = z_val - bbox_zstart
978
+ z_val_o = z_val
979
+ for y_val in range(bbox_ystart, bbox_yend):
980
+ y_seg = y_val - bbox_ystart
981
+ # y_val_o = y_val
982
+ if y_val > hdim1_max:
983
+ y_val_o = y_val - (hdim1_max + 1)
984
+ else:
985
+ y_val_o = y_val
986
+ for x_val in range(bbox_xstart, bbox_xend):
987
+ x_seg = x_val - bbox_xstart
988
+ # x_val_o = x_val
989
+ if x_val > hdim2_max:
990
+ x_val_o = x_val - (hdim2_max + 1)
991
+ else:
992
+ x_val_o = x_val
993
+
994
+ # fix to
995
+ # overwrite IF:
996
+ # 1) feature of interest
997
+ # 2) changing to/from feature of interest or adjacent segmented feature
998
+
999
+ # We don't want to overwrite other features that may be in the
1000
+ # buddy box if not contacting the intersected seg field
1001
+
1002
+ if np.any(
1003
+ segmentation_mask_3[z_val_o, y_val_o, x_val_o] == buddies
1004
+ ) and np.any(
1005
+ segmentation_mask_4.data[z_seg, y_seg, x_seg] == buddies
1006
+ ):
1007
+ # only do updating procedure if old and new values both in buddy set
1008
+ # and values are different
1009
+ if (
1010
+ segmentation_mask_3[z_val_o, y_val_o, x_val_o]
1011
+ != segmentation_mask_4.data[z_seg, y_seg, x_seg]
1012
+ ):
1013
+ segmentation_mask_3[z_val_o, y_val_o, x_val_o] = (
1014
+ segmentation_mask_4.data[z_seg, y_seg, x_seg]
1015
+ )
1016
+ if not is_3D_seg:
1017
+ segmentation_mask_3 = segmentation_mask_3[0]
1018
+
1019
+ segmentation_mask = segmentation_mask_3
1020
+
1021
+ if transposed_data:
1022
+ if vertical_coord_axis == 1:
1023
+ segmentation_mask = np.transpose(segmentation_mask, axes=(1, 0, 2))
1024
+ elif vertical_coord_axis == 2:
1025
+ segmentation_mask = np.transpose(segmentation_mask, axes=(1, 2, 0))
1026
+
1027
+ # Finished PBC checks and new PBC updated segmentation now in segmentation_mask.
1028
+ # Write resulting mask into cube for output
1029
+ wh_below_threshold = segmentation_mask == -1
1030
+ wh_unsegmented = segmentation_mask == 0
1031
+ segmentation_mask[wh_unsegmented] = segment_number_unassigned
1032
+ segmentation_mask[wh_below_threshold] = segment_number_below_threshold
1033
+ segmentation_out.data = segmentation_mask
1034
+
1035
+ # add ncells to feature dataframe with new statistic method
1036
+ features_out = get_statistics(
1037
+ features_out,
1038
+ np.array(segmentation_out.data.copy()),
1039
+ np.array(field_in.data.copy()),
1040
+ statistic={"ncells": np.count_nonzero},
1041
+ default=0,
1042
+ )
1043
+
1044
+ # compute additional statistics, if requested
1045
+ if statistic:
1046
+ features_out = get_statistics(
1047
+ features_out,
1048
+ segmentation_out.data.copy(),
1049
+ field_in.data.copy(),
1050
+ statistic=statistic,
1051
+ )
1052
+
1053
+ return segmentation_out, features_out
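A minimal sketch of running `segmentation_timestep` on a single synthetic 2D time step (the field, feature position, and parameter values below are arbitrary; in practice `features_in` comes from tobac's feature detection and carries additional columns):

import numpy as np
import pandas as pd
import xarray as xr
from tobac.segmentation.watershed_segmentation import segmentation_timestep

# Synthetic 2D field with a single Gaussian feature centred at (25, 25)
y, x = np.mgrid[0:50, 0:50]
field = xr.DataArray(
    np.exp(-((y - 25) ** 2 + (x - 25) ** 2) / 100.0),
    dims=("hdim_1", "hdim_2"),
    name="w",
)

# Minimal hypothetical feature dataframe for this time step
features = pd.DataFrame({"feature": [1], "hdim_1": [25.0], "hdim_2": [25.0]})

seg_mask, features_out = segmentation_timestep(
    field, features, dxy=1000.0, threshold=0.5, target="maximum"
)
# seg_mask is 1 where the field around the marker exceeds 0.5 and 0 elsewhere;
# features_out gains an "ncells" column with the number of segmented grid cells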
1054
+
1055
+
1056
+ def check_add_unseeded_across_bdrys(
1057
+ dim_to_run: str,
1058
+ segmentation_mask: np.array,
1059
+ unseeded_labels: np.array,
1060
+ border_min: int,
1061
+ border_max: int,
1062
+ markers_arr: np.array,
1063
+ inplace: bool = True,
1064
+ ) -> np.array:
1065
+ """Add new markers to unseeded but eligible regions when they are bordering
1066
+ an appropriate boundary.
1067
+
1068
+ Parameters
1069
+ ----------
1070
+ dim_to_run: {'hdim_1', 'hdim_2'}
1071
+ The dimension along which to run
1072
+ segmentation_mask: np.array
1073
+ the incoming segmentation mask
1074
+ unseeded_labels: np.array
1075
+ Labeled array of unseeded but eligible regions
1076
+ border_min: int
1077
+ minimum real point in the dimension we are running on
1078
+ border_max: int
1079
+ maximum real point in the dimension we are running on (inclusive)
1080
+ markers_arr: np.array
1081
+ The array of markers to re-run segmentation with
1082
+ inplace: bool
1083
+ whether or not to modify markers_arr in place
1084
+
1085
+ Returns
1086
+ -------
1087
+ markers_arr with new markers added
1088
+
1089
+ """
1090
+
1091
+ # if we are okay modifying the marker array inplace, do that
1092
+ if inplace:
1093
+ markers_out = markers_arr
1094
+ else:
1095
+ # If we can't modify the marker array inplace, make a deep copy.
1096
+ markers_out = copy.deepcopy(markers_arr)
1097
+
1098
+ # identify border points and the loop points depending on what we want to run
1099
+ if dim_to_run == "hdim_1":
1100
+ border_axnum = 1
1101
+ elif dim_to_run == "hdim_2":
1102
+ border_axnum = 2
1103
+ # loop over the minimum and maximum borders of the chosen dimension
1104
+ for border_ind, border_opposite in [
1105
+ (border_min, border_max),
1106
+ (border_max, border_min),
1107
+ ]:
1108
+ label_border_pts = np.take(unseeded_labels, border_ind, axis=border_axnum)
1109
+ seg_opp_pts = np.take(segmentation_mask, border_opposite, axis=border_axnum)
1110
+ if dim_to_run == "hdim_1":
1111
+ cond_to_check = np.logical_and(label_border_pts != 0, seg_opp_pts > 0)
1112
+ markers_out[:, border_ind, :][cond_to_check] = seg_opp_pts[cond_to_check]
1113
+
1114
+ elif dim_to_run == "hdim_2":
1115
+ cond_to_check = np.logical_and(label_border_pts != 0, seg_opp_pts > 0)
1116
+ markers_out[:, :, border_ind][cond_to_check] = seg_opp_pts[cond_to_check]
1117
+ return markers_out
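A small worked sketch of what this does for a periodic `hdim_1` boundary (the arrays are tiny and hypothetical):

import numpy as np
from tobac.segmentation.watershed_segmentation import check_add_unseeded_across_bdrys

seg = np.zeros((1, 4, 5), dtype=int)
seg[0, 3, 2] = 7        # feature 7 touches the hdim_1 maximum boundary
labels = np.zeros((1, 4, 5), dtype=int)
labels[0, 0, 2] = 1     # unseeded but eligible region on the opposite boundary
markers = np.zeros((1, 4, 5), dtype=np.int32)

check_add_unseeded_across_bdrys("hdim_1", seg, labels, 0, 3, markers)
# markers[0, 0, 2] is now 7: the eligible region on one boundary is seeded
# with the feature sitting at the same position on the opposite boundary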
1118
+
1119
+
1120
+ @decorators.iris_to_xarray()
1121
+ def segmentation(
1122
+ features: pd.DataFrame,
1123
+ field: xr.DataArray,
1124
+ dxy: float,
1125
+ threshold: float = 3e-3,
1126
+ target: Literal["maximum", "minimum"] = "maximum",
1127
+ level: Union[None, slice] = None,
1128
+ method: Literal["watershed"] = "watershed",
1129
+ max_distance: Union[None, float] = None,
1130
+ vertical_coord: Union[str, None] = None,
1131
+ PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
1132
+ seed_3D_flag: Literal["column", "box"] = "column",
1133
+ seed_3D_size: Union[int, tuple[int]] = 5,
1134
+ segment_number_below_threshold: int = 0,
1135
+ segment_number_unassigned: int = 0,
1136
+ statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
1137
+ time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5),
1138
+ ) -> tuple[xr.DataArray, pd.DataFrame]:
1139
+ """Use watershedding to determine region above a threshold
1140
+ value around initial seeding position for all time steps of
1141
+ the input data. Works both in 2D (based on single seeding
1142
+ point) and 3D and returns a mask with zeros everywhere around
1143
+ the identified regions and the feature id inside the regions.
1144
+
1145
+ Calls segmentation_timestep at each individal timestep of the
1146
+ input data.
1147
+
1148
+ Parameters
1149
+ ----------
1150
+ features : pandas.DataFrame
1151
+ Output of the feature detection step.
1152
+
1153
+ field : iris.cube.Cube or xarray.DataArray
1154
+ Containing the field to perform the watershedding on.
1155
+
1156
+ dxy : float
1157
+ Grid spacing of the input data in meters.
1158
+
1159
+ threshold : float, optional
1160
+ Threshold for the watershedding field to be used for the mask.
1161
+ Default is 3e-3.
1162
+
1163
+ target : {'maximum', 'minimum'}, optional
1164
+ Flag to determine if tracking is targeting minima or maxima in
1165
+ the data. Default is 'maximum'.
1166
+
1167
+ level : slice of iris.cube.Cube, optional
1168
+ Levels at which to seed the cells for the watershedding
1169
+ algorithm. Default is None.
1170
+
1171
+ method : {'watershed'}, optional
1172
+ Flag determining the algorithm to use (currently watershedding
1173
+ implemented). 'random_walk' could be uncommented.
1174
+
1175
+ max_distance : float, optional
1176
+ Maximum distance from a marker allowed to be classified as
1177
+ belonging to that cell in meters. Default is None.
1178
+
1179
+ vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
1180
+ 'geopotential_height'}, optional
1181
+ Name of the vertical coordinate for use in 3D segmentation case
1182
+
1183
+ PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
1184
+ Sets whether to use periodic boundaries, and if so in which directions.
1185
+ 'none' means that we do not have periodic boundaries
1186
+ 'hdim_1' means that we are periodic along hdim1
1187
+ 'hdim_2' means that we are periodic along hdim2
1188
+ 'both' means that we are periodic along both horizontal dimensions
1189
+
1190
+ seed_3D_flag: str('column', 'box')
1191
+ Seed 3D field at feature positions with either the full column (default)
1192
+ or a box of user-set size
1193
+
1194
+ seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
1195
+ This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
1196
+ integer (units of number of pixels), the seed box is identical in all dimensions.
1197
+ If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
1198
+ Note: we strongly recommend the use of odd numbers for this. If you give
1199
+ an even number, your seed box will be biased and not centered
1200
+ around the feature.
1201
+ Note: if two seed boxes overlap, the feature that is seeded will be the
1202
+ closer feature.
1203
+ segment_number_below_threshold: int
1204
+ The marker used to indicate that a segmentation point is below the threshold.
1205
+ segment_number_unassigned: int
1206
+ The marker used to indicate that a segmentation point is above the threshold but unsegmented.
1207
+ statistic : dict, optional
1208
+ Default is None. Optional parameter to calculate bulk statistics within feature detection.
1209
+ Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
1210
+ time_padding: timedelta, optional
1211
+ If set, allows for segmentation to be associated with a feature input
1212
+ timestep that is time_padding off of the feature. Extremely useful when
1213
+ converting between micro- and nanoseconds, as is common when using Pandas
1214
+ dataframes.
1215
+
1216
+ Returns
1217
+ -------
1218
+ segmentation_out : iris.cube.Cube or xarray.DataArray
1219
+ Mask, 0 outside and integer numbers according to track
1220
+ inside the area/volume of the feature.
1221
+
1222
+ features_out : pandas.DataFrame
1223
+ Feature dataframe including the number of cells (2D or 3D) in
1224
+ the segmented area/volume of the feature at the timestep.
1225
+
1226
+ Raises
1227
+ ------
1228
+ ValueError
1229
+ If field_in.ndim is neither 3 nor 4, or if 'time' is not included
1230
+ in coords.
1231
+ """
1232
+ import pandas as pd
1233
+
1234
+ time_var_name: str = "time"
1235
+ seg_out_type: str = "int64"
1236
+
1237
+ logging.info("Start watershedding")
1238
+
1239
+ # check input for right dimensions:
1240
+ if not (field.ndim == 3 or field.ndim == 4):
1241
+ raise ValueError(
1242
+ "input to segmentation step must be 3D or 4D including a time dimension"
1243
+ )
1244
+ try:
1245
+ ndim_time = internal_utils.find_axis_from_coord(field, time_var_name)
1246
+ except ValueError as exc:
1247
+ raise ValueError(
1248
+ "input to segmentation step must include a dimension named '{0}'".format(
1249
+ time_var_name
1250
+ )
1251
+ ) from exc
1252
+
1253
+ # create our output dataarray
1254
+ segmentation_out_data = xr.DataArray(
1255
+ np.zeros(field.shape, dtype=int),
1256
+ coords=field.coords,
1257
+ dims=field.dims,
1258
+ name="segmentation_mask",
1259
+ ).assign_attrs(threshold=threshold)
1260
+
1261
+ features_out_list = []
1262
+
1263
+ if len(field.coords[time_var_name]) == 1:
1264
+ warnings.warn(
1265
+ "As of v1.6.0, segmentation with time length 1 will return time as a coordinate"
1266
+ " instead of dropping it (i.e., output will now be 1xMxN instead of MxN). ",
1267
+ UserWarning,
1268
+ )
1269
+
1270
+ for (
1271
+ time_iteration_number,
1272
+ time_iteration_value,
1273
+ field_at_time,
1274
+ features_i,
1275
+ ) in field_and_features_over_time(
1276
+ field, features, time_var_name=time_var_name, time_padding=time_padding
1277
+ ):
1278
+ segmentation_out_i, features_out_i = segmentation_timestep(
1279
+ field_at_time,
1280
+ features_i,
1281
+ dxy,
1282
+ threshold=threshold,
1283
+ target=target,
1284
+ level=level,
1285
+ method=method,
1286
+ max_distance=max_distance,
1287
+ vertical_coord=vertical_coord,
1288
+ PBC_flag=PBC_flag,
1289
+ seed_3D_flag=seed_3D_flag,
1290
+ seed_3D_size=seed_3D_size,
1291
+ segment_number_unassigned=segment_number_unassigned,
1292
+ segment_number_below_threshold=segment_number_below_threshold,
1293
+ statistic=statistic,
1294
+ )
1295
+ segmentation_out_data.loc[{time_var_name: time_iteration_value}] = (
1296
+ segmentation_out_i
1297
+ )
1298
+ features_out_list.append(features_out_i)
1299
+ logging.debug(f"Finished segmentation for {time_iteration_value.values}")
1300
+
1301
+ # Merge output from individual timesteps:
1302
+ features_out = pd.concat(features_out_list)
1303
+ logging.debug("Finished segmentation")
1304
+ return segmentation_out_data, features_out
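In a typical workflow, `segmentation` (or the `segmentation_2D`/`segmentation_3D` wrappers) is called on the full time series right after feature detection. A hedged sketch, assuming `field` (an iris Cube or xarray DataArray with a time dimension), its grid spacing `dxy` in metres, and a `features` dataframe from tobac's feature detection are already available:

import numpy as np
import tobac

# `field`, `dxy` and `features` are assumed to exist (see note above);
# the threshold and statistic values are illustrative only.
mask, features = tobac.segmentation_2D(
    features,
    field,
    dxy,
    threshold=1.0,                 # fixed threshold, specific to the input data
    target="maximum",              # segment regions above the threshold
    statistic={"mean": np.mean},   # optional bulk statistics per feature
)
# `mask` carries the feature id at every grid point assigned to that feature
# (0 elsewhere); `features` gains an "ncells" column and, here, a "mean" column.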
1305
+
1306
+
1307
+ def watershedding_3D(track, field_in, **kwargs):
1308
+ """Wrapper for the segmentation()-function."""
1309
+ kwargs.pop("method", None)
1310
+ return segmentation_3D(track, field_in, method="watershed", **kwargs)
1311
+
1312
+
1313
+ def watershedding_2D(track, field_in, **kwargs):
1314
+ """Wrapper for the segmentation()-function."""
1315
+ kwargs.pop("method", None)
1316
+ return segmentation_2D(track, field_in, method="watershed", **kwargs)