tobac 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tobac/__init__.py +112 -0
- tobac/analysis/__init__.py +31 -0
- tobac/analysis/cell_analysis.py +628 -0
- tobac/analysis/feature_analysis.py +212 -0
- tobac/analysis/spatial.py +619 -0
- tobac/centerofgravity.py +226 -0
- tobac/feature_detection.py +1758 -0
- tobac/merge_split.py +324 -0
- tobac/plotting.py +2321 -0
- tobac/segmentation/__init__.py +10 -0
- tobac/segmentation/watershed_segmentation.py +1316 -0
- tobac/testing.py +1179 -0
- tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
- tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
- tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
- tobac/tests/test_analysis_spatial.py +1109 -0
- tobac/tests/test_convert.py +265 -0
- tobac/tests/test_datetime.py +216 -0
- tobac/tests/test_decorators.py +148 -0
- tobac/tests/test_feature_detection.py +1321 -0
- tobac/tests/test_generators.py +273 -0
- tobac/tests/test_import.py +24 -0
- tobac/tests/test_iris_xarray_match_utils.py +244 -0
- tobac/tests/test_merge_split.py +351 -0
- tobac/tests/test_pbc_utils.py +497 -0
- tobac/tests/test_sample_data.py +197 -0
- tobac/tests/test_testing.py +747 -0
- tobac/tests/test_tracking.py +714 -0
- tobac/tests/test_utils.py +650 -0
- tobac/tests/test_utils_bulk_statistics.py +789 -0
- tobac/tests/test_utils_coordinates.py +328 -0
- tobac/tests/test_utils_internal.py +97 -0
- tobac/tests/test_xarray_utils.py +232 -0
- tobac/tracking.py +613 -0
- tobac/utils/__init__.py +27 -0
- tobac/utils/bulk_statistics.py +360 -0
- tobac/utils/datetime.py +184 -0
- tobac/utils/decorators.py +540 -0
- tobac/utils/general.py +753 -0
- tobac/utils/generators.py +87 -0
- tobac/utils/internal/__init__.py +2 -0
- tobac/utils/internal/coordinates.py +430 -0
- tobac/utils/internal/iris_utils.py +462 -0
- tobac/utils/internal/label_props.py +82 -0
- tobac/utils/internal/xarray_utils.py +439 -0
- tobac/utils/mask.py +364 -0
- tobac/utils/periodic_boundaries.py +419 -0
- tobac/wrapper.py +244 -0
- tobac-1.6.2.dist-info/METADATA +154 -0
- tobac-1.6.2.dist-info/RECORD +53 -0
- tobac-1.6.2.dist-info/WHEEL +5 -0
- tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
- tobac-1.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1316 @@
"""Provide segmentation techniques.

Segmentation techniques are used to associate areas or volumes to each
identified feature. The segmentation is implemented using watershedding
techniques from the field of image processing with a fixed threshold
value. This value has to be set specifically for every type of input
data and application. The segmentation can be performed for both
two-dimensional and three-dimensional data. At each timestep, a marker
is set at the position (weighted mean center) of each feature identified
in the detection step in an array otherwise filled with zeros. In case
of the three-dimensional watershedding, all cells in the column above
the weighted mean center position of the identified features fulfilling
the threshold condition are set to the respective marker. The algorithm
then fills the area (2D) or volume (3D) based on the input field
starting from these markers until reaching the threshold. If two or more
features are directly connected, the border runs along the
watershed line between the two regions. This procedure creates a mask
that has the same form as the input data, with the corresponding integer
number at all grid points that belong to a feature, else with zero. This
mask can be conveniently and efficiently used to select the volume of each
feature at a specific time step for further analysis or visualization.

References
----------
.. Heikenfeld, M., Marinescu, P. J., Christensen, M.,
   Watson-Parris, D., Senf, F., van den Heever, S. C.
   & Stier, P. (2019). tobac 1.2: towards a flexible
   framework for tracking and analysis of clouds in
   diverse datasets. Geoscientific Model Development,
   12(11), 4551-4570.
"""

from __future__ import annotations
import copy
import logging
import datetime
import warnings

import iris.cube
import xarray as xr
from typing_extensions import Literal
from typing import Union, Callable, Optional

import skimage
import numpy as np
import pandas as pd

from tobac.utils import periodic_boundaries as pbc_utils
from tobac.utils import internal as internal_utils
from tobac.utils import get_statistics
from tobac.utils import decorators
from tobac.utils.generators import field_and_features_over_time


def add_markers(
    features: pd.DataFrame,
    marker_arr: np.array,
    seed_3D_flag: Literal["column", "box"],
    seed_3D_size: Union[int, tuple[int]] = 5,
    level: Union[None, slice] = None,
    PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
) -> np.array:
    """Adds markers for watershedding using the `features` dataframe
    to the marker_arr.
    :hidden:
    Parameters
    ----------
    features: pandas.DataFrame
        Features for one point in time to add as markers.
    marker_arr: 2D or 3D array-like
        Array to add the markers to. Assumes a (z, y, x) configuration.
    seed_3D_flag: str('column', 'box')
        Seed 3D field at feature positions with either the full column
        or a box of user-set size
    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
        integer (units of number of pixels), the seed box is identical in all dimensions.
        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
        Note: we strongly recommend the use of odd numbers for this. If you give
        an even number, your seed box will be biased and not centered
        around the feature.
        Note: if two seed boxes overlap, the feature that is seeded will be the
        closer feature.
    level: slice or None
        If `seed_3D_flag` is 'column', the levels at which to seed the
        cells for the watershedding algorithm. If None, seeds all levels.
    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
        Sets whether to use periodic boundaries, and if so in which directions.
        'none' means that we do not have periodic boundaries
        'hdim_1' means that we are periodic along hdim1
        'hdim_2' means that we are periodic along hdim2
        'both' means that we are periodic along both horizontal dimensions

    Returns
    -------
    2D or 3D array like (same type as `marker_arr`)
        The marker array
    """
    if seed_3D_flag not in ["column", "box"]:
        raise ValueError('seed_3D_flag must be either "column" or "box"')

    # What marker number is the background? Assumed 0.
    bg_marker = 0

    if level is None:
        level = slice(None)

    if len(marker_arr.shape) == 3:
        is_3D = True
        z_len = marker_arr.shape[0]
        h1_len = marker_arr.shape[1]
        h2_len = marker_arr.shape[2]

    else:
        is_3D = False
        z_len = 0
        h1_len = marker_arr.shape[0]
        h2_len = marker_arr.shape[1]
        # transpose to 3D array to make things easier.
        marker_arr = marker_arr[np.newaxis, :, :]

    if seed_3D_flag == "column":
        for _, row in features.iterrows():
            # Offset marker locations by 0.5 to find nearest pixel
            marker_arr[
                level,
                int(row["hdim_1"] + 0.5) % h1_len,
                int(row["hdim_2"] + 0.5) % h2_len,
            ] = row["feature"]

    elif seed_3D_flag == "box":
        # Get the size of the seed box from the input parameter
        try:
            if is_3D:
                seed_z = seed_3D_size[0]
                start_num = 1
            else:
                start_num = 0
            seed_h1 = seed_3D_size[start_num]
            seed_h2 = seed_3D_size[start_num + 1]
        except TypeError:
            # Not iterable, assume int.
            seed_z = seed_3D_size
            seed_h1 = seed_3D_size
            seed_h2 = seed_3D_size

        for _, row in features.iterrows():
            if is_3D:
                # If we have a 3D input and we need to do box seeding
                # we need to have 3D features.
                try:
                    row["vdim"]
                except KeyError:
                    raise ValueError(
                        "For Box seeding on 3D segmentation,"
                        " you must have a 3D input source."
                    )

            # Because we don't support PBCs on the vertical axis,
            # this is simple- just go in the seed_z/2 points around the
            # vdim of the feature, up to the limits of the array.
            if is_3D:
                z_seed_start = int(np.max([0, np.ceil(row["vdim"] - seed_z / 2)]))
                z_seed_end = int(np.min([z_len, np.ceil(row["vdim"] + seed_z / 2)]))
            else:
                z_seed_start = 0
                z_seed_end = 1
            # For the horizontal dimensions, it's more complicated if we have
            # PBCs.
            hdim_1_min = int(np.ceil(row["hdim_1"] - seed_h1 / 2))
            hdim_1_max = int(np.ceil(row["hdim_1"] + seed_h1 / 2))
            hdim_2_min = int(np.ceil(row["hdim_2"] - seed_h2 / 2))
            hdim_2_max = int(np.ceil(row["hdim_2"] + seed_h2 / 2))

            all_seed_boxes = pbc_utils.get_pbc_coordinates(
                h1_min=0,
                h1_max=h1_len,
                h2_min=0,
                h2_max=h2_len,
                h1_start_coord=hdim_1_min,
                h1_end_coord=hdim_1_max,
                h2_start_coord=hdim_2_min,
                h2_end_coord=hdim_2_max,
                PBC_flag=PBC_flag,
            )
            # Build distance function ahead of time, 3D always true as we then reduce
            dist_func = pbc_utils.build_distance_function(
                0, h1_len, 0, h2_len, PBC_flag, True
            )
            for seed_box in all_seed_boxes:
                # Need to see if there are any other points seeded
                # in this seed box first.
                curr_box_markers = marker_arr[
                    z_seed_start:z_seed_end,
                    seed_box[0] : seed_box[1],
                    seed_box[2] : seed_box[3],
                ]
                all_feats_in_box = np.unique(curr_box_markers)
                if np.any(curr_box_markers != bg_marker):
                    # If we have non-background points already seeded,
                    # we need to find the best way to seed them.
                    # Currently seeding with the closest point.
                    # Loop through all points in the box
                    with np.nditer(curr_box_markers, flags=["multi_index"]) as it:
                        for curr_box_pt in it:
                            # Get its global index so that we can calculate
                            # distance and set the array.
                            local_index = it.multi_index
                            global_index = (
                                local_index[0] + z_seed_start,
                                local_index[1] + seed_box[0],
                                local_index[2] + seed_box[2],
                            )

                            # If it's a background marker, we can just set it
                            # with the feature we're working on.
                            if curr_box_pt == bg_marker:
                                marker_arr[global_index] = row["feature"]
                                continue
                            # it has another feature in it. Calculate the distance
                            # from its current set feature and the new feature.
                            if is_3D:
                                curr_coord = np.array(
                                    (row["vdim"], row["hdim_1"], row["hdim_2"])
                                )
                            else:
                                curr_coord = np.array((0, row["hdim_1"], row["hdim_2"]))

                            dist_from_curr_pt = dist_func(
                                np.array(global_index), curr_coord
                            )

                            # This is technically an O(N^2) operation, but
                            # hopefully performance isn't too bad as this should
                            # be rare.
                            orig_row = features[
                                features["feature"] == curr_box_pt
                            ].iloc[0]
                            if is_3D:
                                orig_coord = np.array(
                                    (
                                        orig_row["vdim"],
                                        orig_row["hdim_1"],
                                        orig_row["hdim_2"],
                                    )
                                )
                            else:
                                orig_coord = np.array(
                                    (0, orig_row["hdim_1"], orig_row["hdim_2"])
                                )
                            dist_from_orig_pt = dist_func(
                                np.array(global_index), orig_coord
                            )
                            # The current point center is further away
                            # than the original point center, so do nothing
                            if dist_from_curr_pt > dist_from_orig_pt:
                                continue
                            else:
                                # the current point center is closer.
                                marker_arr[global_index] = row["feature"]
                # completely unseeded region so far.
                else:
                    marker_arr[
                        z_seed_start:z_seed_end,
                        seed_box[0] : seed_box[1],
                        seed_box[2] : seed_box[3],
                    ] = row["feature"]

    # If we aren't 3D, transpose back.
    if not is_3D:
        marker_arr = marker_arr[0, :, :]

    return marker_arr


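# A minimal usage sketch for add_markers(), assuming a small feature dataframe
# with the columns documented above; the column values are illustrative only:
#
#     features = pd.DataFrame(
#         {"feature": [1, 2], "hdim_1": [10.2, 40.7], "hdim_2": [5.5, 30.1]}
#     )
#     markers = add_markers(features, np.zeros((50, 50), dtype=np.int32), "column")
#     # markers[10, 6] == 1 and markers[41, 30] == 2; all other points remain 0.
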
def segmentation_3D(
    features,
    field,
    dxy,
    threshold=3e-3,
    target="maximum",
    level=None,
    method="watershed",
    max_distance=None,
    PBC_flag="none",
    seed_3D_flag="column",
    statistic=None,
):
    """Wrapper for the segmentation()-function."""

    return segmentation(
        features,
        field,
        dxy,
        threshold=threshold,
        target=target,
        level=level,
        method=method,
        max_distance=max_distance,
        PBC_flag=PBC_flag,
        seed_3D_flag=seed_3D_flag,
        statistic=statistic,
    )


def segmentation_2D(
    features,
    field,
    dxy,
    threshold=3e-3,
    target="maximum",
    level=None,
    method="watershed",
    max_distance=None,
    PBC_flag="none",
    seed_3D_flag="column",
    statistic=None,
):
    """Wrapper for the segmentation()-function."""
    return segmentation(
        features,
        field,
        dxy,
        threshold=threshold,
        target=target,
        level=level,
        method=method,
        max_distance=max_distance,
        PBC_flag=PBC_flag,
        seed_3D_flag=seed_3D_flag,
        statistic=statistic,
    )


@decorators.iris_to_xarray()
def segmentation_timestep(
    field_in: xr.DataArray,
    features_in: pd.DataFrame,
    dxy: float,
    threshold: float = 3e-3,
    target: Literal["maximum", "minimum"] = "maximum",
    level: Union[None, slice] = None,
    method: Literal["watershed"] = "watershed",
    max_distance: Union[None, float] = None,
    vertical_coord: Union[str, None] = None,
    PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
    seed_3D_flag: Literal["column", "box"] = "column",
    seed_3D_size: Union[int, tuple[int]] = 5,
    segment_number_below_threshold: int = 0,
    segment_number_unassigned: int = 0,
    statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
) -> tuple[iris.cube.Cube, pd.DataFrame]:
    """Perform watershedding for an individual time step of the data. Works
    for both 2D and 3D data
    :hidden:
    Parameters
    ----------
    field_in : xr.DataArray
        Input field to perform the watershedding on (2D or 3D for one
        specific point in time).

    features_in : pandas.DataFrame
        Features for one specific point in time.

    dxy : float
        Grid spacing of the input data in metres

    threshold : float, optional
        Threshold for the watershedding field to be used for the mask. The
        watershedding is exclusive of the threshold value, i.e. values greater
        (less) than the threshold are included in the target region, while
        values equal to the threshold value are excluded.
        Default is 3e-3.

    target : {'maximum', 'minimum'}, optional
        Flag to determine if tracking is targeting minima or maxima in
        the data to determine from which direction to approach the threshold
        value. Default is 'maximum'.

    level : slice of iris.cube.Cube, optional
        Levels at which to seed the cells for the watershedding
        algorithm. Default is None.

    method : {'watershed'}, optional
        Flag determining the algorithm to use (currently watershedding
        implemented).

    max_distance : float, optional
        Maximum distance from a marker allowed to be classified as
        belonging to that cell in meters. Default is None.

    vertical_coord : str, optional
        Vertical coordinate in 3D input data. If None, input is checked for
        one of {'z', 'model_level_number', 'altitude', 'geopotential_height'}
        as a likely coordinate name

    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
        Sets whether to use periodic boundaries, and if so in which directions.
        'none' means that we do not have periodic boundaries
        'hdim_1' means that we are periodic along hdim1
        'hdim_2' means that we are periodic along hdim2
        'both' means that we are periodic along both horizontal dimensions
    seed_3D_flag: str('column', 'box')
        Seed 3D field at feature positions with either the full column (default)
        or a box of user-set size
    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
        integer (units of number of pixels), the seed box is identical in all dimensions.
        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
        Note: we strongly recommend the use of odd numbers for this. If you give
        an even number, your seed box will be biased and not centered
        around the feature.
        Note: if two seed boxes overlap, the feature that is seeded will be the
        closer feature.
    segment_number_below_threshold: int
        the marker to use to indicate a segmentation point is below the threshold.
    segment_number_unassigned: int
        the marker to use to indicate a segmentation point is above the threshold but unsegmented.
        This can be the same as `segment_number_below_threshold`, but can also be set separately.
    statistic : dict, optional
        Default is None. If given, bulk statistics for the data points assigned
        to each feature are saved in the output.

    Returns
    -------
    segmentation_out : xarray.DataArray
        Mask, 0 outside and integer numbers according to track
        inside the objects.

    features_out : pandas.DataFrame
        Feature dataframe including the number of cells (2D or 3D) in
        the segmented area/volume of the feature at the timestep.

    Raises
    ------
    ValueError
        If target is neither 'maximum' nor 'minimum'.

        If vertical_coord is not in {'auto', 'z', 'model_level_number',
        'altitude', 'geopotential_height'}.

        If there is more than one coordinate name.

        If the spatial dimension is neither 2 nor 3.

        If method is not 'watershed'.

    """

    # The location of watershed within skimage submodules changes with v0.19,
    # but I've kept both for backward compatibility for now
    try:
        from skimage.segmentation import watershed
    except ImportError:
        from skimage.morphology import watershed
    # from skimage.segmentation import random_walker
    from scipy.ndimage import distance_transform_edt
    from copy import deepcopy

    if max_distance is not None and PBC_flag in ["hdim_1", "hdim_2", "both"]:
        raise NotImplementedError("max_distance not yet implemented for PBCs")

    # How many dimensions are we using?
    if field_in.ndim == 2:
        hdim_1_axis = 0
        hdim_2_axis = 1
        vertical_coord_axis = None
    elif field_in.ndim == 3:
        vertical_axis = internal_utils.find_vertical_coord_name(
            field_in, vertical_coord=vertical_coord
        )
        vertical_coord_axis = internal_utils.find_axis_from_coord(
            field_in, vertical_axis
        )
        # Once we know the vertical coordinate, we can resolve the
        # horizontal coordinates
        # To make things easier, we will transpose the axes
        # so that they are consistent.

        hdim_1_axis, hdim_2_axis = internal_utils.find_hdim_axes_3D(
            field_in, vertical_axis=vertical_coord_axis
        )
    else:
        raise ValueError(
            "Segmentation routine only possible with 2 or 3 spatial dimensions"
        )

    if segment_number_below_threshold > 0 or segment_number_unassigned > 0:
        raise ValueError("Below/above threshold markers must be <=0")

    # copy feature dataframe for output
    features_out = deepcopy(features_in)
    # Create cube of the same dimensions and coordinates as input data to store mask:
    segmentation_out = xr.zeros_like(field_in, dtype=int)
    segmentation_out = segmentation_out.rename("segmentation_mask")

    # Get raw array from input data:
    data = field_in.values
    is_3D_seg = len(data.shape) == 3
    # To make things easier, we will transpose the axes
    # so that they are consistent: z, hdim_1, hdim_2
    # We only need to do this for 3D.
    transposed_data = False
    if is_3D_seg:
        if vertical_coord_axis == 1:
            data = np.transpose(data, axes=(1, 0, 2))
            transposed_data = True
        elif vertical_coord_axis == 2:
            data = np.transpose(data, axes=(2, 0, 1))
            transposed_data = True

    # Set level at which to create "Seed" for each feature in the case of 3D watershedding:
    # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
    if level is None:
        level = slice(None)

    # transform max_distance in metres to distance in pixels:
    if max_distance is not None:
        max_distance_pixel = np.ceil(max_distance / dxy)

    # mask data outside region above/below threshold and invert data if tracking maxima:
    if target == "maximum":
        unmasked = data > threshold
        data_segmentation = -1 * data
    elif target == "minimum":
        unmasked = data < threshold
        data_segmentation = data
    else:
        raise ValueError("unknown type of target")

    # set markers at the positions of the features:
    markers = np.zeros(unmasked.shape).astype(np.int32)
    markers = add_markers(
        features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag
    )
    # set markers in cells not fulfilling threshold condition to zero:
    markers[~unmasked] = 0
    # marker_vals = np.unique(markers)

    # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
    data_segmentation = np.array(data_segmentation)
    unmasked = np.array(unmasked)

    # perform segmentation:
    if method == "watershed":
        segmentation_mask = watershed(
            np.array(data_segmentation), markers.astype(np.int32), mask=unmasked
        )
    else:
        raise ValueError("unknown method, must be watershed")

    # remove everything from the individual masks that is more than max_distance_pixel away from the markers
    if max_distance is not None:
        D = distance_transform_edt((markers == 0))
        segmentation_mask[
            np.bitwise_and(segmentation_mask > 0, D > max_distance_pixel)
        ] = 0

    # mask all segmentation_mask points below threshold as -1
    # to differentiate from those unmasked points NOT filled by watershedding
    # TODO: allow user to specify
    points_below_threshold_val = -1
    segmentation_mask[~unmasked] = points_below_threshold_val

    hdim1_min = 0
    hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1
    hdim2_min = 0
    hdim2_max = segmentation_mask.shape[hdim_2_axis] - 1

    # all options that involve dealing with periodic boundaries
    pbc_options = ["hdim_1", "hdim_2", "both"]
    # Only run this if we need to deal with PBCs
    if PBC_flag in pbc_options:
        if not is_3D_seg:
            # let's transpose segmentation_mask to a 1,y,x array to make calculations etc easier.
            segmentation_mask = segmentation_mask[np.newaxis, :, :]
            unmasked = unmasked[np.newaxis, :, :]
            data_segmentation = data_segmentation[np.newaxis, :, :]
            vertical_coord_axis = 0
            hdim_1_axis = 1
            hdim_2_axis = 2

        seg_mask_unseeded = np.zeros(segmentation_mask.shape)

        # Return all indices where segmentation field == 0
        # meaning unfilled but above threshold
        # TODO: is there a way to do this without np.where?
        vdim_unf, hdim1_unf, hdim2_unf = np.where(segmentation_mask == 0)
        seg_mask_unseeded[vdim_unf, hdim1_unf, hdim2_unf] = 1

        # create labeled field of unfilled, unseeded features
        labels_unseeded, label_num = skimage.measure.label(
            seg_mask_unseeded, return_num=True
        )

        markers_2 = np.zeros(data_segmentation.shape, dtype=np.int32)

        # PBC marker seeding approach
        # loop thru LB points, then check if fillable region (labels_unseeded > 0) and seed
        # then check if point on other side of boundary is > 0 in segmentation_mask and
        # adjust where needed
        """
        "First pass" at seeding features across the boundaries. This first pass will bring in
        eligible (meaning values that are higher than threshold) but not previously watershedded
        points across the boundary by seeding them with the appropriate feature across the boundary.

        Later, we will run the second pass or "buddy box" approach that handles cases where points across the boundary
        have been watershedded already.
        """
        if PBC_flag == "hdim_1" or PBC_flag == "both":
            check_add_unseeded_across_bdrys(
                "hdim_1",
                segmentation_mask,
                labels_unseeded,
                hdim1_min,
                hdim1_max,
                markers_2,
            )
        if PBC_flag == "hdim_2" or PBC_flag == "both":
            check_add_unseeded_across_bdrys(
                "hdim_2",
                segmentation_mask,
                labels_unseeded,
                hdim2_min,
                hdim2_max,
                markers_2,
            )

        # Deal with the opposite corner only
        if PBC_flag == "both":
            # TODO: This seems quite slow, is there scope for further speedup?
            for vdim_ind in range(0, segmentation_mask.shape[0]):
                for hdim1_ind in [hdim1_min, hdim1_max]:
                    for hdim2_ind in [hdim2_min, hdim2_max]:
                        # If this point is unseeded and unlabeled
                        if labels_unseeded[vdim_ind, hdim1_ind, hdim2_ind] == 0:
                            continue

                        # Find the opposite point in hdim1 space
                        hdim1_opposite_corner = (
                            hdim1_min if hdim1_ind == hdim1_max else hdim1_max
                        )
                        hdim2_opposite_corner = (
                            hdim2_min if hdim2_ind == hdim2_max else hdim2_max
                        )
                        if (
                            segmentation_mask[
                                vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner
                            ]
                            <= 0
                        ):
                            continue

                        markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[
                            vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner
                        ]

        markers_2[~unmasked] = 0

        if method == "watershed":
            segmentation_mask_2 = watershed(
                data_segmentation, markers_2.astype(np.int32), mask=unmasked
            )
        else:
            raise ValueError("unknown method, must be watershed")

        # Sum up original mask and secondary PBC-mask for full PBC segmentation
        segmentation_mask_3 = segmentation_mask + segmentation_mask_2

        # Secondary seeding complete, now blending periodic boundaries
        # keep segmentation mask fields for now so we can save these all later
        # for demos of changes, otherwise, could add deletion for memory efficiency, e.g.

        # del segmentation_mask
        # del segmentation_mask_2
        # gc.collect()

        # update mask coord regions

        """
        Now, start the second round of watershedding- the "buddy box" approach.
        'buddies' array contains features of interest and any neighbors that are across the boundary or
        otherwise have lateral and/or diagonal physical contact with that label.
        The "buddy box" is also used for multiple crossings of the boundaries with segmented features.
        """

        # TODO: this is a very inelegant way of handling this problem. We should wrap up the pure
        # segmentation routines and simply call them again here with the same parameters.
        reg_props_dict = internal_utils.get_label_props_in_dict(segmentation_mask_3)

        if len(reg_props_dict) != 0:
            (
                curr_reg_inds,
                z_reg_inds,
                y_reg_inds,
                x_reg_inds,
            ) = internal_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)

        wall_labels = np.array([])

        w_wall = np.unique(segmentation_mask_3[:, :, 0])
        wall_labels = np.append(wall_labels, w_wall)

        s_wall = np.unique(segmentation_mask_3[:, 0, :])
        wall_labels = np.append(wall_labels, s_wall)

        wall_labels = np.unique(wall_labels)
        wall_labels = wall_labels[(wall_labels) > 0].astype(int)

        # Loop through all segmentation mask labels on the wall
        for cur_idx in wall_labels:
            vdim_indices = z_reg_inds[cur_idx]
            hdim1_indices = y_reg_inds[cur_idx]
            hdim2_indices = x_reg_inds[cur_idx]

            # start buddies array with feature of interest
            buddies = np.array([cur_idx], dtype=int)
            # Loop through all points in the segmentation mask that we're interested in
            for label_z, label_y, label_x in zip(
                vdim_indices, hdim1_indices, hdim2_indices
            ):
                # check if this is the special case of being a corner point.
                # if it's doubly periodic AND on both x and y boundaries, it's a corner point
                # and we have to look at the other corner.
                # here, we will only look at the corner point and let the below deal with x/y only.
                if PBC_flag == "both" and (
                    np.any(label_y == [hdim1_min, hdim1_max])
                    and np.any(label_x == [hdim2_min, hdim2_max])
                ):
                    # adjust x and y points to the other side
                    y_val_alt = pbc_utils.adjust_pbc_point(
                        label_y, hdim1_min, hdim1_max
                    )
                    x_val_alt = pbc_utils.adjust_pbc_point(
                        label_x, hdim2_min, hdim2_max
                    )
                    label_on_corner = segmentation_mask_3[label_z, y_val_alt, x_val_alt]

                    if label_on_corner >= 0:
                        # add opposite-corner buddy if it exists
                        buddies = np.append(buddies, label_on_corner)

                # on the hdim1 boundary and periodic on hdim1
                if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any(
                    label_y == [hdim1_min, hdim1_max]
                ):
                    y_val_alt = pbc_utils.adjust_pbc_point(
                        label_y, hdim1_min, hdim1_max
                    )

                    # get the label value on the opposite side
                    label_alt = segmentation_mask_3[label_z, y_val_alt, label_x]

                    # if it's labeled and not already been dealt with
                    if label_alt >= 0:
                        # add above/below buddy if it exists
                        buddies = np.append(buddies, label_alt)

                if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any(
                    label_x == [hdim2_min, hdim2_max]
                ):
                    x_val_alt = pbc_utils.adjust_pbc_point(
                        label_x, hdim2_min, hdim2_max
                    )

                    # get the seg value on the opposite side
                    label_alt = segmentation_mask_3[label_z, label_y, x_val_alt]

                    # if it's labeled and not already been dealt with
                    if label_alt >= 0:
                        # add left/right buddy if it exists
                        buddies = np.append(buddies, label_alt)

            buddies = np.unique(buddies)

            if np.all(buddies == cur_idx):
                continue
            else:
                inter_buddies, feat_inds, buddy_inds = np.intersect1d(
                    features_in.feature.values[:], buddies, return_indices=True
                )

            # Get features that are needed for the buddy box
            buddy_features = deepcopy(features_in.iloc[feat_inds])

            # create arrays to contain points of all buddies
            # and their transpositions/transformations
            # for use in Buddy Box space

            # z,y,x points in the grid domain with no transformations
            # NOTE: when I think about it, not sure if these are really needed
            # as we use the y_a1/x_a1 points for the data transposition
            # to the buddy box rather than these and their z2/y2/x2 counterparts
            buddy_z = np.array([], dtype=int)
            buddy_y = np.array([], dtype=int)
            buddy_x = np.array([], dtype=int)

            # z,y,x points from the grid domain WHICH MAY OR MAY NOT BE TRANSFORMED
            # so as to be continuous/contiguous across a grid boundary for that dimension
            # (e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501])
            buddy_z2 = np.array([], dtype=int)
            buddy_y2 = np.array([], dtype=int)
            buddy_x2 = np.array([], dtype=int)

            # These are just for feature positions and are in z2/y2/x2 space
            # (may or may not be within real grid domain)
            # so that when the buddy box is constructed, seeding is done properly
            # in the buddy box space

            # NOTE: We may not need this, as we already do this editing the buddy_features df
            # and an iterrows call through this is what's used to actually seed the buddy box

            buddy_looper = 0

            # loop thru buddies
            for buddy in buddies:
                if buddy == 0:
                    continue
                # isolate feature from set of buddies
                buddy_feat = features_in[features_in["feature"] == buddy].iloc[0]

                # transform buddy feature position if needed for positioning in z2/y2/x2 space
                # MAY be redundant with what is done just below here
                yf2 = pbc_utils.transfm_pbc_point(
                    int(buddy_feat.hdim_1), hdim1_min, hdim1_max
                )
                xf2 = pbc_utils.transfm_pbc_point(
                    int(buddy_feat.hdim_2), hdim2_min, hdim2_max
                )

                # edit value in buddy_features dataframe
                buddy_features.hdim_1.values[buddy_looper] = (
                    pbc_utils.transfm_pbc_point(
                        float(buddy_feat.hdim_1), hdim1_min, hdim1_max
                    )
                )
                buddy_features.hdim_2.values[buddy_looper] = (
                    pbc_utils.transfm_pbc_point(
                        float(buddy_feat.hdim_2), hdim2_min, hdim2_max
                    )
                )

                buddy_looper = buddy_looper + 1
                # Create 1:1 map through actual domain points and continuous/contiguous points
                # used to identify buddy box dimension lengths for its construction
                for z, y, x in zip(
                    z_reg_inds[buddy], y_reg_inds[buddy], x_reg_inds[buddy]
                ):
                    buddy_z = np.append(buddy_z, z)
                    buddy_y = np.append(buddy_y, y)
                    buddy_x = np.append(buddy_x, x)

                    y2 = pbc_utils.transfm_pbc_point(y, hdim1_min, hdim1_max)
                    x2 = pbc_utils.transfm_pbc_point(x, hdim2_min, hdim2_max)

                    buddy_z2 = np.append(buddy_z2, z)
                    buddy_y2 = np.append(buddy_y2, y2)
                    buddy_x2 = np.append(buddy_x2, x2)

            # Buddy Box!
            # Identify mins and maxes of Buddy Box continuous points range
            # so that box of correct size can be constructed
            bbox_zstart = int(np.min(buddy_z2))
            bbox_ystart = int(np.min(buddy_y2))
            bbox_xstart = int(np.min(buddy_x2))
            bbox_zend = int(np.max(buddy_z2) + 1)
            bbox_yend = int(np.max(buddy_y2) + 1)
            bbox_xend = int(np.max(buddy_x2) + 1)

            bbox_zsize = bbox_zend - bbox_zstart
            bbox_ysize = bbox_yend - bbox_ystart
            bbox_xsize = bbox_xend - bbox_xstart

            # Creation of actual Buddy Box space for transposition
            # of data in domain and re-seeding with Buddy feature markers
            buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize))

            # need to loop thru ALL z,y,x inds in buddy box
            # not just the ones that have nonzero seg mask values

            # "_a1" points are re-transformations from the continuous buddy box points
            # back to original grid/domain space to ensure that the correct data are
            # copied to the proper Buddy Box locations
            for z in range(bbox_zstart, bbox_zend):
                for y in range(bbox_ystart, bbox_yend):
                    for x in range(bbox_xstart, bbox_xend):
                        z_a1 = z
                        if y > hdim1_max:
                            y_a1 = y - (hdim1_max + 1)
                        else:
                            y_a1 = y

                        if x > hdim2_max:
                            x_a1 = x - (hdim2_max + 1)
                        else:
                            x_a1 = x
                        if is_3D_seg:
                            buddy_rgn[
                                z - bbox_zstart, y - bbox_ystart, x - bbox_xstart
                            ] = field_in.data[z_a1, y_a1, x_a1]
                        else:
                            buddy_rgn[
                                z - bbox_zstart, y - bbox_ystart, x - bbox_xstart
                            ] = field_in.data[y_a1, x_a1]

            # Update buddy_features feature positions to correspond to buddy box space
            # rather than domain space or continuous/contiguous point space
            if "vdim" not in buddy_features:
                buddy_features["vdim"] = np.zeros(len(buddy_features), dtype=int)
            for buddy_looper in range(0, len(buddy_features)):
                buddy_features.vdim.values[buddy_looper] = (
                    buddy_features.vdim.values[buddy_looper] - bbox_zstart
                )

                buddy_features.hdim_1.values[buddy_looper] = (
                    buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
                )
                buddy_features.hdim_2.values[buddy_looper] = (
                    buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
                )

            # Create dask array from input data:
            buddy_data = buddy_rgn

            # All of the below is the same overarching segmentation procedure as in the original
            # segmentation approach until the line which states
            # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")"
            # It's just performed on the buddy box and its data rather than our full domain

            # mask data outside region above/below threshold and invert data if tracking maxima:
            if target == "maximum":
                unmasked_buddies = buddy_data > threshold
                buddy_segmentation = -1 * buddy_data
            elif target == "minimum":
                unmasked_buddies = buddy_data < threshold
                buddy_segmentation = buddy_data
            else:
                raise ValueError("unknown type of target")

            # set markers at the positions of the features:
            buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32)
            # Buddy boxes are always without PBCs
            buddy_markers = add_markers(
                buddy_features,
                buddy_markers,
                seed_3D_flag,
                seed_3D_size,
                level,
                PBC_flag="none",
            )

            # set markers in cells not fulfilling threshold condition to zero:
            buddy_markers[~unmasked_buddies] = 0

            marker_vals = np.unique(buddy_markers)

            # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
            buddy_segmentation = np.array(buddy_segmentation)
            unmasked_buddies = np.array(unmasked_buddies)

            # perform segmentation:
            if method == "watershed":
                segmentation_mask_4 = watershed(
                    np.array(buddy_segmentation),
                    buddy_markers.astype(np.int32),
                    mask=unmasked_buddies,
                )

            else:
                raise ValueError("unknown method, must be watershed")

            # remove everything from the individual masks that is more than max_distance_pixel away from the markers

            # mask all segmentation_mask points below threshold as -1
            # to differentiate from those unmasked points NOT filled by watershedding
            segmentation_mask_4[~unmasked_buddies] = -1

            # transform segmentation_mask_4 data back to mask created after PBC first-pass ("segmentation_mask_3")

            # loop through buddy box inds and analogous seg mask inds
            for z_val in range(bbox_zstart, bbox_zend):
                z_seg = z_val - bbox_zstart
                z_val_o = z_val
                for y_val in range(bbox_ystart, bbox_yend):
                    y_seg = y_val - bbox_ystart
                    # y_val_o = y_val
                    if y_val > hdim1_max:
                        y_val_o = y_val - (hdim1_max + 1)
                    else:
                        y_val_o = y_val
                    for x_val in range(bbox_xstart, bbox_xend):
                        x_seg = x_val - bbox_xstart
                        # x_val_o = x_val
                        if x_val > hdim2_max:
                            x_val_o = x_val - (hdim2_max + 1)
                        else:
                            x_val_o = x_val

                        # fix to
                        # overwrite IF:
                        # 1) feature of interest
                        # 2) changing to/from feature of interest or adjacent segmented feature

                        # We don't want to overwrite other features that may be in the
                        # buddy box if not contacting the intersected seg field

                        if np.any(
                            segmentation_mask_3[z_val_o, y_val_o, x_val_o] == buddies
                        ) and np.any(
                            segmentation_mask_4.data[z_seg, y_seg, x_seg] == buddies
                        ):
                            # only do updating procedure if old and new values both in buddy set
                            # and values are different
                            if (
                                segmentation_mask_3[z_val_o, y_val_o, x_val_o]
                                != segmentation_mask_4.data[z_seg, y_seg, x_seg]
                            ):
                                segmentation_mask_3[z_val_o, y_val_o, x_val_o] = (
                                    segmentation_mask_4.data[z_seg, y_seg, x_seg]
                                )
        if not is_3D_seg:
            segmentation_mask_3 = segmentation_mask_3[0]

        segmentation_mask = segmentation_mask_3

    if transposed_data:
        if vertical_coord_axis == 1:
            segmentation_mask = np.transpose(segmentation_mask, axes=(1, 0, 2))
        elif vertical_coord_axis == 2:
            segmentation_mask = np.transpose(segmentation_mask, axes=(1, 2, 0))

    # Finished PBC checks and new PBC updated segmentation now in segmentation_mask.
    # Write resulting mask into cube for output
    wh_below_threshold = segmentation_mask == -1
    wh_unsegmented = segmentation_mask == 0
    segmentation_mask[wh_unsegmented] = segment_number_unassigned
    segmentation_mask[wh_below_threshold] = segment_number_below_threshold
    segmentation_out.data = segmentation_mask

    # add ncells to feature dataframe with new statistic method
    features_out = get_statistics(
        features_out,
        np.array(segmentation_out.data.copy()),
        np.array(field_in.data.copy()),
        statistic={"ncells": np.count_nonzero},
        default=0,
    )

    # compute additional statistics, if requested
    if statistic:
        features_out = get_statistics(
            features_out,
            segmentation_out.data.copy(),
            field_in.data.copy(),
            statistic=statistic,
        )

    return segmentation_out, features_out


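# A minimal single-timestep sketch for segmentation_timestep(), assuming a 2D
# xarray.DataArray `frame` for one point in time and a matching feature
# dataframe `feats` (illustrative names and values, not taken from this module):
#
#     mask_i, feats_i = segmentation_timestep(
#         frame, feats, dxy=1000.0, threshold=1.0, target="maximum"
#     )
#
# mask_i holds the feature id inside each watershedded region (and the
# segment_number_unassigned / segment_number_below_threshold markers
# elsewhere); feats_i gains an "ncells" column with the number of segmented
# grid cells per feature.
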
def check_add_unseeded_across_bdrys(
    dim_to_run: str,
    segmentation_mask: np.array,
    unseeded_labels: np.array,
    border_min: int,
    border_max: int,
    markers_arr: np.array,
    inplace: bool = True,
) -> np.array:
    """Add new markers to unseeded but eligible regions when they are bordering
    an appropriate boundary.
    :hidden:
    Parameters
    ----------
    dim_to_run: {'hdim_1', 'hdim_2'}
        what dimension to run
    segmentation_mask: np.array
        the incoming segmentation mask
    unseeded_labels: np.array
        The list of labels that are unseeded
    border_min: int
        minimum real point in the dimension we are running on
    border_max: int
        maximum real point in the dimension we are running on (inclusive)
    markers_arr: np.array
        The array of markers to re-run segmentation with
    inplace: bool
        whether or not to modify markers_arr in place

    Returns
    -------
    markers_arr with new markers added

    """

    # if we are okay modifying the marker array inplace, do that
    if inplace:
        markers_out = markers_arr
    else:
        # If we can't modify the marker array inplace, make a deep copy.
        markers_out = copy.deepcopy(markers_arr)

    # identify border points and the loop points depending on what we want to run
    if dim_to_run == "hdim_1":
        border_axnum = 1
    elif dim_to_run == "hdim_2":
        border_axnum = 2
    # loop through vertical levels
    for border_ind, border_opposite in [
        (border_min, border_max),
        (border_max, border_min),
    ]:
        label_border_pts = np.take(unseeded_labels, border_ind, axis=border_axnum)
        seg_opp_pts = np.take(segmentation_mask, border_opposite, axis=border_axnum)
        if dim_to_run == "hdim_1":
            cond_to_check = np.logical_and(label_border_pts != 0, seg_opp_pts > 0)
            markers_out[:, border_ind, :][cond_to_check] = seg_opp_pts[cond_to_check]

        elif dim_to_run == "hdim_2":
            cond_to_check = np.logical_and(label_border_pts != 0, seg_opp_pts > 0)
            markers_out[:, :, border_ind][cond_to_check] = seg_opp_pts[cond_to_check]
    return markers_out


@decorators.iris_to_xarray()
def segmentation(
    features: pd.DataFrame,
    field: xr.DataArray,
    dxy: float,
    threshold: float = 3e-3,
    target: Literal["maximum", "minimum"] = "maximum",
    level: Union[None, slice] = None,
    method: Literal["watershed"] = "watershed",
    max_distance: Union[None, float] = None,
    vertical_coord: Union[str, None] = None,
    PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
    seed_3D_flag: Literal["column", "box"] = "column",
    seed_3D_size: Union[int, tuple[int]] = 5,
    segment_number_below_threshold: int = 0,
    segment_number_unassigned: int = 0,
    statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
    time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5),
) -> tuple[xr.DataArray, pd.DataFrame]:
    """Use watershedding to determine region above a threshold
    value around initial seeding position for all time steps of
    the input data. Works both in 2D (based on single seeding
    point) and 3D and returns a mask with zeros everywhere around
    the identified regions and the feature id inside the regions.

    Calls segmentation_timestep at each individual timestep of the
    input data.

    Parameters
    ----------
    features : pandas.DataFrame
        Output from trackpy/maketrack.

    field : iris.cube.Cube or xarray.DataArray
        Containing the field to perform the watershedding on.

    dxy : float
        Grid spacing of the input data in meters.

    threshold : float, optional
        Threshold for the watershedding field to be used for the mask.
        Default is 3e-3.

    target : {'maximum', 'minimum'}, optional
        Flag to determine if tracking is targeting minima or maxima in
        the data. Default is 'maximum'.

    level : slice of iris.cube.Cube, optional
        Levels at which to seed the cells for the watershedding
        algorithm. Default is None.

    method : {'watershed'}, optional
        Flag determining the algorithm to use (currently watershedding
        implemented). 'random_walk' could be uncommented.

    max_distance : float, optional
        Maximum distance from a marker allowed to be classified as
        belonging to that cell in meters. Default is None.

    vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
        'geopotential_height'}, optional
        Name of the vertical coordinate for use in 3D segmentation case

    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
        Sets whether to use periodic boundaries, and if so in which directions.
        'none' means that we do not have periodic boundaries
        'hdim_1' means that we are periodic along hdim1
        'hdim_2' means that we are periodic along hdim2
        'both' means that we are periodic along both horizontal dimensions

    seed_3D_flag: str('column', 'box')
        Seed 3D field at feature positions with either the full column (default)
        or a box of user-set size

    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
        integer (units of number of pixels), the seed box is identical in all dimensions.
        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
        Note: we strongly recommend the use of odd numbers for this. If you give
        an even number, your seed box will be biased and not centered
        around the feature.
        Note: if two seed boxes overlap, the feature that is seeded will be the
        closer feature.
    segment_number_below_threshold: int
        the marker to use to indicate a segmentation point is below the threshold.
    segment_number_unassigned: int
        the marker to use to indicate a segmentation point is above the threshold but unsegmented.
    statistic : dict, optional
        Default is None. Optional parameter to calculate bulk statistics within feature detection.
        Dictionary with callable function(s) to apply over the region of each detected feature and
        the name of the statistics to appear in the feature output dataframe. The functions should
        be the values and the names of the metric the keys (e.g. {'mean': np.mean})
    time_padding: timedelta, optional
        If set, allows for segmentation to be associated with a feature input
        timestep that is time_padding off of the feature. Extremely useful when
        converting between micro- and nanoseconds, as is common when using Pandas
        dataframes.

    Returns
    -------
    segmentation_out : iris.cube.Cube
        Mask, 0 outside and integer numbers according to track
        inside the area/volume of the feature.

    features_out : pandas.DataFrame
        Feature dataframe including the number of cells (2D or 3D) in
        the segmented area/volume of the feature at the timestep.

    Raises
    ------
    ValueError
        If field_in.ndim is neither 3 nor 4 and 'time' is not included
        in coords.
    """
    import pandas as pd

    time_var_name: str = "time"
    seg_out_type: str = "int64"

    logging.info("Start watershedding")

    # check input for right dimensions:
    if not (field.ndim == 3 or field.ndim == 4):
        raise ValueError(
            "input to segmentation step must be 3D or 4D including a time dimension"
        )
    try:
        ndim_time = internal_utils.find_axis_from_coord(field, time_var_name)
    except ValueError as exc:
        raise ValueError(
            "input to segmentation step must include a dimension named '{0}'".format(
                time_var_name
            )
        ) from exc

    # create our output dataarray
    segmentation_out_data = xr.DataArray(
        np.zeros(field.shape, dtype=int),
        coords=field.coords,
        dims=field.dims,
        name="segmentation_mask",
    ).assign_attrs(threshold=threshold)

    features_out_list = []

    if len(field.coords[time_var_name]) == 1:
        warnings.warn(
            "As of v1.6.0, segmentation with time length 1 will return time as a coordinate"
            " instead of dropping it (i.e., output will now be 1xMxN instead of MxN). ",
            UserWarning,
        )

    for (
        time_iteration_number,
        time_iteration_value,
        field_at_time,
        features_i,
    ) in field_and_features_over_time(
        field, features, time_var_name=time_var_name, time_padding=time_padding
    ):
        segmentation_out_i, features_out_i = segmentation_timestep(
            field_at_time,
            features_i,
            dxy,
            threshold=threshold,
            target=target,
            level=level,
            method=method,
            max_distance=max_distance,
            vertical_coord=vertical_coord,
            PBC_flag=PBC_flag,
            seed_3D_flag=seed_3D_flag,
            seed_3D_size=seed_3D_size,
            segment_number_unassigned=segment_number_unassigned,
            segment_number_below_threshold=segment_number_below_threshold,
            statistic=statistic,
        )
        segmentation_out_data.loc[{time_var_name: time_iteration_value}] = (
            segmentation_out_i
        )
        features_out_list.append(features_out_i)
        logging.debug(f"Finished segmentation for {time_iteration_value.values}")

    # Merge output from individual timesteps:
    features_out = pd.concat(features_out_list)
    logging.debug("Finished segmentation")
    return segmentation_out_data, features_out


def watershedding_3D(track, field_in, **kwargs):
    """Wrapper for the segmentation()-function."""
    kwargs.pop("method", None)
    return segmentation_3D(track, field_in, method="watershed", **kwargs)


def watershedding_2D(track, field_in, **kwargs):
    """Wrapper for the segmentation()-function."""
    kwargs.pop("method", None)
    return segmentation_2D(track, field_in, method="watershed", **kwargs)