tobac 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tobac/__init__.py +112 -0
- tobac/analysis/__init__.py +31 -0
- tobac/analysis/cell_analysis.py +628 -0
- tobac/analysis/feature_analysis.py +212 -0
- tobac/analysis/spatial.py +619 -0
- tobac/centerofgravity.py +226 -0
- tobac/feature_detection.py +1758 -0
- tobac/merge_split.py +324 -0
- tobac/plotting.py +2321 -0
- tobac/segmentation/__init__.py +10 -0
- tobac/segmentation/watershed_segmentation.py +1316 -0
- tobac/testing.py +1179 -0
- tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
- tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
- tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
- tobac/tests/test_analysis_spatial.py +1109 -0
- tobac/tests/test_convert.py +265 -0
- tobac/tests/test_datetime.py +216 -0
- tobac/tests/test_decorators.py +148 -0
- tobac/tests/test_feature_detection.py +1321 -0
- tobac/tests/test_generators.py +273 -0
- tobac/tests/test_import.py +24 -0
- tobac/tests/test_iris_xarray_match_utils.py +244 -0
- tobac/tests/test_merge_split.py +351 -0
- tobac/tests/test_pbc_utils.py +497 -0
- tobac/tests/test_sample_data.py +197 -0
- tobac/tests/test_testing.py +747 -0
- tobac/tests/test_tracking.py +714 -0
- tobac/tests/test_utils.py +650 -0
- tobac/tests/test_utils_bulk_statistics.py +789 -0
- tobac/tests/test_utils_coordinates.py +328 -0
- tobac/tests/test_utils_internal.py +97 -0
- tobac/tests/test_xarray_utils.py +232 -0
- tobac/tracking.py +613 -0
- tobac/utils/__init__.py +27 -0
- tobac/utils/bulk_statistics.py +360 -0
- tobac/utils/datetime.py +184 -0
- tobac/utils/decorators.py +540 -0
- tobac/utils/general.py +753 -0
- tobac/utils/generators.py +87 -0
- tobac/utils/internal/__init__.py +2 -0
- tobac/utils/internal/coordinates.py +430 -0
- tobac/utils/internal/iris_utils.py +462 -0
- tobac/utils/internal/label_props.py +82 -0
- tobac/utils/internal/xarray_utils.py +439 -0
- tobac/utils/mask.py +364 -0
- tobac/utils/periodic_boundaries.py +419 -0
- tobac/wrapper.py +244 -0
- tobac-1.6.2.dist-info/METADATA +154 -0
- tobac-1.6.2.dist-info/RECORD +53 -0
- tobac-1.6.2.dist-info/WHEEL +5 -0
- tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
- tobac-1.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import tobac.testing
|
|
3
|
+
import tobac.testing as tbtest
|
|
4
|
+
from collections import Counter
|
|
5
|
+
import numpy as np
|
|
6
|
+
import datetime
|
|
7
|
+
|
|
8
|
+
import tobac.utils as tb_utils
|
|
9
|
+
import tobac.utils.internal as internal_utils
|
|
10
|
+
import tobac.testing as tb_test
|
|
11
|
+
|
|
12
|
+
import pandas.testing as pd_test
|
|
13
|
+
import numpy as np
|
|
14
|
+
from scipy import fft
|
|
15
|
+
import xarray as xr
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def lists_equal_without_order(a, b):
    """Return True if every inner list of ``a`` has an order-insensitive match in ``b``.

    Multiplicity inside each inner list is respected (compared via ``Counter``),
    but duplicate groups are not accounted for.
    from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000
    """
    return all(
        any(Counter(candidate) == Counter(group) for candidate in b) for group in a
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_get_label_props_in_dict():
    """Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases."""
    import skimage.measure as skim

    # 3D case first, then 2D, mirroring the sample generators
    sample_fields = (
        tobac.testing.make_sample_data_3D_3blobs(data_type="xarray"),
        tobac.testing.make_sample_data_2D_3blobs(data_type="xarray"),
    )

    for sample_field in sample_fields:
        labeled = skim.label(sample_field.values[0])
        label_props = tb_utils.get_label_props_in_dict(labeled)

        # the output must be a dict containing at least one labelled region
        assert type(label_props) is dict
        assert len(label_props) > 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_get_indices_of_labels_from_reg_prop_dict():
    """Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases."""
    import skimage.measure as skim
    import numpy as np

    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type="xarray")
    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type="xarray")

    # label the first timestep of each sample field
    labels_3D = skim.label(test_3D_data.values[0])
    nz_3D, ny_3D, nx_3D = test_3D_data.values[0].shape

    labels_2D = skim.label(test_2D_data.values[0])
    ny_2D, nx_2D = test_2D_data.values[0].shape

    region_props_3D = tb_utils.get_label_props_in_dict(labels_3D)
    region_props_2D = tb_utils.get_label_props_in_dict(labels_2D)

    def _indices_in_bounds(indices, axis_len):
        # every returned index must be a valid position along an axis of
        # length ``axis_len``
        return np.all(indices >= 0) and np.all(indices < axis_len)

    # 3D case: expect (counts, z, y, x) index dicts keyed by label
    (
        curr_loc_indices,
        z_indices,
        y_indices,
        x_indices,
    ) = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D)

    for index_key in curr_loc_indices:
        # there should be at least one value in each.
        assert curr_loc_indices[index_key] > 0

        assert _indices_in_bounds(z_indices[index_key], nz_3D)
        assert _indices_in_bounds(x_indices[index_key], nx_3D)
        assert _indices_in_bounds(y_indices[index_key], ny_3D)

    # 2D case: expect (counts, y, x) index dicts keyed by label
    (
        curr_loc_indices,
        y_indices,
        x_indices,
    ) = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D)

    for index_key in curr_loc_indices:
        # there should be at least one value in each.
        assert curr_loc_indices[index_key] > 0

        assert _indices_in_bounds(x_indices[index_key], nx_2D)
        assert _indices_in_bounds(y_indices[index_key], ny_2D)

    # Test error if empty dict is passed:
    with pytest.raises(ValueError, match="No regions!"):
        tb_utils.get_indices_of_labels_from_reg_prop_dict({})
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@pytest.mark.parametrize(
    "feature_loc, min_max_coords, lengths, expected_coord_interp",
    [
        ((0, 0), (0, 1, 0, 1), (2, 2), (0, 0)),
        ((0, 0), (0, 1), (2,), (0,)),
    ],
)
def test_add_coordinates_2D(
    feature_loc, min_max_coords, lengths, expected_coord_interp
):
    """
    Tests ```utils.add_coordinates``` for a 2D case with
    both 1D and 2D coordinates

    Parameters
    ----------
    feature_loc: tuple of int
        (hdim_1, hdim_2) array position of the generated feature
    min_max_coords: tuple
        Flattened (min, max) pairs passed to ``generate_grid_coords``,
        one pair per coordinate dimension
    lengths: tuple of int
        Number of grid points per coordinate dimension; its length selects
        the 1D (one entry) or 2D (two entries) coordinate case
    expected_coord_interp: tuple
        Expected interpolated (longitude[, latitude]) values for the feature
    """
    # NOTE: xr, np and datetime are imported at module level; the previous
    # function-local re-imports were redundant and have been removed, along
    # with a leftover debug print of the interpolated longitude.
    feat_interp = tbtest.generate_single_feature(
        feature_loc[0], feature_loc[1], max_h1=9999, max_h2=9999
    )
    grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths)

    ndims = len(lengths)

    # Note that this is arbitrary.
    base_time = datetime.datetime(2022, 1, 1)

    coord_dict = {"time": [base_time]}
    if ndims == 1:
        # force at least a 2D array for data; the single 1D coordinate is
        # reused for both horizontal dimensions
        lengths = lengths * 2
        dim_names = ["time", "longitude", "latitude"]
        coord_dict["longitude"] = grid_coords
        coord_dict["latitude"] = grid_coords

    elif ndims == 2:
        # 2D coordinates live on auxiliary (x, y) dimensions
        dim_names = ["time", "x", "y"]
        coord_dict["longitude"] = (("x", "y"), grid_coords[0])
        coord_dict["latitude"] = (("x", "y"), grid_coords[1])

    # leading length-1 time dimension; the data values are never inspected
    data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names)

    feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris())

    assert feats_with_coords.iloc[0]["longitude"] == expected_coord_interp[0]
    if ndims == 2:
        assert feats_with_coords.iloc[0]["latitude"] == expected_coord_interp[1]
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@pytest.mark.parametrize(
    "feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp",
    [
        ((0, 0, 0), None, (0, 1, 0, 1), (2, 2), (0, 0)),
        ((0, 0, 0), (1, 1, 1), (0, 1, 0, 1), (2, 2), (0, 0)),
        ((0.5, 0.5, 0.5), None, (0, 3, 3, 6), (2, 2), (1.5, 4.5)),
        ((0, 0, 0), None, (0, 1), (2,), (0,)),
        ((0, 0, 0), None, (0, 1, 0, 1, 0, 1), (2, 2, 2), (0, 0, 0)),
    ],
)
def test_add_coordinates_3D(
    feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp
):
    """
    Tests ```utils.add_coordinates_3D``` for a 3D case with
    1D, 2D, and 3D coordinates

    Parameters
    ----------
    feature_loc: tuple
        (vdim, hdim_1, hdim_2) position of the generated feature
    delta_feat: tuple or None
        If not None, offsets for a second feature added to the dataframe
    min_max_coords: tuple
        Flattened (min, max) pairs passed to ``generate_grid_coords``
    lengths: tuple of int
        Grid points per coordinate dimension; its length selects the
        1D, 2D, or 3D coordinate case
    expected_coord_interp: tuple
        Expected interpolated coordinate values for the first feature
    """
    # pandas is not imported at module level, so the local import stays;
    # the former xr/np/datetime re-imports duplicated module-level ones.
    import pandas as pd

    feat_interp = tbtest.generate_single_feature(
        feature_loc[1], feature_loc[2], start_v=feature_loc[0], max_h1=9999, max_h2=9999
    )
    if delta_feat is not None:
        feat_interp_2 = tbtest.generate_single_feature(
            feature_loc[1] + delta_feat[1],
            feature_loc[2] + delta_feat[2],
            start_v=feature_loc[0] + delta_feat[0],
            max_h1=9999,
            max_h2=9999,
            feature_num=2,
        )
        feat_interp = pd.concat([feat_interp, feat_interp_2], ignore_index=True)

    grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths)

    ndims = len(lengths)

    # Note that this is arbitrary.
    base_time = datetime.datetime(2022, 1, 1)

    coord_dict = {"time": [base_time]}
    if ndims == 1:
        # force at least a 3D array for data
        lengths = lengths * 3
        dim_names = ["time", "longitude", "latitude", "z"]
        coord_dict["longitude"] = grid_coords
        # we only test lon, so it doesn't really matter here what these are.
        coord_dict["latitude"] = grid_coords
        coord_dict["z"] = grid_coords

    elif ndims == 2:
        lengths = lengths + (lengths[0],)
        dim_names = ["time", "x", "y", "z"]
        coord_dict["longitude"] = (("x", "y"), grid_coords[0])
        coord_dict["latitude"] = (("x", "y"), grid_coords[1])
        # We only test lon and lat, so it doesn't matter what this is.
        coord_dict["z"] = np.linspace(0, 1, lengths[0])

    elif ndims == 3:
        dim_names = ["time", "x", "y", "z"]
        coord_dict["longitude"] = (("x", "y", "z"), grid_coords[0])
        coord_dict["latitude"] = (("x", "y", "z"), grid_coords[1])
        coord_dict["altitude"] = (("x", "y", "z"), grid_coords[2])

    data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names)

    if ndims <= 2:
        feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris())
    else:
        # for fully 3D coordinates the vertical axis must be named explicitly
        feats_with_coords = tb_utils.add_coordinates_3D(
            feat_interp, data_xr.to_iris(), vertical_coord=2
        )

    assert np.isclose(feats_with_coords.iloc[0]["longitude"], expected_coord_interp[0])
    if ndims >= 2:
        assert np.isclose(
            feats_with_coords.iloc[0]["latitude"], expected_coord_interp[1]
        )

    if ndims >= 3:
        assert np.isclose(
            feats_with_coords.iloc[0]["altitude"], expected_coord_interp[2]
        )
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
@pytest.mark.parametrize(
    "vertical_coord_names, vertical_coord_pass_in, expect_raise",
    [
        (["z"], None, False),
        (["pudding"], None, True),
        (["pudding"], "pudding", False),
        (["z", "model_level_number"], "pudding", True),
        (["z", "model_level_number"], None, True),
        (["z", "model_level_number"], "z", False),
    ],
)
def test_find_dataframe_vertical_coord(
    vertical_coord_names, vertical_coord_pass_in, expect_raise
):
    """Tests ```tobac.utils.find_dataframe_vertical_coord```

    Parameters
    ----------
    vertical_coord_names: array-like
        Names of vertical coordinates to add
    vertical_coord_pass_in: str
        Value to pass into `vertical_coord`
    expect_raise: bool
        True if we expect a ValueError to be raised, False otherwise
    """
    test_feat = tbtest.generate_single_feature(0, 0, max_h1=100, max_h2=100)

    # attach one dummy column per requested vertical-coordinate name
    for coord_name in vertical_coord_names:
        test_feat[coord_name] = 0.0

    if expect_raise:
        # ambiguous or unrecognized vertical coordinates must raise
        with pytest.raises(ValueError):
            internal_utils.find_dataframe_vertical_coord(
                test_feat, vertical_coord=vertical_coord_pass_in
            )
    else:
        found_coord = internal_utils.find_dataframe_vertical_coord(
            test_feat, vertical_coord=vertical_coord_pass_in
        )
        assert found_coord == vertical_coord_names[0]
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def test_spectral_filtering():
    """Testing tobac.utils.spectral_filtering with random test data that contains a wave signal.

    A single DCT coefficient with wavelength shorter than ``lambda_min`` is
    seeded, so the band-pass filter (retaining lambda_min..lambda_max) should
    remove it and shrink the field's dynamic range.
    """

    # set wavelengths for filtering and grid spacing
    dxy = 4000
    lambda_min = 400 * 1000
    lambda_max = 1000 * 1000

    # get wavelengths for domain
    matrix = np.zeros((200, 100))
    Ni = matrix.shape[-2]
    Nj = matrix.shape[-1]
    # normalized wavenumber magnitude for each (m, n) spectral coefficient
    m, n = np.meshgrid(np.arange(Ni), np.arange(Nj), indexing="ij")
    alpha = np.sqrt(m**2 / Ni**2 + n**2 / Nj**2)
    # turn off warning for zero divide here, because it is not avoidable with normalized wavenumbers
    with np.errstate(divide="ignore", invalid="ignore"):
        lambda_mn = 2 * dxy / alpha

    # seed a wave signal whose wavelength is below lambda_min, i.e. OUTSIDE
    # the band retained by the filter (see final assertion below)
    signal_min = np.where(lambda_mn[0] < lambda_min)[0].min()
    signal_idx = np.random.randint(signal_min, matrix.shape[-1])
    matrix[0, signal_idx] = 1
    wave_data = fft.idctn(matrix)

    # use spectral filtering function on random wave data
    transfer_function, filtered_data = tb_utils.general.spectral_filtering(
        dxy, wave_data, lambda_min, lambda_max, return_transfer_function=True
    )

    # a few checks on the output
    wavelengths = transfer_function[0]
    # first element in wavelengths-space is inf because normalized wavelengths are 0 here
    assert wavelengths[0, 0] == np.inf
    # the first elements should correspond to twice the distance of the corresponding axis (in m)
    # this is because the maximum spatial scale is half a wavelength through the domain
    assert wavelengths[1, 0] == (dxy) * wave_data.shape[-2] * 2
    assert wavelengths[0, 1] == (dxy) * wave_data.shape[-1] * 2

    # check that filtered/ smoothed field exhibits smaller range of values
    assert (filtered_data.max() - filtered_data.min()) < (
        wave_data.max() - wave_data.min()
    )
    # because the randomly generated wave lies outside of range that is set for filtering,
    # make sure that the filtering results in the disappearance of this signal
    # (mean magnitude drops by at least one order of magnitude)
    assert (
        abs(
            np.floor(np.log10(abs(filtered_data.mean())))
            - np.floor(np.log10(abs(wave_data.mean())))
        )
        >= 1
    )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def test_combine_tobac_feats():
    """tests tobac.utils.combine_feature_dataframes
    Test by generating two single feature dataframes,
    combining them with this function, and then
    testing to see if a single dataframe
    matches. Also exercises feature renumbering,
    preservation of old feature numbers, duplicate
    detection, and custom sort orders.
    """

    # two features at different positions/times, both initially numbered 1
    single_feat_1 = tb_test.generate_single_feature(
        1,
        1,
        start_date=datetime.datetime(2022, 1, 1, 0, 0),
        frame_start=0,
        max_h1=100,
        max_h2=100,
        feature_size=5,
    )
    single_feat_2 = tb_test.generate_single_feature(
        2,
        2,
        start_date=datetime.datetime(2022, 1, 1, 0, 5),
        frame_start=0,
        max_h1=100,
        max_h2=100,
        feature_size=5,
    )

    combined_feat = tb_utils.combine_feature_dataframes([single_feat_1, single_feat_2])

    # reference: one generator call that produces the same two features directly
    tot_feat = tb_test.generate_single_feature(
        1,
        1,
        spd_h1=1,
        spd_h2=1,
        num_frames=2,
        frame_start=0,
        max_h1=100,
        max_h2=100,
        feature_size=5,
    )

    pd_test.assert_frame_equal(combined_feat, tot_feat)

    # Now try preserving the old feature numbers.
    combined_feat = tb_utils.combine_feature_dataframes(
        [single_feat_1, single_feat_2], old_feature_column_name="old_feat_column"
    )
    assert np.all(list(combined_feat["old_feat_column"].values) == [1, 1])
    assert np.all(list(combined_feat["feature"].values) == [1, 2])

    # Test that a ValueError is raised if non-unique features are present
    with pytest.raises(ValueError):
        combined_feat = tb_utils.combine_feature_dataframes(
            [single_feat_1, single_feat_2],
            renumber_features=False,
            old_feature_column_name="old_feat_column",
        )

    # Add a new feature with new feature number
    single_feat_3 = tb_test.generate_single_feature(
        0,
        0,
        start_date=datetime.datetime(2022, 1, 1, 0, 5),
        frame_start=0,
        max_h1=100,
        max_h2=100,
        feature_num=3,
        feature_size=3,
    )

    # Test renumber_features=False
    combined_feat = tb_utils.combine_feature_dataframes(
        [single_feat_1, single_feat_3],
        renumber_features=False,
        old_feature_column_name="old_feat_column",
    )
    assert np.all(list(combined_feat["feature"].values) == [1, 3])

    # Test sortby over one column
    combined_feat = tb_utils.combine_feature_dataframes(
        [single_feat_1, single_feat_3],
        old_feature_column_name="old_feat_column",
        sort_features_by="num",
    )
    assert np.all(list(combined_feat["feature"].values) == [1, 2])
    assert np.all(list(combined_feat["old_feat_column"].values) == [3, 1])

    # Test sortby over a list of columns
    combined_feat = tb_utils.combine_feature_dataframes(
        [single_feat_1, single_feat_3],
        old_feature_column_name="old_feat_column",
        sort_features_by=["hdim_1", "hdim_2"],
    )
    assert np.all(list(combined_feat["feature"].values) == [1, 2])
    assert np.all(list(combined_feat["old_feat_column"].values) == [3, 1])
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def test_transform_feature_points():
    """Tests tobac.utils.general.transform_feature_points

    Checks remapping of feature positions onto a new grid, dropping of
    features beyond ``max_space_away``, snapping/dropping of feature times
    against ``max_time_away``.
    """

    # generate features
    orig_feat_df_1 = tb_test.generate_single_feature(0, 95, max_h1=1000, max_h2=1000)
    orig_feat_df_2 = tb_test.generate_single_feature(5, 105, max_h1=1000, max_h2=1000)

    # use combine_feature_dataframes (not the deprecated combine_tobac_feats
    # alias) for consistency with test_transform_feature_points_3D
    orig_feat_df = tb_utils.combine_feature_dataframes([orig_feat_df_1, orig_feat_df_2])

    # just make their lat/lons the same as the hdims.
    orig_feat_df["latitude"] = orig_feat_df["hdim_1"]
    orig_feat_df["longitude"] = orig_feat_df["hdim_2"]

    # Make a test dataset with lats spanning from -25 to 24
    # and lons spanning from 90 to 139.
    test_lat = np.linspace(-25, 24, 50)
    test_lon = np.linspace(90, 139, 50)
    in_xr = xr.Dataset(
        {"data": (("latitude", "longitude"), np.empty((50, 50)))},
        coords={"latitude": test_lat, "longitude": test_lon},
    )

    new_feat_df = tb_utils.general.transform_feature_points(
        orig_feat_df,
        in_xr["data"].to_iris(),
        max_time_away=datetime.timedelta(minutes=1),
        max_space_away=20 * 1000,
    )
    # recall that these are the *array positions*
    # so [25, 5] for "hdim_1" and "hdim_2" are lat 0, long 95.
    assert np.all(new_feat_df["hdim_1"] == [25, 30])
    assert np.all(new_feat_df["hdim_2"] == [5, 15])

    # now test max space apart - we should drop the second feature,
    # which is at 5, 105 lat/lon as the maximum latitude in the new dataset is 0.
    # we set the max space away at 20km.
    test_lat = np.linspace(-49, 0, 50)
    in_xr = xr.Dataset(
        {"data": (("latitude", "longitude"), np.empty((50, 50)))},
        coords={"latitude": test_lat, "longitude": test_lon},
    )

    new_feat_df = tb_utils.general.transform_feature_points(
        orig_feat_df,
        in_xr["data"].to_iris(),
        max_space_away=20000,
        max_time_away=datetime.timedelta(minutes=1),
    )

    assert np.all(new_feat_df["hdim_1"] == [49])
    assert np.all(new_feat_df["hdim_2"] == [5])

    # now test max time apart
    test_lat = np.linspace(-25, 24, 50)
    in_xr = xr.Dataset(
        {"data": (("time", "latitude", "longitude"), np.empty((2, 50, 50)))},
        coords={
            "latitude": test_lat,
            "longitude": test_lon,
            "time": [
                datetime.datetime(2023, 1, 1, 0, 0),
                datetime.datetime(2023, 1, 1, 0, 5),
            ],
        },
    )

    # features 5 s after the first dataset time: within the 10-minute window
    orig_feat_df["time"] = datetime.datetime(2023, 1, 1, 0, 0, 5)
    new_feat_df = tb_utils.general.transform_feature_points(
        orig_feat_df,
        in_xr["data"].to_iris(),
        max_time_away=datetime.timedelta(minutes=10),
        max_space_away=20 * 1000,
    )
    # we should still have both features, but they should have the new time.
    assert np.all(new_feat_df["hdim_1"] == [25, 30])
    assert np.all(new_feat_df["hdim_2"] == [5, 15])
    assert np.all(
        new_feat_df["time"]
        == [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 0, 0)]
    )

    # now make the features have time on the next day
    # both should be dropped.
    orig_feat_df["time"] = datetime.datetime(2023, 1, 2, 0, 0)
    new_feat_df = tb_utils.general.transform_feature_points(
        orig_feat_df,
        in_xr["data"].to_iris(),
        max_time_away=datetime.timedelta(minutes=1),
    )

    assert np.all(new_feat_df["hdim_1"] == [])
    assert np.all(new_feat_df["hdim_2"] == [])
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def test_transform_feature_points_3D():
    """Tests tobac.utils.general.transform_feature_points for a 3D case"""

    # two features at (vdim, hdim_1, hdim_2) = (10, 0, 95) and (20, 5, 105)
    feature_frames = [
        tb_test.generate_single_feature(0, 95, 10, max_h1=1000, max_h2=1000),
        tb_test.generate_single_feature(5, 105, 20, max_h1=1000, max_h2=1000),
    ]
    source_features = tb_utils.combine_feature_dataframes(feature_frames)

    # coordinates mirror the array positions, which keeps the expected
    # transformed positions easy to state
    source_features["latitude"] = source_features["hdim_1"]
    source_features["longitude"] = source_features["hdim_2"]
    source_features["altitude"] = source_features["vdim"] * 1000

    lat_vals = np.linspace(-25, 24, 50)
    lon_vals = np.linspace(90, 139, 50)
    alt_vals = np.arange(0, 21, 2) * 1000
    target_ds = xr.Dataset(
        {"data": (("altitude", "latitude", "longitude"), np.empty((11, 50, 50)))},
        coords={"latitude": lat_vals, "longitude": lon_vals, "altitude": alt_vals},
    )

    transformed = tb_utils.general.transform_feature_points(
        source_features,
        target_ds["data"].to_iris(),
        max_time_away=datetime.timedelta(minutes=1),
        max_space_away=20 * 1000,
        max_vspace_away=200,
    )

    # array positions on the target grid: lat 0 -> 25, lat 5 -> 30, etc.
    assert np.all(transformed["hdim_1"] == [25, 30])
    assert np.all(transformed["hdim_2"] == [5, 15])
    assert np.all(transformed["vdim"] == [5, 10])
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def test_get_spacings():
    """Tests tobac.utils.get_spacings.

    Builds a DataArray with dx=100 m, dy=50 m and dt=1 h, then checks the
    arithmetic and geometric averaging of the horizontal spacings, explicit
    spacing overrides, and the error raised when spatial coordinates are
    missing.
    """

    # x spacing is 100 m; y spacing is 50 m (and descending)
    x_values = np.linspace(100, 500, 5)
    y_values = np.linspace(400, 200, 5)
    t_values = np.array([0, 1, 2])

    in_xr = xr.DataArray(
        np.zeros((len(t_values), len(y_values), len(x_values))),
        dims=["time", "y", "x"],
        coords={
            # CF metadata (units + standard_name) is required for the
            # coordinates to be recognized
            "x": (
                "x",
                x_values,
                {"units": "meters", "standard_name": "projection_x_coordinate"},
            ),
            "y": (
                "y",
                y_values,
                {"units": "meters", "standard_name": "projection_y_coordinate"},
            ),
            "time": (
                "time",
                t_values,
                {"units": "hours since 1970-01-01 00:00:00", "standard_name": "time"},
            ),
        },
    )

    # Test with arithmetic average and different dx and dy
    dxy, dt = tb_utils.get_spacings(in_xr)
    assert dxy == (100 + 50) / 2
    assert dt == 3600

    # Test with geometric average and different dx and dy
    dxy, _ = tb_utils.get_spacings(in_xr, average_method="geometric")
    assert dxy == np.sqrt(100 * 50)

    # Test with specified grid spacing and time spacing
    dxy, dt = tb_utils.get_spacings(in_xr, grid_spacing=15, time_spacing=1800)
    assert dxy == 15
    assert dt == 1800

    # same data but without x/y coordinates
    in_xr = xr.DataArray(
        np.zeros((len(t_values), len(y_values), len(x_values))),
        dims=["time", "y", "x"],
        coords={
            "time": (
                "time",
                t_values,
                {"units": "hours since 1970-01-01 00:00:00", "standard_name": "time"},
            )
        },
    )

    # Test with missing data
    with pytest.raises(ValueError):
        tb_utils.get_spacings(in_xr)
|