tobac-1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. tobac/__init__.py +112 -0
  2. tobac/analysis/__init__.py +31 -0
  3. tobac/analysis/cell_analysis.py +628 -0
  4. tobac/analysis/feature_analysis.py +212 -0
  5. tobac/analysis/spatial.py +619 -0
  6. tobac/centerofgravity.py +226 -0
  7. tobac/feature_detection.py +1758 -0
  8. tobac/merge_split.py +324 -0
  9. tobac/plotting.py +2321 -0
  10. tobac/segmentation/__init__.py +10 -0
  11. tobac/segmentation/watershed_segmentation.py +1316 -0
  12. tobac/testing.py +1179 -0
  13. tobac/tests/segmentation_tests/test_iris_xarray_segmentation.py +0 -0
  14. tobac/tests/segmentation_tests/test_segmentation.py +1183 -0
  15. tobac/tests/segmentation_tests/test_segmentation_time_pad.py +104 -0
  16. tobac/tests/test_analysis_spatial.py +1109 -0
  17. tobac/tests/test_convert.py +265 -0
  18. tobac/tests/test_datetime.py +216 -0
  19. tobac/tests/test_decorators.py +148 -0
  20. tobac/tests/test_feature_detection.py +1321 -0
  21. tobac/tests/test_generators.py +273 -0
  22. tobac/tests/test_import.py +24 -0
  23. tobac/tests/test_iris_xarray_match_utils.py +244 -0
  24. tobac/tests/test_merge_split.py +351 -0
  25. tobac/tests/test_pbc_utils.py +497 -0
  26. tobac/tests/test_sample_data.py +197 -0
  27. tobac/tests/test_testing.py +747 -0
  28. tobac/tests/test_tracking.py +714 -0
  29. tobac/tests/test_utils.py +650 -0
  30. tobac/tests/test_utils_bulk_statistics.py +789 -0
  31. tobac/tests/test_utils_coordinates.py +328 -0
  32. tobac/tests/test_utils_internal.py +97 -0
  33. tobac/tests/test_xarray_utils.py +232 -0
  34. tobac/tracking.py +613 -0
  35. tobac/utils/__init__.py +27 -0
  36. tobac/utils/bulk_statistics.py +360 -0
  37. tobac/utils/datetime.py +184 -0
  38. tobac/utils/decorators.py +540 -0
  39. tobac/utils/general.py +753 -0
  40. tobac/utils/generators.py +87 -0
  41. tobac/utils/internal/__init__.py +2 -0
  42. tobac/utils/internal/coordinates.py +430 -0
  43. tobac/utils/internal/iris_utils.py +462 -0
  44. tobac/utils/internal/label_props.py +82 -0
  45. tobac/utils/internal/xarray_utils.py +439 -0
  46. tobac/utils/mask.py +364 -0
  47. tobac/utils/periodic_boundaries.py +419 -0
  48. tobac/wrapper.py +244 -0
  49. tobac-1.6.2.dist-info/METADATA +154 -0
  50. tobac-1.6.2.dist-info/RECORD +53 -0
  51. tobac-1.6.2.dist-info/WHEEL +5 -0
  52. tobac-1.6.2.dist-info/licenses/LICENSE +29 -0
  53. tobac-1.6.2.dist-info/top_level.txt +1 -0
tobac/tests/test_utils.py (new file)
@@ -0,0 +1,650 @@
+ import pytest
+ import tobac.testing
+ import tobac.testing as tbtest
+ from collections import Counter
+ import numpy as np
+ import datetime
+
+ import tobac.utils as tb_utils
+ import tobac.utils.internal as internal_utils
+ import tobac.testing as tb_test
+
+ import pandas.testing as pd_test
+ from scipy import fft
+ import xarray as xr
+
+
+ def lists_equal_without_order(a, b):
+     """
+     Check that the inner lists contain the same elements,
+     without accounting for duplicate groups.
+     from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000
+     """
+     for l1 in a:
+         check_counter = Counter(l1)
+         if not any(Counter(l2) == check_counter for l2 in b):
+             return False
+     return True
+
+
+ def test_get_label_props_in_dict():
+     """Testing ``tobac.utils.get_label_props_in_dict`` for both 2D and 3D cases."""
+     import skimage.measure as skim
+
+     test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type="xarray")
+     test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type="xarray")
+
+     # make sure it works for 3D data
+     labels_3D = skim.label(test_3D_data.values[0])
+
+     output_3D = tb_utils.get_label_props_in_dict(labels_3D)
+
+     # make sure it is a dict
+     assert type(output_3D) is dict
+     # make sure we get at least one output; there should be at least one label
+     assert len(output_3D) > 0
+
+     # make sure it works for 2D data
+     labels_2D = skim.label(test_2D_data.values[0])
+
+     output_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+     # make sure it is a dict
+     assert type(output_2D) is dict
+     # make sure we get at least one output; there should be at least one label
+     assert len(output_2D) > 0
+
+
+ def test_get_indices_of_labels_from_reg_prop_dict():
+     """Testing ``tobac.utils.get_indices_of_labels_from_reg_prop_dict`` for 2D and 3D cases."""
+     import skimage.measure as skim
+     import numpy as np
+
+     test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type="xarray")
+     test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type="xarray")
+
+     # make sure it works for 3D data
+     labels_3D = skim.label(test_3D_data.values[0])
+     nx_3D = test_3D_data.values[0].shape[2]
+     ny_3D = test_3D_data.values[0].shape[1]
+     nz_3D = test_3D_data.values[0].shape[0]
+
+     labels_2D = skim.label(test_2D_data.values[0])
+     nx_2D = test_2D_data.values[0].shape[1]
+     ny_2D = test_2D_data.values[0].shape[0]
+
+     region_props_3D = tb_utils.get_label_props_in_dict(labels_3D)
+     region_props_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+     [
+         curr_loc_indices,
+         z_indices,
+         y_indices,
+         x_indices,
+     ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D)
+
+     for index_key in curr_loc_indices:
+         # there should be at least one value in each
+         assert curr_loc_indices[index_key] > 0
+
+         assert np.all(z_indices[index_key] >= 0) and np.all(
+             z_indices[index_key] < nz_3D
+         )
+         assert np.all(x_indices[index_key] >= 0) and np.all(
+             x_indices[index_key] < nx_3D
+         )
+         assert np.all(y_indices[index_key] >= 0) and np.all(
+             y_indices[index_key] < ny_3D
+         )
+
+     [
+         curr_loc_indices,
+         y_indices,
+         x_indices,
+     ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D)
+
+     for index_key in curr_loc_indices:
+         # there should be at least one value in each
+         assert curr_loc_indices[index_key] > 0
+
+         assert np.all(x_indices[index_key] >= 0) and np.all(
+             x_indices[index_key] < nx_2D
+         )
+         assert np.all(y_indices[index_key] >= 0) and np.all(
+             y_indices[index_key] < ny_2D
+         )
+
+     # Test that an error is raised if an empty dict is passed:
+     with pytest.raises(ValueError, match="No regions!"):
+         tb_utils.get_indices_of_labels_from_reg_prop_dict({})
+
+
+ @pytest.mark.parametrize(
+     "feature_loc, min_max_coords, lengths, expected_coord_interp",
+     [
+         ((0, 0), (0, 1, 0, 1), (2, 2), (0, 0)),
+         ((0, 0), (0, 1), (2,), (0,)),
+     ],
+ )
+ def test_add_coordinates_2D(
+     feature_loc, min_max_coords, lengths, expected_coord_interp
+ ):
+     """
+     Tests ``utils.add_coordinates`` for a 2D case with
+     both 1D and 2D coordinates
+     """
+     import xarray as xr
+     import numpy as np
+     import datetime
+
+     feat_interp = tbtest.generate_single_feature(
+         feature_loc[0], feature_loc[1], max_h1=9999, max_h2=9999
+     )
+     grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths)
+
+     ndims = len(lengths)
+     dim_names = ["time", "longitude", "latitude"]
+     dim_names = dim_names[:ndims]
+
+     # Note that this is arbitrary.
+     base_time = datetime.datetime(2022, 1, 1)
+
+     coord_dict = {"time": [base_time]}
+     if ndims == 1:
+         # force at least a 2D array for data
+         lengths = lengths * 2
+         dim_names = ["time", "longitude", "latitude"]
+         coord_dict["longitude"] = grid_coords
+         coord_dict["latitude"] = grid_coords
+
+     elif ndims == 2:
+         dim_names = ["time", "x", "y"]
+         coord_dict["longitude"] = (("x", "y"), grid_coords[0])
+         coord_dict["latitude"] = (("x", "y"), grid_coords[1])
+
+     data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names)
+
+     feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris())
+
+     assert feats_with_coords.iloc[0]["longitude"] == expected_coord_interp[0]
+     if ndims == 2:
+         assert feats_with_coords.iloc[0]["latitude"] == expected_coord_interp[1]
+
+
+ @pytest.mark.parametrize(
+     "feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp",
+     [
+         ((0, 0, 0), None, (0, 1, 0, 1), (2, 2), (0, 0)),
+         ((0, 0, 0), (1, 1, 1), (0, 1, 0, 1), (2, 2), (0, 0)),
+         ((0.5, 0.5, 0.5), None, (0, 3, 3, 6), (2, 2), (1.5, 4.5)),
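+         # (array position 0.5 on a 2-point axis spanning 0 to 3 interpolates
+         #  to 1.5, and on the 3-to-6 axis to 4.5)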
+         ((0, 0, 0), None, (0, 1), (2,), (0,)),
+         ((0, 0, 0), None, (0, 1, 0, 1, 0, 1), (2, 2, 2), (0, 0, 0)),
+     ],
+ )
+ def test_add_coordinates_3D(
+     feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp
+ ):
+     """
+     Tests ``utils.add_coordinates_3D`` for a 3D case with
+     1D, 2D, and 3D coordinates
+     """
+     import xarray as xr
+     import numpy as np
+     import datetime
+     import pandas as pd
+
+     feat_interp = tbtest.generate_single_feature(
+         feature_loc[1], feature_loc[2], start_v=feature_loc[0], max_h1=9999, max_h2=9999
+     )
+     if delta_feat is not None:
+         feat_interp_2 = tbtest.generate_single_feature(
+             feature_loc[1] + delta_feat[1],
+             feature_loc[2] + delta_feat[2],
+             start_v=feature_loc[0] + delta_feat[0],
+             max_h1=9999,
+             max_h2=9999,
+             feature_num=2,
+         )
+         feat_interp = pd.concat([feat_interp, feat_interp_2], ignore_index=True)
+
+     grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths)
+
+     ndims = len(lengths)
+     dim_names = ["time", "longitude", "latitude"]
+     dim_names = dim_names[:ndims]
+
+     # Note that this is arbitrary.
+     base_time = datetime.datetime(2022, 1, 1)
+
+     coord_dict = {"time": [base_time]}
+     if ndims == 1:
+         # force at least a 3D array for data
+         lengths = lengths * 3
+         dim_names = ["time", "longitude", "latitude", "z"]
+         coord_dict["longitude"] = grid_coords
+         # we only test lon, so it doesn't really matter here what these are.
+         coord_dict["latitude"] = grid_coords
+         coord_dict["z"] = grid_coords
+
+     elif ndims == 2:
+         lengths = lengths + (lengths[0],)
+         dim_names = ["time", "x", "y", "z"]
+         coord_dict["longitude"] = (("x", "y"), grid_coords[0])
+         coord_dict["latitude"] = (("x", "y"), grid_coords[1])
+         # We only test lon and lat, so it doesn't matter what this is.
+         coord_dict["z"] = np.linspace(0, 1, lengths[0])
+
+     elif ndims == 3:
+         dim_names = ["time", "x", "y", "z"]
+         coord_dict["longitude"] = (("x", "y", "z"), grid_coords[0])
+         coord_dict["latitude"] = (("x", "y", "z"), grid_coords[1])
+         coord_dict["altitude"] = (("x", "y", "z"), grid_coords[2])
+
+     data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names)
+
+     if ndims <= 2:
+         feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris())
+     else:
+         feats_with_coords = tb_utils.add_coordinates_3D(
+             feat_interp, data_xr.to_iris(), vertical_coord=2
+         )
+
+     assert np.isclose(feats_with_coords.iloc[0]["longitude"], expected_coord_interp[0])
+     if ndims >= 2:
+         assert np.isclose(
+             feats_with_coords.iloc[0]["latitude"], expected_coord_interp[1]
+         )
+
+     if ndims >= 3:
+         assert np.isclose(
+             feats_with_coords.iloc[0]["altitude"], expected_coord_interp[2]
+         )
+
+
+ @pytest.mark.parametrize(
+     "vertical_coord_names, vertical_coord_pass_in, expect_raise",
+     [
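+         # auto-detection succeeds when exactly one known vertical coordinate
+         # name is present; it raises for unrecognized names unless they are
+         # passed in explicitly, for explicit names that are not present, and
+         # when multiple known candidates exist without an explicit choice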
+         (["z"], None, False),
+         (["pudding"], None, True),
+         (["pudding"], "pudding", False),
+         (["z", "model_level_number"], "pudding", True),
+         (["z", "model_level_number"], None, True),
+         (["z", "model_level_number"], "z", False),
+     ],
+ )
+ def test_find_dataframe_vertical_coord(
+     vertical_coord_names, vertical_coord_pass_in, expect_raise
+ ):
+     """Tests ``tobac.utils.internal.find_dataframe_vertical_coord``
+
+     Parameters
+     ----------
+     vertical_coord_names: array-like
+         Names of vertical coordinates to add
+     vertical_coord_pass_in: str
+         Value to pass into ``vertical_coord``
+     expect_raise: bool
+         True if we expect a ValueError to be raised, False otherwise
+     """
+
+     test_feat = tbtest.generate_single_feature(0, 0, max_h1=100, max_h2=100)
+     for vertical_name in vertical_coord_names:
+         test_feat[vertical_name] = 0.0
+
+     if expect_raise:
+         with pytest.raises(ValueError):
+             internal_utils.find_dataframe_vertical_coord(
+                 test_feat, vertical_coord=vertical_coord_pass_in
+             )
+     else:
+         assert (
+             internal_utils.find_dataframe_vertical_coord(
+                 test_feat, vertical_coord=vertical_coord_pass_in
+             )
+             == vertical_coord_names[0]
+         )
+
+
+ def test_spectral_filtering():
+     """Testing tobac.utils.general.spectral_filtering with random test data that contains a wave signal."""
+
+     # set wavelengths for filtering and grid spacing
+     dxy = 4000
+     lambda_min = 400 * 1000
+     lambda_max = 1000 * 1000
+
+     # get wavelengths for domain
+     matrix = np.zeros((200, 100))
+     Ni = matrix.shape[-2]
+     Nj = matrix.shape[-1]
+     m, n = np.meshgrid(np.arange(Ni), np.arange(Nj), indexing="ij")
+     alpha = np.sqrt(m**2 / Ni**2 + n**2 / Nj**2)
+     # turn off warning for zero divide here, because it is not avoidable with normalized wavenumbers
+     with np.errstate(divide="ignore", invalid="ignore"):
+         lambda_mn = 2 * dxy / alpha
+
+     # seed a wave signal whose wavelength lies below the filtering range,
+     # so the filter should remove it
+     signal_min = np.where(lambda_mn[0] < lambda_min)[0].min()
+     signal_idx = np.random.randint(signal_min, matrix.shape[-1])
+     matrix[0, signal_idx] = 1
+     wave_data = fft.idctn(matrix)
+
+     # use spectral filtering function on random wave data
+     transfer_function, filtered_data = tb_utils.general.spectral_filtering(
+         dxy, wave_data, lambda_min, lambda_max, return_transfer_function=True
+     )
+
+     # a few checks on the output
+     wavelengths = transfer_function[0]
+     # first element in wavelength space is inf, because the normalized wavenumber is 0 there
+     assert wavelengths[0, 0] == np.inf
+     # the first off-origin elements should correspond to twice the extent of the corresponding axis (in m),
+     # because the largest resolvable spatial scale is half a wavelength through the domain
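+     # (here, with dxy = 4000 m and a (200, 100) grid:
+     #  4000 * 200 * 2 = 1.6e6 m and 4000 * 100 * 2 = 8e5 m)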
347
+ assert wavelengths[1, 0] == (dxy) * wave_data.shape[-2] * 2
348
+ assert wavelengths[0, 1] == (dxy) * wave_data.shape[-1] * 2
349
+
350
+ # check that filtered/ smoothed field exhibits smaller range of values
351
+ assert (filtered_data.max() - filtered_data.min()) < (
352
+ wave_data.max() - wave_data.min()
353
+ )
354
+ # because the randomly generated wave lies outside of range that is set for filtering,
355
+ # make sure that the filtering results in the disappearance of this signal
356
+ assert (
357
+ abs(
358
+ np.floor(np.log10(abs(filtered_data.mean())))
359
+ - np.floor(np.log10(abs(wave_data.mean())))
360
+ )
361
+ >= 1
362
+ )
363
+
364
+
+ def test_combine_tobac_feats():
+     """Tests tobac.utils.combine_feature_dataframes by generating
+     two single-feature dataframes, combining them with this function,
+     and then testing that the result matches the expected single
+     dataframe.
+     """
+
+     single_feat_1 = tb_test.generate_single_feature(
+         1,
+         1,
+         start_date=datetime.datetime(2022, 1, 1, 0, 0),
+         frame_start=0,
+         max_h1=100,
+         max_h2=100,
+         feature_size=5,
+     )
+     single_feat_2 = tb_test.generate_single_feature(
+         2,
+         2,
+         start_date=datetime.datetime(2022, 1, 1, 0, 5),
+         frame_start=0,
+         max_h1=100,
+         max_h2=100,
+         feature_size=5,
+     )
+
+     combined_feat = tb_utils.combine_feature_dataframes([single_feat_1, single_feat_2])
+
+     tot_feat = tb_test.generate_single_feature(
+         1,
+         1,
+         spd_h1=1,
+         spd_h2=1,
+         num_frames=2,
+         frame_start=0,
+         max_h1=100,
+         max_h2=100,
+         feature_size=5,
+     )
+
+     pd_test.assert_frame_equal(combined_feat, tot_feat)
+
+     # Now try preserving the old feature numbers.
+     combined_feat = tb_utils.combine_feature_dataframes(
+         [single_feat_1, single_feat_2], old_feature_column_name="old_feat_column"
+     )
+     assert np.all(list(combined_feat["old_feat_column"].values) == [1, 1])
+     assert np.all(list(combined_feat["feature"].values) == [1, 2])
+
+     # Test that a ValueError is raised if non-unique features are present
+     with pytest.raises(ValueError):
+         combined_feat = tb_utils.combine_feature_dataframes(
+             [single_feat_1, single_feat_2],
+             renumber_features=False,
+             old_feature_column_name="old_feat_column",
+         )
+
+     # Add a new feature with a new feature number
+     single_feat_3 = tb_test.generate_single_feature(
+         0,
+         0,
+         start_date=datetime.datetime(2022, 1, 1, 0, 5),
+         frame_start=0,
+         max_h1=100,
+         max_h2=100,
+         feature_num=3,
+         feature_size=3,
+     )
+
+     # Test renumber_features=False
+     combined_feat = tb_utils.combine_feature_dataframes(
+         [single_feat_1, single_feat_3],
+         renumber_features=False,
+         old_feature_column_name="old_feat_column",
+     )
+     assert np.all(list(combined_feat["feature"].values) == [1, 3])
+
+     # Test sortby over one column
+     combined_feat = tb_utils.combine_feature_dataframes(
+         [single_feat_1, single_feat_3],
+         old_feature_column_name="old_feat_column",
+         sort_features_by="num",
+     )
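+     # "num" holds the feature size (set via feature_size above), so the
+     # size-3 feature (old number 3) sorts ahead of the size-5 feature (old number 1)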
+     assert np.all(list(combined_feat["feature"].values) == [1, 2])
+     assert np.all(list(combined_feat["old_feat_column"].values) == [3, 1])
+
+     # Test sortby over a list of columns
+     combined_feat = tb_utils.combine_feature_dataframes(
+         [single_feat_1, single_feat_3],
+         old_feature_column_name="old_feat_column",
+         sort_features_by=["hdim_1", "hdim_2"],
+     )
+     assert np.all(list(combined_feat["feature"].values) == [1, 2])
+     assert np.all(list(combined_feat["old_feat_column"].values) == [3, 1])
+
+
+ def test_transform_feature_points():
+     """Tests tobac.utils.general.transform_feature_points"""
+
+     # generate features
+     orig_feat_df_1 = tb_test.generate_single_feature(0, 95, max_h1=1000, max_h2=1000)
+     orig_feat_df_2 = tb_test.generate_single_feature(5, 105, max_h1=1000, max_h2=1000)
+
+     orig_feat_df = tb_utils.combine_feature_dataframes([orig_feat_df_1, orig_feat_df_2])
+
+     # just make their lat/lons the same as the hdims.
+     orig_feat_df["latitude"] = orig_feat_df["hdim_1"]
+     orig_feat_df["longitude"] = orig_feat_df["hdim_2"]
+
+     # Make a test dataset with lats spanning from -25 to 24
+     # and lons spanning from 90 to 139.
+     test_lat = np.linspace(-25, 24, 50)
+     test_lon = np.linspace(90, 139, 50)
+     in_xr = xr.Dataset(
+         {"data": (("latitude", "longitude"), np.empty((50, 50)))},
+         coords={"latitude": test_lat, "longitude": test_lon},
+     )
+
+     new_feat_df = tb_utils.general.transform_feature_points(
+         orig_feat_df,
+         in_xr["data"].to_iris(),
+         max_time_away=datetime.timedelta(minutes=1),
+         max_space_away=20 * 1000,
+     )
+     # recall that these are the *array positions*,
+     # so [25, 5] for "hdim_1" and "hdim_2" is lat 0, lon 95,
+     # and [30, 15] is lat 5, lon 105.
+     assert np.all(new_feat_df["hdim_1"] == [25, 30])
+     assert np.all(new_feat_df["hdim_2"] == [5, 15])
+
+     # now test max space apart - we should drop the second feature,
+     # which is at 5, 105 lat/lon, as the maximum latitude in the new dataset is 0.
+     # we set the max space away at 20 km.
+     test_lat = np.linspace(-49, 0, 50)
+     in_xr = xr.Dataset(
+         {"data": (("latitude", "longitude"), np.empty((50, 50)))},
+         coords={"latitude": test_lat, "longitude": test_lon},
+     )
+
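+     # feature 2 at lat 5 is ~555 km (about 111 km per degree of latitude)
+     # from the new domain edge at lat 0, well beyond 20 km, so it is dropped;
+     # feature 1 at lat 0 maps to the last row (index 49) of the new grid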
+     new_feat_df = tb_utils.general.transform_feature_points(
+         orig_feat_df,
+         in_xr["data"].to_iris(),
+         max_space_away=20000,
+         max_time_away=datetime.timedelta(minutes=1),
+     )
+
+     assert np.all(new_feat_df["hdim_1"] == [49])
+     assert np.all(new_feat_df["hdim_2"] == [5])
+
+     # now test max time apart
+     test_lat = np.linspace(-25, 24, 50)
+     in_xr = xr.Dataset(
+         {"data": (("time", "latitude", "longitude"), np.empty((2, 50, 50)))},
+         coords={
+             "latitude": test_lat,
+             "longitude": test_lon,
+             "time": [
+                 datetime.datetime(2023, 1, 1, 0, 0),
+                 datetime.datetime(2023, 1, 1, 0, 5),
+             ],
+         },
+     )
+
+     orig_feat_df["time"] = datetime.datetime(2023, 1, 1, 0, 0, 5)
+     new_feat_df = tb_utils.general.transform_feature_points(
+         orig_feat_df,
+         in_xr["data"].to_iris(),
+         max_time_away=datetime.timedelta(minutes=10),
+         max_space_away=20 * 1000,
+     )
+     # we should still have both features, but they should have the new time.
+     assert np.all(new_feat_df["hdim_1"] == [25, 30])
+     assert np.all(new_feat_df["hdim_2"] == [5, 15])
+     assert np.all(
+         new_feat_df["time"]
+         == [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 0, 0)]
+     )
+
+     # now move the features' time to the next day;
+     # both should be dropped.
+     orig_feat_df["time"] = datetime.datetime(2023, 1, 2, 0, 0)
+     new_feat_df = tb_utils.general.transform_feature_points(
+         orig_feat_df,
+         in_xr["data"].to_iris(),
+         max_time_away=datetime.timedelta(minutes=1),
+     )
+
+     assert np.all(new_feat_df["hdim_1"] == [])
+     assert np.all(new_feat_df["hdim_2"] == [])
+
+
+ def test_transform_feature_points_3D():
+     """Tests tobac.utils.general.transform_feature_points for a 3D case"""
+
+     orig_feat_df_1 = tb_test.generate_single_feature(
+         0, 95, 10, max_h1=1000, max_h2=1000
+     )
+     orig_feat_df_2 = tb_test.generate_single_feature(
+         5, 105, 20, max_h1=1000, max_h2=1000
+     )
+
+     orig_feat_df = tb_utils.combine_feature_dataframes([orig_feat_df_1, orig_feat_df_2])
+
+     orig_feat_df["latitude"] = orig_feat_df["hdim_1"]
+     orig_feat_df["longitude"] = orig_feat_df["hdim_2"]
+     orig_feat_df["altitude"] = orig_feat_df["vdim"] * 1000
+
+     test_lat = np.linspace(-25, 24, 50)
+     test_lon = np.linspace(90, 139, 50)
+     test_alt = np.arange(0, 21, 2) * 1000
+     in_xr = xr.Dataset(
+         {"data": (("altitude", "latitude", "longitude"), np.empty((11, 50, 50)))},
+         coords={"latitude": test_lat, "longitude": test_lon, "altitude": test_alt},
+     )
+
+     new_feat_df = tb_utils.general.transform_feature_points(
+         orig_feat_df,
+         in_xr["data"].to_iris(),
+         max_time_away=datetime.timedelta(minutes=1),
+         max_space_away=20 * 1000,
+         max_vspace_away=200,
+     )
+
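+     # the altitude spacing is 2000 m, so altitudes 10000 m and 20000 m
+     # map to vdim indices 5 and 10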
+     assert np.all(new_feat_df["hdim_1"] == [25, 30])
+     assert np.all(new_feat_df["hdim_2"] == [5, 15])
+     assert np.all(new_feat_df["vdim"] == [5, 10])
+
+
+ def test_get_spacings():
+     """Tests tobac.utils.get_spacings."""
+
+     x_values = np.linspace(100, 500, 5)
+     y_values = np.linspace(400, 200, 5)
+     t_values = np.array([0, 1, 2])
+
+     in_xr = xr.DataArray(
+         np.zeros((len(t_values), len(y_values), len(x_values))),
+         dims=["time", "y", "x"],
+         coords={
+             "x": (
+                 "x",
+                 x_values,
+                 {"units": "meters", "standard_name": "projection_x_coordinate"},
+             ),
+             "y": (
+                 "y",
+                 y_values,
+                 {"units": "meters", "standard_name": "projection_y_coordinate"},
+             ),
+             "time": (
+                 "time",
+                 t_values,
+                 {"units": "hours since 1970-01-01 00:00:00", "standard_name": "time"},
+             ),
+         },
+     )
+
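+     # x spacing is (500 - 100) / 4 = 100 m, y spacing is |200 - 400| / 4 = 50 m,
+     # and the time step is 1 hour = 3600 s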
+     # Test with arithmetic average and different dx and dy
+     dxy, dt = tb_utils.get_spacings(in_xr)
+     assert dxy == (100 + 50) / 2
+     assert dt == 3600
+
+     # Test with geometric average and different dx and dy
+     dxy, _ = tb_utils.get_spacings(in_xr, average_method="geometric")
+     assert dxy == np.sqrt(100 * 50)
+
+     # Test with specified grid spacing and time spacing
+     dxy, dt = tb_utils.get_spacings(in_xr, grid_spacing=15, time_spacing=1800)
+     assert dxy == 15
+     assert dt == 1800
+
+     in_xr = xr.DataArray(
+         np.zeros((len(t_values), len(y_values), len(x_values))),
+         dims=["time", "y", "x"],
+         coords={
+             "time": (
+                 "time",
+                 t_values,
+                 {"units": "hours since 1970-01-01 00:00:00", "standard_name": "time"},
+             )
+         },
+     )
+
+     # Test that a ValueError is raised when the spatial coordinates are missing
+     with pytest.raises(ValueError):
+         tb_utils.get_spacings(in_xr)