ticoi 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -0,0 +1,2204 @@
1
+ """
2
+ Class object to store and manipulate velocity observation data
3
+
4
+ Author : Laurane Charrier, Lei Guo, Nathan Lioret
5
+ Reference:
6
+ Charrier, L., Yan, Y., Koeniguer, E. C., Leinss, S., & Trouvé, E. (2021). Extraction of velocity time series with an optimal temporal sampling from displacement
7
+ observation networks. IEEE Transactions on Geoscience and Remote Sensing.
8
+ Charrier, L., Yan, Y., Colin Koeniguer, E., Mouginot, J., Millan, R., & Trouvé, E. (2022). Fusion of multi-temporal and multi-sensor ice velocity observations.
9
+ ISPRS annals of the photogrammetry, remote sensing and spatial information sciences, 3, 311-318.
10
+ """
11
+
12
+ import itertools
13
+ import os
14
+ import time
15
+ import warnings
16
+ from functools import reduce
17
+ from typing import Optional
18
+
19
+ import dask
20
+ import dask.array as da
21
+ import geopandas
22
+ import geopandas as gpd
23
+ import numpy as np
24
+ import pandas as pd
25
+ import rasterio as rio
26
+ import rasterio.enums
27
+ import rasterio.warp
28
+ import xarray as xr
29
+ from dask.array.lib.stride_tricks import sliding_window_view
30
+ from dask.diagnostics import ProgressBar
31
+ from joblib import Parallel, delayed
32
+ from pyproj import CRS, Proj, Transformer
33
+ from rasterio.features import rasterize
34
+ from tqdm import tqdm
35
+
36
+ from ticoi.filtering_functions import dask_filt_warpper, dask_smooth_wrapper
37
+ from ticoi.inversion_functions import construction_dates_range_np
38
+ from ticoi.mjd2date import mjd2date
39
+
40
+ # %% ======================================================================== #
41
+ # CUBE DATA CLASS #
42
+ # =========================================================================%% #
43
+
44
+
45
+ class CubeDataClass:
46
+ def __init__(self, cube=None, ds=None):
47
+ """
48
+ Initialisation of the main attributes, or copy cube's attributes and ds dataset if given.
49
+
50
+ :param cube: [cube_data_class] --- Cube to copy
51
+ :param ds: [xr dataset | None] --- New dataset. If None, copy cube's dataset
52
+ """
53
+
54
+ if not isinstance(cube, CubeDataClass):
55
+ self.filedir = ""
56
+ self.filename = ""
57
+ self.nx = 250
58
+ self.ny = 250
59
+ self.nz = 0
60
+ self.author = ""
61
+ self.source = ""
62
+ self.ds = xr.Dataset({})
63
+ self.resolution = 50
64
+ self.is_TICO = False
65
+
66
+ else: # load the cube information
67
+ self.filedir = cube.filedir
68
+ self.filename = cube.filename
69
+ self.nx = cube.nx
70
+ self.ny = cube.ny
71
+ self.nz = cube.nz
72
+ self.author = cube.author
73
+ self.source = cube.source
74
+ self.ds = cube.ds if ds is None else ds
75
+ self.resolution = cube.resolution
76
+ self.is_TICO = cube.is_TICO
77
+
78
+ def update_dimension(self, time_dim: str = "mid_date"):
79
+ """
80
+ Update the attributes corresponding to cube dimensions: nx, ny, and nz
81
+
82
+ :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
83
+ """
84
+
85
+ self.nx = self.ds["x"].sizes["x"]
86
+ self.ny = self.ds["y"].sizes["y"]
87
+ self.nz = self.ds[time_dim].sizes[time_dim]
88
+ if len(self.ds["x"]) != 0 and len(self.ds["y"]) != 0:
89
+ self.resolution = self.ds["x"].values[1] - self.ds["x"].values[0]
90
+ else:
91
+ raise ValueError("Your cube is empty, please check the subset or buffer coordinates you provided")
92
+
93
+ def subset(self, proj: str, subset: list):
94
+ """
95
+ Crop the dataset according to 4 coordinates describing a rectangle.
96
+
97
+ :param proj: [str] --- EPSG system of the coordinates given in subset
98
+ :param subset: [list] --- A list of 4 floats used to crop the dataset: [xmin, xmax, ymax, ymin]
99
+ """
100
+
101
+ if CRS(self.ds.proj4) != CRS(proj):
102
+ transformer = Transformer.from_crs(
103
+ CRS(proj), CRS(self.ds.proj4)
104
+ ) # convert the coordinates from proj to self.ds.proj4
105
+ lon1, lat1 = transformer.transform(subset[2], subset[1])
106
+ lon2, lat2 = transformer.transform(subset[3], subset[1])
107
+ lon3, lat3 = transformer.transform(subset[2], subset[0])
108
+ lon4, lat4 = transformer.transform(subset[3], subset[0])
109
+ self.ds = self.ds.sel(
110
+ x=slice(np.min([lon1, lon2, lon3, lon4]), np.max([lon1, lon2, lon3, lon4])),
111
+ y=slice(np.max([lat1, lat2, lat3, lat4]), np.min([lat1, lat2, lat3, lat4])),
112
+ )
113
+ del lon1, lon2, lon3, lon4, lat1, lat2, lat3, lat4
114
+ else:
115
+ self.ds = self.ds.sel(
116
+ x=slice(np.min([subset[0], subset[1]]), np.max([subset[0], subset[1]])),
117
+ y=slice(np.max([subset[2], subset[3]]), np.min([subset[2], subset[3]])),
118
+ )
119
+
120
+ if len(self.ds["x"].values) == 0 and len(self.ds["y"].values) == 0:
121
+ print(f"[Data loading] The given subset is not part of cube {self.filename}")
122
+
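A minimal sketch of the corner transformation subset() performs when the subset is given in another CRS; the EPSG codes and coordinates below are only illustrative.

import numpy as np
from pyproj import CRS, Transformer

# Bounding box given in EPSG:4326 (lat/lon axis order); the cube is assumed to be in EPSG:32632
xmin, xmax, ymin, ymax = 7.0, 7.2, 45.8, 45.9
transformer = Transformer.from_crs(CRS("EPSG:4326"), CRS("EPSG:32632"))

# Transform all four corners, then slice x increasing and y decreasing, as subset() does
corners = [transformer.transform(lat, lon) for lat in (ymin, ymax) for lon in (xmin, xmax)]
xs, ys = zip(*corners)
x_slice = slice(np.min(xs), np.max(xs))
y_slice = slice(np.max(ys), np.min(ys))
print(x_slice, y_slice)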
123
+ def buffer(self, proj: str, buffer: list):
124
+ """
125
+ Crop the dataset around a given pixel; the number of surrounding pixels kept is given by the buffer size.
126
+
127
+ :param proj: [str] --- EPSG system of the coordinates given in buffer
128
+ :param buffer: [list] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
129
+ """
130
+
131
+ if CRS(self.ds.proj4) != CRS(proj): # Convert the coordinates from proj to self.ds.proj4
132
+ transformer = Transformer.from_crs(CRS(proj), CRS(self.ds.proj4))
133
+ i1, j1 = transformer.transform(buffer[1] + buffer[2], buffer[0] - buffer[2])
134
+ i2, j2 = transformer.transform(buffer[1] - buffer[2], buffer[0] + buffer[2])
135
+ i3, j3 = transformer.transform(buffer[1] + buffer[2], buffer[0] + buffer[2])
136
+ i4, j4 = transformer.transform(buffer[1] - buffer[2], buffer[0] - buffer[2])
137
+ self.ds = self.ds.sel(
138
+ x=slice(np.min([i1, i2, i3, i4]), np.max([i1, i2, i3, i4])),
139
+ y=slice(np.max([j1, j2, j3, j4]), np.min([j1, j2, j3, j4])),
140
+ )
141
+ del i1, i2, j1, j2, i3, i4, j3, j4
142
+ else:
143
+ i1, j1 = buffer[0] - buffer[2], buffer[1] + buffer[2]
144
+ i2, j2 = buffer[0] + buffer[2], buffer[1] - buffer[2]
145
+ self.ds = self.ds.sel(
146
+ x=slice(np.min([i1, i2]), np.max([i1, i2])), y=slice(np.max([j1, j2]), np.min([j1, j2]))
147
+ )
148
+ del i1, i2, j1, j2, buffer
149
+
150
+ if len(self.ds["x"].values) == 0 and len(self.ds["y"].values) == 0:
151
+ print(f"[Data loading] The given pixel and its surrounding buffer are not part of cube {self.filename}")
152
+
153
+ def determine_optimal_chunk_size(
154
+ self,
155
+ variable_name: str = "vx",
156
+ x_dim: str = "x",
157
+ y_dim: str = "y",
158
+ time_dim: str = "mid_date",
159
+ verbose: bool = False,
160
+ ) -> (int, int, int): # type: ignore
161
+ """
162
+ A function to determine the optimal chunk size for a given time series array based on its size.
163
+ This function is from gtsa DOI 10.5281/zenodo.8188085.
164
+
165
+ :param variable_name: [str] [default is 'vx'] --- Name of the variable containing the time series array
166
+ :param x_dim: [str] [default is 'x'] --- Name of the x dimension in the array
167
+ :param y_dim: [str] [default is 'y'] --- Name of the y dimension in the array
168
+ :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
169
+ :param verbose: [bool] [default is False] --- Boolean flag to control verbosity of output
170
+
171
+ :return tc: [int] --- Chunk size along the time dimension
172
+ :return yc: [int] --- Chunk size along the y dimension
173
+ :return xc: [int] --- Chunk size along the x dimension
174
+ """
175
+
176
+ if verbose:
177
+ print("[Data loading] Dask chunk size:")
178
+
179
+ # set chunk size limit to 50 MB if a single time series array is < 1 MB in size, else increase it up to a 1 GB limit.
180
+ time_series_array_size = (
181
+ self.ds[variable_name]
182
+ .sel(
183
+ {
184
+ x_dim: self.ds[variable_name][x_dim].values[0],
185
+ y_dim: self.ds[variable_name][y_dim].values[0],
186
+ }
187
+ )
188
+ .nbytes
189
+ )
190
+ mb = 1048576
191
+ if time_series_array_size < 1e6:
192
+ chunk_size_limit = 50 * mb
193
+ elif time_series_array_size < 1e7:
194
+ chunk_size_limit = 100 * mb
195
+ elif time_series_array_size < 1e8:
196
+ chunk_size_limit = 200 * mb
197
+ else:
198
+ chunk_size_limit = 1000 * mb
199
+
200
+ time_axis = self.ds[variable_name].dims.index(time_dim)
201
+ x_axis = self.ds[variable_name].dims.index(x_dim)
202
+ y_axis = self.ds[variable_name].dims.index(y_dim)
203
+ axis_sizes = {i: -1 if i == time_axis else "auto" for i in range(3)}
204
+ arr = self.ds[variable_name].data.rechunk(axis_sizes, block_size_limit=chunk_size_limit, balance=True)
205
+ tc, yc, xc = arr.chunks[time_axis][0], arr.chunks[y_axis][0], arr.chunks[x_axis][0]
206
+ chunksize = self.ds[variable_name][:tc, :yc, :xc].nbytes / 1e6
207
+ if verbose:
208
+ print("[Data loading] Chunk shape:", "(" + ",".join([str(x) for x in [tc, yc, xc]]) + ")")
209
+ print(
210
+ "[Data loading] Chunk size:",
211
+ self.ds[variable_name][:tc, :yc, :xc].nbytes,
212
+ "(" + str(round(chunksize, 1)) + "MB)",
213
+ )
214
+ return tc, yc, xc
215
+
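A minimal sketch of how these chunk sizes are typically applied, on a small synthetic dask-backed cube; the module path of CubeDataClass is an assumption and may need adjusting.

import dask.array as da
import numpy as np
import xarray as xr
from ticoi.cube_data_classxr import CubeDataClass  # assumed module path

cube = CubeDataClass()
nz, ny, nx = 40, 120, 120
cube.ds = xr.Dataset(
    {"vx": (("mid_date", "y", "x"), da.random.random((nz, ny, nx), chunks=(10, 60, 60)))},
    coords={"mid_date": np.arange(nz), "y": np.arange(ny), "x": np.arange(nx)},
)

tc, yc, xc = cube.determine_optimal_chunk_size(variable_name="vx", time_dim="mid_date", verbose=True)
cube.ds = cube.ds.chunk({"mid_date": tc, "y": yc, "x": xc})  # same pattern as used in load()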
216
+ # %% ==================================================================== #
217
+ # CUBE LOADING METHODS #
218
+ # =====================================================================%% #
219
+
220
+ def load_itslive(
221
+ self,
222
+ filepath: str,
223
+ conf: bool = False,
224
+ subset: list | None = None,
225
+ buffer: list | None = None,
226
+ pick_date: list | None = None,
227
+ pick_sensor: list | None = None,
228
+ pick_temp_bas: list | None = None,
229
+ proj: str = "EPSG:4326",
230
+ verbose: bool = False,
231
+ ):
232
+ """
233
+ Load a cube dataset written by ITS_LIVE.
234
+
235
+ :param filepath: [str] --- Filepath of the dataset
236
+ :param conf: [bool] [default is False] --- If True, convert the errors into confidence values between 0 and 1
238
+ :param subset: [list | None] [default is None] --- A list of 4 floats used to crop the dataset, in the form [xmin, xmax, ymin, ymax]
239
+ :param buffer: [list | None] [default is None] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
240
+ :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
241
+ :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
242
+ :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline lies between these two values
242
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
243
+ :param verbose: [bool] [default is False] --- Print information throughout the process
244
+ """
245
+
246
+ if verbose:
247
+ print(f"[Data loading] Path to cube file : {filepath}")
248
+
249
+ self.filedir = os.path.dirname(filepath)  # Path where the netcdf file is stored
250
+ self.filename = os.path.basename(filepath) # Name of the netcdf file
251
+ self.ds = self.ds.assign_attrs({"proj4": self.ds["mapping"].proj4text})
252
+ self.author = self.ds.author.split(", a NASA")[0]
253
+ self.source = self.ds.url
254
+
255
+ if subset is not None: # Crop according to 4 coordinates
256
+ self.subset(proj, subset)
257
+ elif buffer is not None: # Crop the dataset around a given pixel, according to a given buffer
258
+ self.buffer(proj, buffer)
259
+ if pick_date is not None:
260
+ self.ds = self.ds.where(
261
+ (
262
+ (self.ds["acquisition_date_img1"] >= np.datetime64(pick_date[0]))
263
+ & (self.ds["acquisition_date_img2"] <= np.datetime64(pick_date[1]))
264
+ ).compute(),
265
+ drop=True,
266
+ )
267
+
268
+ self.update_dimension() # Update self.nx,self.ny,self.nz
269
+
270
+ if conf:
271
+ minconfx = np.nanmin(self.ds["vx_error"].values[:])
272
+ maxconfx = np.nanmax(self.ds["vx_error"].values[:])
273
+ minconfy = np.nanmin(self.ds["vy_error"].values[:])
274
+ maxconfy = np.nanmax(self.ds["vy_error"].values[:])
275
+
276
+ date1 = np.array([np.datetime64(date_str, "D") for date_str in self.ds["acquisition_date_img1"].values])
277
+ date2 = np.array([np.datetime64(date_str, "D") for date_str in self.ds["acquisition_date_img2"].values])
278
+
279
+ # np.char.strip is used to remove the null character ('�') from each element and np.core.defchararray.add to
280
+ # concatenate array of different types
281
+ try:
282
+ sensor = np._core.defchararray.add(
283
+ np.char.strip(self.ds["mission_img1"].values.astype(str), "�"),
284
+ np.char.strip(self.ds["satellite_img1"].values.astype(str), "�"),
285
+ ).astype("U10")
286
+ except AttributeError: # in old numpy version module 'numpy._core' has no attribute 'defchararray'
287
+ sensor = np.core.defchararray.add(
288
+ np.char.strip(self.ds["mission_img1"].values.astype(str), "�"),
289
+ np.char.strip(self.ds["satellite_img1"].values.astype(str), "�"),
290
+ ).astype("U10")
291
+ sensor[sensor == "L7"] = "Landsat-7"
292
+ sensor[sensor == "L8"] = "Landsat-8"
293
+ sensor[sensor == "L9"] = "Landsat-9"
294
+ sensor[np.isin(sensor, ["S1A", "S1B"])] = "Sentinel-1"
295
+ sensor[np.isin(sensor, ["S2A", "S2B"])] = "Sentinel-2"
296
+
297
+ if conf: # Normalize the error between 0 and 1, and convert error in confidence
298
+ errorx = 1 - (self.ds["vx_error"].values - minconfx) / (maxconfx - minconfx)
299
+ errory = 1 - (self.ds["vy_error"].values - minconfy) / (maxconfy - minconfy)
300
+ else:
301
+ errorx = self.ds["vx_error"].values
302
+ errory = self.ds["vy_error"].values
303
+
304
+ # Drop variables not in the specified list
305
+ variables_to_keep = ["vx", "vy", "mid_date", "x", "y"]
306
+ self.ds = self.ds.drop_vars([var for var in self.ds.variables if var not in variables_to_keep])
307
+ # Drop attributes not in the specified list
308
+ attributes_to_keep = ["date_created", "mapping", "author", "proj4"]
309
+ self.ds.attrs = {attr: self.ds.attrs[attr] for attr in attributes_to_keep if attr in self.ds.attrs}
310
+
311
+ # self.ds = self.ds.unify_chunks() # to avoid error ValueError: Object has inconsistent chunks along
312
+ # dimension mid_date. This can be fixed by calling unify_chunks(). Create new variable and chunk them
313
+ self.ds["sensor"] = xr.DataArray(sensor, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
314
+ self.ds = self.ds.unify_chunks()
315
+ self.ds["date1"] = xr.DataArray(date1, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
316
+ self.ds = self.ds.unify_chunks()
317
+ self.ds["date2"] = xr.DataArray(date2, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
318
+ self.ds = self.ds.unify_chunks()
319
+ self.ds["source"] = xr.DataArray(["ITS_LIVE"] * self.nz, dims="mid_date").chunk(
320
+ {"mid_date": self.ds.chunks["mid_date"]}
321
+ )
322
+ self.ds = self.ds.unify_chunks()
323
+ self.ds["errorx"] = xr.DataArray(errorx, dims=["mid_date"], coords={"mid_date": self.ds.mid_date}).chunk(
324
+ {"mid_date": self.ds.chunks["mid_date"]}
325
+ )
326
+ self.ds = self.ds.unify_chunks()
327
+ self.ds["errory"] = xr.DataArray(errory, dims=["mid_date"], coords={"mid_date": self.ds.mid_date}).chunk(
328
+ {"mid_date": self.ds.chunks["mid_date"]}
329
+ )
330
+
331
+ if pick_sensor is not None:
332
+ self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
333
+ if pick_temp_bas is not None:
334
+ temp = (self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")
335
+ self.ds = self.ds.where(((pick_temp_bas[0] < temp) & (temp < pick_temp_bas[1])).compute(), drop=True)
336
+ del temp
337
+ self.ds = self.ds.unify_chunks()
338
+
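A self-contained sketch of the min-max error-to-confidence conversion used above when conf=True; the error values are illustrative.

import numpy as np

def error_to_confidence(error: np.ndarray) -> np.ndarray:
    # Smallest error maps to confidence 1, largest error to confidence 0
    emin, emax = np.nanmin(error), np.nanmax(error)
    return 1 - (error - emin) / (emax - emin)

vx_error = np.array([2.0, 5.0, 10.0, 3.5])
print(error_to_confidence(vx_error))  # [1.     0.625  0.     0.8125]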
339
+ def load_millan(
340
+ self,
341
+ filepath: str,
342
+ conf: bool = False,
343
+ subset: list | None = None,
344
+ buffer: list | None = None,
345
+ pick_date: list | None = None,
346
+ pick_sensor: list | None = None,
347
+ pick_temp_bas: list | None = None,
348
+ proj: str = "EPSG:4326",
349
+ verbose: bool = False,
350
+ ):
351
+ """
352
+ Load a cube dataset written by R. Millan et al.
353
+
354
+ :param filepath: [str] --- Filepath of the dataset
355
+ :param conf: [bool] [default is False] --- If True, convert the errors into confidence values between 0 and 1
356
+ :param subset: [list | None] [default is None] --- A list of 4 floats used to crop the dataset, in the form [xmin, xmax, ymin, ymax]
357
+ :param buffer: [list | None] [default is None] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
358
+ :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
359
+ :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
360
+ :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline lies between these two values
361
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
362
+ :param verbose: [bool] [default is False] --- Print information throughout the process
363
+ """
364
+
365
+ if verbose:
366
+ print(f"[Data loading] Path to cube file : {filepath}")
367
+
368
+ self.filedir = os.path.dirname(filepath)
369
+ self.filename = os.path.basename(filepath) # name of the netcdf file
370
+ self.author = "IGE" # name of the author
371
+ self.source = self.ds.source
372
+ del filepath
373
+
374
+ # self.split_cube(n_split=2, dim=['x', 'y'], savepath=f"{self.filedir}/{self.filename[:-3]}_")
375
+
376
+ if subset is not None: # Crop according to 4 coordinates
377
+ self.subset(proj, subset)
378
+ elif buffer is not None: # Crop the dataset around a given pixel, according to a given buffer
379
+ self.buffer(proj, buffer)
380
+
381
+ # Uniformization of the name and format of the time coordinate
382
+ self.ds = self.ds.rename({"z": "mid_date"})
383
+
384
+ date1 = [mjd2date(date_str) for date_str in self.ds["date1"].values] # conversion in date
385
+ date2 = [mjd2date(date_str) for date_str in self.ds["date2"].values]
386
+ self.ds = self.ds.unify_chunks()
387
+ self.ds["date1"] = xr.DataArray(np.array(date1).astype("datetime64[ns]"), dims="mid_date").chunk(
388
+ {"mid_date": self.ds.chunks["mid_date"]}
389
+ )
390
+ self.ds = self.ds.unify_chunks()
391
+ self.ds["date2"] = xr.DataArray(np.array(date2).astype("datetime64[ns]"), dims="mid_date").chunk(
392
+ {"mid_date": self.ds.chunks["mid_date"]}
393
+ )
394
+ self.ds = self.ds.unify_chunks()
395
+ del date1, date2
396
+
397
+ # Temporal subset between two dates
398
+ if pick_date is not None:
399
+ self.ds = self.ds.where(
400
+ (
401
+ (self.ds["date1"] >= np.datetime64(pick_date[0]))
402
+ & (self.ds["date2"] <= np.datetime64(pick_date[1]))
403
+ ).compute(),
404
+ drop=True,
405
+ )
406
+ del pick_date
407
+
408
+ self.ds = self.ds.assign_coords(
409
+ mid_date=np.array(self.ds["date1"] + (self.ds["date2"] - self.ds["date1"]) // 2)
410
+ )
411
+ self.update_dimension()
412
+
413
+ if conf and "confx" not in self.ds.data_vars: # convert the errors into confidence indicators between 0 and 1
414
+ minconfx = np.nanmin(self.ds["error_vx"].values[:])
415
+ maxconfx = np.nanmax(self.ds["error_vx"].values[:])
416
+ minconfy = np.nanmin(self.ds["error_vy"].values[:])
417
+ maxconfy = np.nanmax(self.ds["error_vy"].values[:])
418
+ errorx = 1 - (self.ds["error_vx"].values - minconfx) / (maxconfx - minconfx)
419
+ errory = 1 - (self.ds["error_vy"].values - minconfy) / (maxconfy - minconfy)
420
+ else:
421
+ errorx = self.ds["error_vx"].values[:]
422
+ errory = self.ds["error_vy"].values[:]
423
+
424
+ # Homogenize sensors names
425
+ sensor = np.char.strip(
426
+ self.ds["sensor"].values.astype(str), "�"
427
+ ) # np.char.strip is used to remove the null character ('�') from each element
428
+ sensor[np.isin(sensor, ["S1"])] = "Sentinel-1"
429
+ sensor[np.isin(sensor, ["S2"])] = "Sentinel-2"
430
+ sensor[np.isin(sensor, ["landsat-8", "L8", "L8. "])] = "Landsat-8"
431
+
432
+ # Drop variables not in the specified list
433
+ self.ds = self.ds.drop_vars(
434
+ [var for var in self.ds.variables if var not in ["vx", "vy", "mid_date", "x", "y", "date1", "date2"]]
435
+ )
436
+ self.ds = self.ds.transpose("mid_date", "y", "x")
437
+
438
+ # Store the variable in xarray dataset
439
+ self.ds["sensor"] = xr.DataArray(sensor, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
440
+ del sensor
441
+ self.ds = self.ds.unify_chunks()
442
+ self.ds["source"] = xr.DataArray(["IGE"] * self.nz, dims="mid_date").chunk(
443
+ {"mid_date": self.ds.chunks["mid_date"]}
444
+ )
445
+ self.ds = self.ds.unify_chunks()
446
+ self.ds["errorx"] = xr.DataArray(
447
+ np.tile(errorx[:, np.newaxis, np.newaxis], (1, self.ny, self.nx)),
448
+ dims=["mid_date", "y", "x"],
449
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
450
+ ).chunk(chunks=self.ds.chunks)
451
+ self.ds = self.ds.unify_chunks()
452
+ self.ds["errory"] = xr.DataArray(
453
+ np.tile(errory[:, np.newaxis, np.newaxis], (1, self.ny, self.nx)),
454
+ dims=["mid_date", "y", "x"],
455
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
456
+ ).chunk(chunks=self.ds.chunks)
457
+ del errorx, errory
458
+
459
+ # Pick sensors or temporal baselines
460
+ if pick_sensor is not None:
461
+ self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
462
+ if pick_temp_bas is not None:
463
+ self.ds = self.ds.sel(
464
+ mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
465
+ & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
466
+ )
467
+ self.ds = self.ds.unify_chunks()
468
+
469
+ def load_ducasse(
470
+ self,
471
+ filepath: str,
472
+ conf: bool = False,
473
+ subset: list | None = None,
474
+ buffer: list | None = None,
475
+ pick_date: list | None = None,
476
+ pick_sensor: list | None = None,
477
+ pick_temp_bas: list | None = None,
478
+ proj: str = "EPSG:4326",
479
+ verbose: bool = False,
480
+ ):
481
+ """
482
+ Load a cube dataset written by E. Ducasse et al. (Pleiades data)
483
+
484
+ :param filepath: [str] --- Filepath of the dataset
485
+ :param conf: [bool] [default is False] --- If True, convert the errors into confidence values between 0 and 1
486
+ :param subset: [list | None] [default is None] --- A list of 4 floats used to crop the dataset, in the form [xmin, xmax, ymin, ymax]
487
+ :param buffer: [list | None] [default is None] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
488
+ :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
489
+ :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
490
+ :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline lies between these two values
491
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
492
+ :param verbose: [bool] [default is False] --- Print information throughout the process
493
+ """
494
+
495
+ if verbose:
496
+ print(f"[Data loading] Path to cube file : {filepath}")
497
+
498
+ self.ds = self.ds.chunk({"x": 125, "y": 125, "time": 2000}) # set chunk
499
+ self.filedir = os.path.dirname(filepath)
500
+ self.filename = os.path.basename(filepath) # name of the netcdf file
501
+ self.author = "IGE" # name of the author
502
+ del filepath
503
+
504
+ # Spatial subset
505
+ if subset is not None: # crop according to 4 coordinates
506
+ self.subset(proj, subset)
507
+ elif buffer is not None: # crop the dataset around a given pixel, according to a given buffer
508
+ self.buffer(proj, buffer)
509
+
510
+ # Uniformization of the name and format of the time coordinate
511
+ self.ds = self.ds.rename({"time": "mid_date"})
512
+
513
+ date1 = [date_str.split(" ")[0] for date_str in self.ds["mid_date"].values]
514
+ date2 = [date_str.split(" ")[1] for date_str in self.ds["mid_date"].values]
515
+ self.ds["date1"] = xr.DataArray(np.array(date1).astype("datetime64[ns]"), dims="mid_date").chunk(
516
+ {"mid_date": self.ds.chunks["mid_date"]}
517
+ )
518
+ self.ds["date2"] = xr.DataArray(np.array(date2).astype("datetime64[ns]"), dims="mid_date").chunk(
519
+ {"mid_date": self.ds.chunks["mid_date"]}
520
+ )
521
+ del date1, date2
522
+
523
+ # Temporal subset between two dates
524
+ if pick_date is not None:
525
+ self.ds = self.ds.where(
526
+ (
527
+ (self.ds["date1"] >= np.datetime64(pick_date[0]))
528
+ & (self.ds["date2"] <= np.datetime64(pick_date[1]))
529
+ ).compute(),
530
+ drop=True,
531
+ )
532
+ del pick_date
533
+
534
+ self.ds = self.ds.assign_coords(
535
+ mid_date=np.array(self.ds["date1"] + (self.ds["date2"] - self.ds["date1"]) // 2)
536
+ )
537
+ self.update_dimension() # update self.nx, self.ny and self.nz
538
+
539
+ # Drop variables not in the specified list
540
+ variables_to_keep = ["vx", "vy", "mid_date", "x", "y", "date1", "date2"]
541
+ self.ds = self.ds.drop_vars([var for var in self.ds.variables if var not in variables_to_keep])
542
+ self.ds = self.ds.transpose("mid_date", "y", "x")
543
+
544
+ # Store the variable in xarray dataset
545
+ self.ds["sensor"] = xr.DataArray(["Pleiades"] * self.nz, dims="mid_date").chunk(
546
+ {"mid_date": self.ds.chunks["mid_date"]}
547
+ )
548
+ self.ds["source"] = xr.DataArray(["IGE"] * self.nz, dims="mid_date").chunk(
549
+ {"mid_date": self.ds.chunks["mid_date"]}
550
+ )
551
+ self.ds["vy"] = -self.ds["vy"]
552
+
553
+ # Pick sensors or temporal baselines
554
+ if pick_sensor is not None:
555
+ self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
556
+ if pick_temp_bas is not None:
557
+ self.ds = self.ds.sel(
558
+ mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
559
+ & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
560
+ )
561
+
562
+ # Set errors equal to one (no information on the error here)
563
+ self.ds["errorx"] = xr.DataArray(np.ones(self.ds["mid_date"].size), dims="mid_date").chunk(
564
+ {"mid_date": self.ds.chunks["mid_date"]}
565
+ )
566
+ self.ds["errory"] = xr.DataArray(np.ones(self.ds["mid_date"].size), dims="mid_date").chunk(
567
+ {"mid_date": self.ds.chunks["mid_date"]}
568
+ )
569
+
570
+ def load_charrier(
571
+ self,
572
+ filepath: str,
573
+ conf: bool = False,
574
+ subset: list | None = None,
575
+ buffer: list | None = None,
576
+ pick_date: list | None = None,
577
+ pick_sensor: list | None = None,
578
+ pick_temp_bas: list | None = None,
579
+ proj: str = "EPSG:4326",
580
+ verbose: bool = False,
581
+ ):
582
+ """
583
+ Load a cube dataset written by L.Charrier et al.
584
+
585
+ :param filepath: [str] --- Filepath of the dataset
586
+ :param conf: [bool] [default is False] --- If True, convert the errors into confidence values between 0 and 1
587
+ :param subset: [list | None] [default is None] --- A list of 4 floats used to crop the dataset, in the form [xmin, xmax, ymin, ymax]
588
+ :param buffer: [list | None] [default is None] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
589
+ :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
590
+ :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
591
+ :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline lies between these two values
592
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
593
+ :param verbose: [bool] [default is False] --- Print information throughout the process
594
+ """
595
+
596
+ if verbose:
597
+ print(f"[Data loading] Path to cube file {'(TICO cube)' if self.is_TICO else ''} : {filepath}")
598
+
599
+ # information about the cube
600
+ self.filedir = os.path.dirname(filepath)
601
+ self.filename = os.path.basename(filepath) # Name of the netcdf file
602
+ if self.ds.author == "J. Mouginot, R.Millan, A.Derkacheva_aligned":
603
+ self.author = "IGE" # Name of the author
604
+ else:
605
+ self.author = self.ds.author
606
+ self.source = self.ds.source
607
+ del filepath
608
+
609
+ # Select specific data within the cube
610
+ if subset is not None: # Crop according to 4 coordinates
611
+ self.subset(proj, subset)
612
+ elif buffer is not None: # Crop the dataset around a given pixel, according to a given buffer
613
+ self.buffer(proj, buffer)
614
+
615
+ time_dim = "mid_date" if not self.is_TICO else "second_date" # 'date2' if we load TICO data
616
+ self.update_dimension(time_dim)
617
+
618
+ # Temporal subset between two dates
619
+ if pick_date is not None:
620
+ if not self.is_TICO:
621
+ self.ds = self.ds.where(
622
+ (
623
+ (self.ds["date1"] >= np.datetime64(pick_date[0]))
624
+ & (self.ds["date2"] <= np.datetime64(pick_date[1]))
625
+ ).compute(),
626
+ drop=True,
627
+ )
628
+ else:
629
+ self.ds = self.ds.where(
630
+ (
631
+ (self.ds["second_date"] >= np.datetime64(pick_date[0]))
632
+ & (self.ds["second_date"] <= np.datetime64(pick_date[1]))
633
+ ).compute(),
634
+ drop=True,
635
+ )
636
+ del pick_date
637
+
638
+ self.update_dimension(time_dim)
639
+
640
+ # Pick sensors or temporal baselines
641
+ if pick_sensor is not None:
642
+ if not self.is_TICO:
643
+ self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
644
+ else:
645
+ self.ds = self.ds.sel(second_date=self.ds["sensor"].isin(pick_sensor))
646
+
647
+ # Following properties are not available for TICO cubes
648
+ if not self.is_TICO:
649
+ # Pick specific temporal baselines
650
+ if pick_temp_bas is not None:
651
+ self.ds = self.ds.sel(
652
+ mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
653
+ & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
654
+ )
655
+
656
+ # Convert the errors into confidence indicators between 0 and 1
657
+ if conf and "confx" not in self.ds.data_vars:
658
+ minconfx = np.nanmin(self.ds["errorx"].values[:])
659
+ maxconfx = np.nanmax(self.ds["errorx"].values[:])
660
+ minconfy = np.nanmin(self.ds["errory"].values[:])
661
+ maxconfy = np.nanmax(self.ds["errory"].values[:])
662
+ errorx = 1 - (self.ds["errorx"].values - minconfx) / (maxconfx - minconfx)
663
+ errory = 1 - (self.ds["errory"].values - minconfy) / (maxconfy - minconfy)
664
+ self.ds["errorx"] = xr.DataArray(
665
+ errorx,
666
+ dims=["mid_date", "y", "x"],
667
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
668
+ ).chunk(chunks=self.ds.chunks)
669
+ self.ds["errory"] = xr.DataArray(
670
+ errory,
671
+ dims=["mid_date", "y", "x"],
672
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
673
+ ).chunk(chunks=self.ds.chunks)
674
+
675
+ # For cube written with write_result_TICOI
676
+ if "source" not in self.ds.variables:
677
+ self.ds["source"] = xr.DataArray([self.ds.author] * self.nz, dims="mid_date").chunk(
678
+ {"mid_date": self.ds.chunks["mid_date"]}
679
+ )
680
+ if "sensor" not in self.ds.variables:
681
+ self.ds["sensor"] = xr.DataArray([self.ds.sensor] * self.nz, dims="mid_date").chunk(
682
+ {"mid_date": self.ds.chunks["mid_date"]}
683
+ )
684
+
685
+ def load(
686
+ self,
687
+ filepath: list | str,
688
+ chunks: dict | str | int = {},
689
+ conf: bool = False,
690
+ subset: list | None = None,
691
+ buffer: list | None = None,
692
+ pick_date: list | None = None,
693
+ pick_sensor: list | None = None,
694
+ pick_temp_bas: list | None = None,
695
+ proj: str = "EPSG:4326",
696
+ mask: str | xr.DataArray | None = None,
697
+ reproj_coord: bool = False,
698
+ reproj_vel: bool = False,
699
+ verbose: bool = False,
700
+ ):
701
+ """
702
+ Load a cube dataset from a file in format netcdf (.nc) or zarr. The data are directly stored within the present object.
703
+
704
+ :param filepath: [list | str] --- Filepath of the dataset, if list of filepaths, load all the cubes and merge them
705
+ :param chunks: [dict] --- Dictionary with the size of chunks for each dimension, if chunks=-1 loads the dataset with dask using a single chunk for all arrays.
706
+ chunks={} loads the dataset with dask using engine preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays,
707
+ chunks='auto' will use dask auto chunking taking into account the engine preferred chunks.
708
+ :param conf: [bool] [default is False] --- If True, convert the errors into confidence values between 0 and 1
709
+ :param subset: [list | None] [default is None] --- A list of 4 floats used to crop the dataset, in the form [xmin, xmax, ymin, ymax]
710
+ :param buffer: [list | None] [default is None] --- A list of 3 floats: the longitude and the latitude of the central point, followed by the buffer size
711
+ :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
712
+ :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
713
+ :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline lies between these two values
714
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
715
+ :param mask: [str | xr dataarray | None] [default is None] --- Mask some of the data of the cube, either a dataarray with 0 and 1, or a path to a dataarray or an .shp file
716
+ :param reproj_coord: [bool] [default is False] --- If True reproject the second cube of the list filepath to the grid coordinates of the first cube
717
+ :param reproj_vel: [bool] [default is False] --- If True reproject the velocity components, to match the coordinate system of the first cube
718
+
719
+ :param verbose: [bool] [default is False] --- Print information throughout the process
720
+ """
721
+ self.__init__()
722
+
723
+ assert isinstance(filepath, list) or isinstance(filepath, str), (
724
+ f"The filepath must be a string (path to the cube file) or a list of strings, not {type(filepath)}."
725
+ )
726
+
727
+ time_dim_name = {
728
+ "ITS_LIVE, a NASA MEaSUREs project (its-live.jpl.nasa.gov)": "mid_date",
729
+ "J. Mouginot, R.Millan, A.Derkacheva": "z",
730
+ "J. Mouginot, R.Millan, A.Derkacheva_aligned": "mid_date",
731
+ "L. Charrier, L. Guo": "mid_date",
732
+ "L. Charrier": "mid_date",
733
+ "E. Ducasse": "time",
734
+ "S. Leinss, L. Charrier": "mid_date",
735
+ "IGE": "mid_date",
736
+ } # dictionary to set the name of time_dimension for a given author
737
+
738
+ if isinstance(filepath, list): # Merge several cubes
739
+ self.load(
740
+ filepath[0],
741
+ chunks=chunks,
742
+ conf=conf,
743
+ subset=subset,
744
+ buffer=buffer,
745
+ pick_date=pick_date,
746
+ pick_sensor=pick_sensor,
747
+ pick_temp_bas=pick_temp_bas,
748
+ proj=proj,
749
+ mask=mask,
750
+ verbose=verbose,
751
+ )
752
+
753
+ for n in range(1, len(filepath)):
754
+ cube2 = CubeDataClass()
755
+ sub = [
756
+ self.ds["x"].min().values,
757
+ self.ds["x"].max().values,
758
+ self.ds["y"].min().values,
759
+ self.ds["y"].max().values,
760
+ ]
761
+ cube2.load(
762
+ filepath[n],
763
+ chunks=chunks,
764
+ conf=conf,
765
+ subset=sub,
766
+ pick_date=pick_date,
767
+ pick_sensor=pick_sensor,
768
+ pick_temp_bas=pick_temp_bas,
769
+ proj=self.ds.proj4,
770
+ mask=mask,
771
+ verbose=verbose,
772
+ )
773
+ # Align the new cube to the main one (interpolate the coordinate and/or reproject it)
774
+ if reproj_vel or reproj_coord:
775
+ cube2 = self.align_cube(
776
+ cube2, reproj_vel=reproj_vel, reproj_coord=reproj_coord, interp_method="nearest"
777
+ )
778
+ self.merge_cube(cube2) # Merge the new cube to the main one
779
+ del cube2
780
+ if chunks == {}: # Rechunk with optimal chunk size
781
+ var_name = "vx" if not self.is_TICO else "dx"
782
+ time_dim = time_dim_name[self.ds.author] if not self.is_TICO else "second_date"
783
+ tc, yc, xc = self.determine_optimal_chunk_size(
784
+ variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
785
+ )
786
+ self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})
787
+
788
+ else: # Load one cube
789
+ with dask.config.set(**{"array.slicing.split_large_chunks": False}): # To avoid creating the large chunks
790
+ if filepath.split(".")[-1] == "nc":
791
+ try:
792
+ self.ds = xr.open_dataset(filepath, engine="h5netcdf", chunks=chunks)
793
+ except NotImplementedError: # Can not use auto rechunking with object dtype. We are unable to estimate the size in bytes of object data
794
+ chunks = {}
795
+ self.ds = xr.open_dataset(filepath, engine="h5netcdf", chunks=chunks) # Set no chunks
796
+
797
+ if "Author" in self.ds.attrs: # Uniformization of the attribute Author to author
798
+ self.ds.attrs["author"] = self.ds.attrs.pop("Author")
799
+
800
+ self.is_TICO = False if time_dim_name[self.ds.author] in self.ds.dims else True
801
+ time_dim = time_dim_name[self.ds.author] if not self.is_TICO else "second_date"
802
+ var_name = "vx" if not self.is_TICO else "dx"
803
+
804
+ if chunks == {}: # Rechunk with optimal chunk size
805
+ tc, yc, xc = self.determine_optimal_chunk_size(
806
+ variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
807
+ )
808
+ self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})
809
+
810
+ elif filepath.split(".")[-1] == "zarr":  # the zarr store is not rechunked here
811
+ if chunks == {}:
812
+ chunks = "auto" # Change the default value to auto
813
+ self.ds = xr.open_dataset(
814
+ filepath, decode_timedelta=False, engine="zarr", consolidated=True, chunks=chunks
815
+ )
816
+ self.is_TICO = False
817
+ var_name = "vx"
818
+
819
+ if verbose:
820
+ print("[Data loading] File open")
821
+
822
+ dico_load = {
823
+ "ITS_LIVE, a NASA MEaSUREs project (its-live.jpl.nasa.gov)": self.load_itslive,
824
+ "J. Mouginot, R.Millan, A.Derkacheva": self.load_millan,
825
+ "J. Mouginot, R.Millan, A.Derkacheva_aligned": self.load_charrier,
826
+ "L. Charrier, L. Guo": self.load_charrier,
827
+ "L. Charrier": self.load_charrier,
828
+ "E. Ducasse": self.load_ducasse,
829
+ "S. Leinss, L. Charrier": self.load_charrier,
830
+ }
831
+ dico_load[self.ds.author](
832
+ filepath,
833
+ pick_date=pick_date,
834
+ subset=subset,
835
+ conf=conf,
836
+ pick_sensor=pick_sensor,
837
+ pick_temp_bas=pick_temp_bas,
838
+ buffer=buffer,
839
+ proj=proj,
840
+ )
841
+
842
+ time_dim = "mid_date" if not self.is_TICO else "second_date"
843
+
844
+ # Rechunk again if the size of the cube is changed:
845
+ if any(x is not None for x in [pick_date, subset, buffer, pick_sensor, pick_temp_bas]):
846
+ tc, yc, xc = self.determine_optimal_chunk_size(
847
+ variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
848
+ )
849
+ self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})
850
+
851
+ # Reorder the coordinates to keep the consistency
852
+ self.ds = self.ds.copy().sortby(time_dim).transpose("x", "y", time_dim)
853
+ self.standardize_cube_for_processing(time_dim)
854
+
855
+ if mask is not None:
856
+ self.mask_cube(mask)
857
+
858
+ if self.ds.rio.crs is None:
859
+ self.ds = self.ds.rio.write_crs(self.ds.proj4)
860
+
861
+ if verbose:
862
+ print(f"[Data loading] Author : {self.ds.author}")
863
+
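A minimal usage sketch of load(), assuming an ITS_LIVE NetCDF cube on disk; the file path and the module path of CubeDataClass are placeholders.

from ticoi.cube_data_classxr import CubeDataClass  # assumed module path

cube = CubeDataClass()
cube.load(
    "path/to/its_live_cube.nc",             # placeholder path
    subset=[7.0, 7.2, 45.8, 45.9],          # [xmin, xmax, ymin, ymax] in EPSG:4326
    pick_date=["2017-01-01", "2021-12-31"],
    pick_temp_bas=[5, 180],                 # keep temporal baselines between 5 and 180 days
    proj="EPSG:4326",
    conf=False,
    verbose=True,
)
print(cube.nx, cube.ny, cube.nz, cube.EPSG_code_())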
864
+ def standardize_cube_for_processing(self, time_dim="mid_date"):
865
+ """
866
+ Prepare the xarray dataset for processing: transpose the dimensions, add a temporal_baseline variable, and add errors if they do not exist
868
+
869
+ :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
869
+ """
870
+
871
+ self.ds = self.ds.unify_chunks()
872
+ if self.ds.chunksizes[time_dim] != (self.nz,):  # ensure a single chunk along the time dimension
873
+ self.ds = self.ds.chunk({time_dim: self.nz})
874
+
875
+ if not self.is_TICO:
876
+ # Create a variable for temporal_baseline
877
+ self.ds["temporal_baseline"] = xr.DataArray(
878
+ (self.ds["date2"] - self.ds["date1"]).dt.days.values, dims="mid_date"
879
+ )
880
+
881
+ # Add errors if not already there
882
+ if "errorx" not in self.ds.variables:
883
+ self.ds["errorx"] = ("mid_date", np.ones(len(self.ds["mid_date"])))
884
+ self.ds["errory"] = ("mid_date", np.ones(len(self.ds["mid_date"])))
885
+
886
+ if self.ds.rio.crs is None:
887
+ self.ds = self.ds.rio.write_crs(self.ds.proj4) # add the crs to the xarray dataset if missing
888
+
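A minimal sketch of the temporal_baseline variable created above, on a toy two-observation dataset.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "date1": ("mid_date", np.array(["2020-01-01", "2020-02-01"], dtype="datetime64[ns]")),
        "date2": ("mid_date", np.array(["2020-01-25", "2020-03-15"], dtype="datetime64[ns]")),
    }
)
# Same expression as in standardize_cube_for_processing(): baseline in days
ds["temporal_baseline"] = xr.DataArray((ds["date2"] - ds["date1"]).dt.days.values, dims="mid_date")
print(ds["temporal_baseline"].values)  # [24 43]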
889
+ def prepare_interpolation_date(
890
+ self,
891
+ ) -> (np.datetime64, np.datetime64): # type: ignore
892
+ """
893
+ Define the first and last dates required for the interpolation, i.e. the first and last dates in the observations.
894
+ The purpose is to obtain homogenized results.
895
+
896
+ :param cube: dataset
897
+
898
+ :return: first and last date required for the interpolation
899
+ """
900
+
901
+ # Prepare interpolation dates
902
+ cube_date1 = self.date1_().tolist()
903
+ cube_date1 = cube_date1 + self.date2_().tolist()
904
+ cube_date1.remove(np.min(cube_date1))
905
+ first_date_interpol = np.min(cube_date1)
906
+ last_date_interpol = np.max(self.date2_())
907
+
908
+ return first_date_interpol, last_date_interpol
909
+
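A minimal sketch of the date selection above: pool date1 and date2, drop the single earliest date, then span from the new minimum to the latest second date; the dates are toy values.

import numpy as np

date1 = np.array(["2019-01-05", "2019-02-10"], dtype="datetime64[D]")
date2 = np.array(["2019-01-20", "2019-03-01"], dtype="datetime64[D]")

dates = date1.tolist() + date2.tolist()
dates.remove(np.min(dates))              # discard the earliest acquisition date
first_date_interpol = np.min(dates)      # 2019-01-20
last_date_interpol = np.max(date2)       # 2019-03-01
print(first_date_interpol, last_date_interpol)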
910
+ # %% ==================================================================== #
911
+ # ACCESSORS #
912
+ # =====================================================================%% #
913
+
914
+ def sensor_(self) -> list:
915
+ """
916
+ Accessor to the sensors which captured the data.
917
+
918
+ :return: [list] --- List of sensors
919
+ """
920
+
921
+ return self.ds["sensor"].values.tolist()
922
+
923
+ def source_(self) -> list:
924
+ """
925
+ Accessor to the source of the data.
926
+
927
+ :return: [list] --- List of source
928
+ """
929
+
930
+ return self.ds["source"].values.tolist()
931
+
932
+ def temp_base_(self, return_list: bool = True, format_date: str = "float") -> list | np.ndarray:
933
+ """
934
+ Get the temporal baseline of the dataset.
935
+
936
+ :param return_list: [bool] [default is True] --- If True, return a list of dates, else return a np array
937
+ :param format_date: [str] [default is 'float'] --- 'float' or 'D' format of the date as output
938
+
939
+ :return: [list | np array] --- List of the temporal baselines
940
+ """
941
+
942
+ if format_date == "D":
943
+ temp = self.ds["date2"] - self.ds["date1"]
944
+ elif format_date == "float":
945
+ # temp = (self.ds['date2'].values-self.ds['date1'].values).astype('timedelta64[D]'))/ np.timedelta64(1, 'D')
946
+ temp = (self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")
947
+ else:
948
+ raise NameError("Please enter format as float or D")
949
+ if return_list:
950
+ return temp.values.tolist()
951
+ else:
952
+ return temp.values
953
+
954
+ def date1_(self) -> np.array:
955
+ """
956
+ Accessor to the first dates of acquisition.
957
+
958
+ :return: [np array] --- np array of date1
959
+ """
960
+
961
+ return np.asarray(self.ds["date1"]).astype("datetime64[D]")
962
+
963
+ def date2_(self) -> np.array:
964
+ """
965
+ Accessor to the second dates of acquisition.
966
+
967
+ :return: [np array] --- np array of date2
968
+ """
969
+
970
+ return np.asarray(self.ds["date2"]).astype("datetime64[D]")
971
+
972
+ def datec_(self) -> np.array:
973
+ """
974
+ Accessor to the central dates of the data.
975
+
976
+ :return: [np array] --- np array of central date
977
+ """
978
+
979
+ return (self.date1_() + self.temp_base_(return_list=False, format_date="D") // 2).astype("datetime64[D]")
980
+
981
+ def vv_(self) -> np.array:
982
+ """
983
+ Accessor to the magnitude of the velocities.
984
+
985
+ :return: [np array] --- np array of velocity magnitude
986
+ """
987
+
988
+ return np.sqrt(self.ds["vx"] ** 2 + self.ds["vy"] ** 2)
989
+
990
+ def EPSG_code_(self) -> int:
991
+ """
992
+ Accessor to the EPSG code of the dataset.
993
+ """
994
+
995
+ return self.ds.rio.crs.to_epsg()
996
+
997
+ # %% ==================================================================== #
998
+ # PIXEL LOADING METHODS #
999
+ # =====================================================================%% #
1000
+
1001
+ def convert_coordinates(self, i: int | float, j: int | float, proj: str, verbose: bool = False) -> (float, float): # type: ignore
1002
+ """
1003
+ Convert the coordinate (i, j) which are in projection proj, to projection of the cube dataset.
1004
+
1005
+ :params i, j: [int | float] --- Coordinates to be converted
1006
+ :param proj: [str] --- Projection of (i, j) coordinates
1007
+ :param verbose: [bool] [default is False] --- If True, print some text
1008
+
1009
+ :return i, j: [int | float] --- Converted (i, j)
1010
+ """
1011
+
1012
+ # Convert coordinates if needed
1013
+ if proj == "EPSG:4326":
1014
+ myproj = Proj(self.ds.proj4)
1015
+ i, j = myproj(i, j)
1016
+ if verbose:
1017
+ print(f"[Data loading] Converted to projection {self.ds.proj4}: {i, j}")
1018
+ else:
1019
+ if CRS(self.ds.proj4) != CRS(proj):
1020
+ transformer = Transformer.from_crs(CRS(proj), CRS(self.ds.proj4))
1021
+ i, j = transformer.transform(i, j)
1022
+ if verbose:
1023
+ print(f"[Data loading] Converted to projection {self.ds.proj4}: {i, j}")
1024
+ return i, j
1025
+
1026
+ def load_pixel(
1027
+ self,
1028
+ i: int | float,
1029
+ j: int | float,
1030
+ unit: int = 365,
1031
+ regu: int | str = "1accelnotnull",
1032
+ coef: int = 100,
1033
+ flag: xr.Dataset | None = None,
1034
+ solver: str = "LSMR_ini",
1035
+ interp: str = "nearest",
1036
+ proj: str = "EPSG:4326",
1037
+ rolling_mean: xr.Dataset | None = None,
1038
+ visual: bool = False,
1039
+ output_format="np",
1040
+ ) -> (Optional[list], Optional[list], Optional[np.array], Optional[np.array], Optional[np.array]): # type: ignore
1041
+ """
1042
+ Load data at pixel (i, j) and compute prior to inversion (rolling mean, mean, dates range...).
1043
+
1044
+ :params i, j: [int | float] --- Coordinates to be converted
1045
+ :param unit: [int] [default is 365] --- 1 for m/d, 365 for m/y
1046
+ :param regu: [int | str] [default is '1accelnotnull'] --- Type of regularization
1047
+ :param coef: [int] [default is 100] --- Coef of Tikhonov regularisation
1048
+ :param flag: [xr dataset | None] [default is None] --- If not None, the values of the coefficient used for stable areas, surge glacier and non surge glacier
1049
+ :param solver: [str] [default is 'LSMR_ini'] --- Solver of the inversion: 'LSMR', 'LSMR_ini', 'LS', 'LS_bounded', 'LSQR'
1050
+ :param interp: [str] [default is 'nearest'] --- Interpolation method used to load the pixel when it is not in the dataset ('nearest' or 'linear')
1051
+ :param proj: [str] [default is 'EPSG:4326'] --- Projection of (i, j) coordinates
1052
+ :param rolling_mean: [xr dataset | None] [default is None] --- Filtered dataset (e.g. rolling mean)
1053
+ :param visual: [bool] [default is False] --- Return additional information (sensor and source) for future plots
1054
+ :param output_format [str] [default is np] --- Format of the output data (np for numpy or df for pandas dataframe)
1055
+
1056
+ :return data: [list | None] --- A list of 2 elements: the first is a np.ndarray of observed dates (date1, date2), the second a np.ndarray of observed values (vx, vy, errorx, errory, temporal baseline); a third element with sensor and source is added if visual=True
1057
+ :return mean: [list | None] --- A list with the average vx and vy if solver is 'LSMR_ini' but the regularization does not require an apriori on the acceleration
1058
+ :return dates_range: [list | None] --- Dates between which the displacements will be inverted
1059
+ :return regu: [np array | Nothing] --- If flag is not None, regularisation method to be used for each pixel
1060
+ :return coef: [np array | Nothing] --- If flag is not None, regularisation coefficient to be used for each pixel
1061
+ """
1062
+
1063
+ # Variables to keep
1064
+ var_to_keep = (
1065
+ ["date1", "date2", "vx", "vy", "errorx", "errory", "temporal_baseline"]
1066
+ if not visual
1067
+ else ["date1", "date2", "vx", "vy", "errorx", "errory", "temporal_baseline", "sensor", "source"]
1068
+ )
1069
+
1070
+ if proj == "int":
1071
+ data = self.ds.isel(x=i, y=j)[var_to_keep]
1072
+ else:
1073
+ i, j = self.convert_coordinates(i, j, proj=proj) # convert the coordinates to the projection of the cube
1074
+ # Interpolate only necessary variables and drop NaN values
1075
+ if interp == "nearest":
1076
+ data = self.ds.sel(x=i, y=j, method="nearest")[var_to_keep]
1077
+ data = data.dropna(dim="mid_date")
1078
+ else:
1079
+ data = self.ds.interp(x=i, y=j, method=interp)[var_to_keep].dropna(
1080
+ dim="mid_date"
1081
+ ) # 282 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1082
+
1083
+ if flag is not None:
1084
+ if isinstance(regu, dict) and isinstance(coef, dict):
1085
+ flag = np.round(flag["flag"].sel(x=i, y=j, method="nearest").values)
1086
+ regu = regu[flag]
1087
+ coef = coef[flag]
1088
+ else:
1089
+ raise ValueError("regu must be a dict if assign_flag is True!")
1090
+
1091
+ data_dates = data[["date1", "date2"]].to_array().values.T
1092
+ if data_dates.dtype == "<M8[ns]": # Convert to days if needed
1093
+ data_dates = data_dates.astype("datetime64[D]")
1094
+
1095
+ if (solver == "LSMR_ini" or regu == "1accelnotnull" or regu == "directionxy") and rolling_mean is not None:
1096
+ if len(rolling_mean.sizes) == 3:  # if regu == '1accelnotnull', rolling_mean has a time dimension
1097
+ # Load rolling mean for the given pixel, only on the dates available
1098
+ dates_range = construction_dates_range_np(data_dates)
1099
+ mean = rolling_mean.sel(
1100
+ mid_date=dates_range[:-1] + np.diff(dates_range) // 2, x=i, y=j, method="nearest"
1101
+ )[["vx_filt", "vy_filt"]]
1102
+ mean = [mean[i].values / unit for i in ["vx_filt", "vy_filt"]] # Convert it to m/day
1103
+
1104
+ else:  # if solver == 'LSMR_ini', rolling_mean is a temporal average per pixel
1105
+ mean = rolling_mean.sel(x=i, y=j, method="nearest")[["vx", "vy"]]
1106
+ mean = [mean[i].values / unit for i in ["vx", "vy"]] # Convert it to m/day
1107
+ dates_range = None
1108
+
1109
+ else: # If there is no apriori and no initialization
1110
+ mean = None
1111
+ dates_range = None
1112
+
1113
+ # data_values is composed of vx, vy, errorx, errory, temporal baseline
1114
+ if visual:
1115
+ if output_format == "np":
1116
+ data_str = data[["sensor", "source"]].to_array().values.T
1117
+ data_values = data.drop_vars(["date1", "date2", "sensor", "source"]).to_array().values.T
1118
+ data = [data_dates, data_values, data_str]
1119
+ elif output_format == "df":
1120
+ data = data.to_pandas()
1121
+ else:
1122
+ raise ValueError(
1123
+ "Please enter np if you want to have as output a numpy array, and df if you want a pandas dataframe"
1124
+ )
1125
+ else:
1126
+ data_values = data.drop_vars(["date1", "date2"]).to_array().values.T
1127
+ data = [data_dates, data_values]
1128
+
1129
+ if flag is not None:
1130
+ return data, mean, dates_range, regu, coef
1131
+ else:
1132
+ return data, mean, dates_range
1133
+
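A minimal usage sketch of load_pixel() for a point given in lon/lat, assuming `cube` was loaded as in the earlier sketch; the coordinates are placeholders.

data, mean, dates_range = cube.load_pixel(
    7.08, 45.85,               # placeholder lon/lat of the pixel of interest
    proj="EPSG:4326",
    interp="nearest",
    solver="LSMR_ini",
    regu="1accelnotnull",
    coef=100,
    rolling_mean=None,         # no a priori here, so mean and dates_range come back as None
    visual=False,
)
dates, values = data           # dates: (n, 2) date1/date2; values: vx, vy, errorx, errory, temporal_baseline
print(dates.shape, values.shape)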
1134
+ # %% ==================================================================== #
1135
+ # CUBE PROCESSING #
1136
+ # =====================================================================%% #
1137
+
1138
+ def delete_outliers(
1139
+ self,
1140
+ delete_outliers: str | float,
1141
+ flag: xr.Dataset | None = None,
1142
+ slope: xr.Dataset | None = None,
1143
+ aspect: xr.Dataset | None = None,
1144
+ direction: xr.Dataset | None = None,
1145
+ **kwargs,
1146
+ ):
1147
+ """
1148
+ Delete outliers according to a certain criterium.
1149
+
1150
+ :param delete_outliers: [str | float] --- If a float, delete all velocities with a quality indicator higher than delete_outliers; if a string, apply the corresponding filtering method (e.g. 'median_angle' deletes outliers at an angle more than 45° away from the average vector)
1151
+ :param flag: [xr dataset | None] [default is None] --- If not None, the values of the coefficient used for stable areas, surge glacier and non surge glacier
1152
+ """
1153
+
1154
+ if isinstance(delete_outliers, int) or isinstance(delete_outliers, str):
1155
+ if isinstance(delete_outliers, int): # filter according to the maximal error
1156
+ inlier_mask = dask_filt_warpper(
1157
+ self.ds["vx"], self.ds["vy"], filt_method="error", error_thres=delete_outliers
1158
+ )
1159
+
1160
+ elif isinstance(delete_outliers, str): # filter according to vcc_angle, zscore, median_angle
1161
+ axis = self.ds["vx"].dims.index("mid_date")
1162
+ inlier_mask = dask_filt_warpper(
1163
+ self.ds["vx"],
1164
+ self.ds["vy"],
1165
+ filt_method=delete_outliers,
1166
+ slope=slope,
1167
+ aspect=aspect,
1168
+ direction=direction,
1169
+ axis=axis,
1170
+ **kwargs,
1171
+ )
1172
+
1173
+ if flag is not None:
1174
+ if delete_outliers != "vvc_angle":
1175
+ flag = flag["flag"].values if flag["flag"].shape[0] == self.nx else flag["flag"].values.T
1176
+ flag_condition = flag == 0
1177
+ flag_condition = np.expand_dims(flag_condition, axis=axis)
1178
+ inlier_mask = np.logical_or(inlier_mask, flag_condition)
1179
+
1180
+ inlier_flag = xr.DataArray(inlier_mask, dims=self.ds["vx"].dims)
1181
+ for var in ["vx", "vy"]:
1182
+ self.ds[var] = self.ds[var].where(inlier_flag)
1183
+
1184
+ self.ds = self.ds.persist()
1185
+
1186
+ elif isinstance(delete_outliers, dict):
1187
+ for method in delete_outliers.keys():
1188
+ if method == "error":
1189
+ if delete_outliers["error"] is None:
1190
+ self.delete_outliers("error", flag)
1191
+ else:
1192
+ self.delete_outliers(delete_outliers["error"], flag)
1193
+ elif method == "magnitude":
1194
+ if delete_outliers["magnitude"] is None:
1195
+ self.delete_outliers("magnitude", flag)
1196
+ else:
1197
+ self.delete_outliers("magnitude", flag, magnitude_thres=delete_outliers["magnitude"])
1198
+ elif method == "median_magnitude":
1199
+ if delete_outliers["median_magnitude"] is None:
1200
+ self.delete_outliers("median_magnitude", flag)
1201
+ else:
1202
+ self.delete_outliers(
1203
+ "median_magnitude", flag, median_magnitude_thres=delete_outliers["median_magnitude"]
1204
+ )
1205
+ elif method == "z_score":
1206
+ if delete_outliers["z_score"] is None:
1207
+ self.delete_outliers("z_score", flag)
1208
+ else:
1209
+ self.delete_outliers("z_score", flag, z_thres=delete_outliers["z_score"])
1210
+
1211
+ elif method == "median_angle":
1212
+ if delete_outliers["median_angle"] is None:
1213
+ self.delete_outliers("median_angle", flag)
1214
+ else:
1215
+ self.delete_outliers("median_angle", flag, z_thres=delete_outliers["median_angle"])
1216
+
1217
+ elif method == "vvc_angle":
1218
+ if delete_outliers["vvc_angle"] is None:
1219
+ self.delete_outliers("vvc_angle", flag)
1220
+ else:
1221
+ self.delete_outliers("vvc_angle", flag, **delete_outliers["vvc_angle"])
1222
+ elif method == "topo_angle":
1223
+ self.delete_outliers("topo_angle", flag, slope=slope, aspect=aspect)
1224
+ elif method == "flow_angle":
1225
+ self.delete_outliers("flow_angle", flag, direction=direction)
1226
+ elif method == "mz_score":
1227
+ if delete_outliers["mz_score"] is None:
1228
+ self.delete_outliers("mz_score", flag)
1229
+ else:
1230
+ self.delete_outliers("mz_score", flag, z_thres=delete_outliers["mz_score"])
1231
+ else:
1232
+ raise ValueError(
1233
+ "Filtering method should be either 'median_angle', 'vvc_angle', 'topo_angle', 'z_score','mz_score', 'magnitude', 'median_magnitude' or 'error'."
1234
+ )
1235
+ else:
1236
+ raise ValueError("delete_outliers must be a int, a string or a dict, not {type(delete_outliers)}")
1237
+
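A minimal sketch of the dict form accepted by delete_outliers(), again assuming `cube` from the earlier sketch; the threshold values are illustrative.

# Each key selects a filtering method; a value of None means "use that method's defaults"
cube.delete_outliers({"error": 5, "z_score": None})

# Equivalent single-method calls
cube.delete_outliers(5)            # drop velocities whose quality indicator exceeds the threshold
cube.delete_outliers("z_score")    # statistical filtering on vx/vy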
1238
+ def mask_cube(self, mask: xr.DataArray | str):
1239
+ """
1240
+ Mask some of the data of the cube (putting it to np.nan).
1241
+
1242
+ :param mask: [str | xr dataarray] --- Either a DataArray with 1 for the data to keep and 0 for the data to remove, or a path to a file containing a DataArray or a shapefile to be rasterized
1243
+ """
1244
+
1245
+ if type(mask) is str:
1246
+ if (
1247
+ mask[-3:] == "shp" or mask[-4:] == "gpkg"
1248
+ ): # Convert the shp file or geopackage to an xarray dataset (rasterize the shapefile)
1249
+ polygon = geopandas.read_file(mask).to_crs(CRS(self.ds.proj4))
1250
+ raster = rasterize(
1251
+ [polygon.geometry[0]],
1252
+ out_shape=self.ds.rio.shape,
1253
+ transform=self.ds.rio.transform(),
1254
+ fill=0,
1255
+ dtype="int16",
1256
+ )
1257
+ mask = xr.DataArray(data=raster.T, dims=["x", "y"], coords=self.ds[["x", "y"]].coords)
1258
+ else:
1259
+ mask = xr.open_dataarray(mask)
1260
+ mask.load()
1261
+
1262
+ # Mask the velocities and the errors
1263
+ if not self.is_TICO:
1264
+ self.ds[["vx", "vy", "errorx", "errory"]] = (
1265
+ self.ds[["vx", "vy", "errorx", "errory"]]
1266
+ .where(mask.sel(x=self.ds.x, y=self.ds.y, method="nearest") == 1)
1267
+ .astype("float32")
1268
+ )
1269
+ else:
1270
+ self.ds[["dx", "dy", "xcount_x", "xcount_y"]] = (
1271
+ self.ds[["dx", "dy", "xcount_x", "xcount_y"]]
1272
+ .where(mask.sel(x=self.ds.x, y=self.ds.y, method="nearest") == 1)
1273
+ .astype("float32")
1274
+ )
1275
+
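# Usage sketch (illustrative, not from the package): masking a cube with a glacier
# outline. It assumes `cube` is a CubeDataClass whose dataset is already loaded;
# "glacier_outline.shp" is a placeholder path. The polygon is rasterized onto the
# cube grid and every pixel outside it is set to NaN in the velocity (and error)
# variables.
#
#     cube.mask_cube("glacier_outline.shp")   # shapefile or geopackage path
#     cube.mask_cube(binary_mask)             # or an xr.DataArray of 1 (keep) / 0 (mask)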
1276
+ def reproject_geotiff_to_cube(self, file_path):
1277
+ """
1278
+ Reproject a GeoTIFF file onto the same grid (projection, extent and resolution) as the cube.
+ :param file_path: [str] --- Path of the GeoTIFF file to be warped
+ :return: warped_data: [np.ndarray] --- Warped data with the same shape and resolution as the cube
1281
+ """
1282
+ if file_path.split(".")[-1] == "tif":
1283
+ with rio.open(file_path) as src:
1284
+ src_data = src.read(1)
1285
+
1286
+ dst_data = np.empty(shape=self.ds.rio.shape, dtype=np.float32)
1287
+ dst_data, _ = rio.warp.reproject(
1288
+ source=src_data,
1289
+ destination=dst_data,
1290
+ src_transform=src.transform,
1291
+ src_crs=src.crs,
1292
+ dst_crs=CRS.from_proj4(self.ds.proj4),
1293
+ dst_transform=self.ds.rio.transform(),
1294
+ dst_shape=self.ds.rio.shape,
1295
+ resampling=rio.warp.Resampling.bilinear,
1296
+ )
1297
+ dst_data[dst_data == src.nodata] = np.nan
1298
+ return dst_data
1299
+
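# Usage sketch (illustrative): warping an external raster, e.g. a DEM, onto the cube
# grid. `cube` and "dem.tif" are placeholders; the returned array has the same shape
# as one time slice of the cube, with nodata values replaced by NaN.
#
#     dem_on_grid = cube.reproject_geotiff_to_cube("dem.tif")
#     assert dem_on_grid.shape == cube.ds.rio.shape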
1300
+ def compute_flow_direction(self, vx_file: str | None = None, vy_file: str | None = None) -> xr.Dataset:
+ """
+ Compute the average flow direction at each pixel, either from the given vx and vy files or directly from the observations.
+ :param vx_file, vy_file: [str | None] --- Paths of the velocity component files (GeoTIFF format); if None, the cube observations are used
+ :return direction: [xr dataset] --- Computed average flow direction (in degrees) at each pixel
1305
+ """
1306
+ if vx_file is not None and vy_file is not None:
1307
+ vx = self.reproject_geotiff_to_cube(vx_file)
1308
+ vy = self.reproject_geotiff_to_cube(vy_file)
1309
+ else:
1310
+ vx = self.ds["vx"].values
1311
+ vy = self.ds["vy"].values
1312
+
1313
+ temporal_baseline = self.ds["temporal_baseline"].values
1314
+ temporal_baseline = temporal_baseline[np.newaxis, np.newaxis, :]
1315
+ vx_weighted = np.nansum(vx * temporal_baseline, axis=2) / np.nansum(temporal_baseline, axis=2)
1316
+ vy_weighted = np.nansum(vy * temporal_baseline, axis=2) / np.nansum(temporal_baseline, axis=2)
1317
+
1318
+ v_mean_weighted = np.sqrt(vx_weighted**2 + vy_weighted**2)
1319
+
1320
+ direction = np.arctan2(vx_weighted, vy_weighted)
1321
+ direction = (np.rad2deg(direction) + 360) % 360
1322
+
1323
+ direction = np.where(v_mean_weighted < 1, np.nan, direction)
1324
+
1325
+ direction = xr.Dataset(
1326
+ data_vars=dict(
1327
+ direction=(["y", "x"], np.array(direction.T)),
1328
+ ),
1329
+ coords=dict(x=(["x"], self.ds.x.data), y=(["y"], self.ds.y.data)),
1330
+ )
1331
+
1332
+ return direction
1333
+
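# Usage sketch (illustrative): the mean flow direction can be derived either from the
# observations themselves or from external vx/vy GeoTIFFs (placeholder paths). The
# result is a Dataset with a 'direction' variable in degrees (0-360), where pixels
# slower than 1 m/y are set to NaN.
#
#     direction = cube.compute_flow_direction()                            # from the cube itself
#     direction = cube.compute_flow_direction("vx_ref.tif", "vy_ref.tif")  # from reference rasters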
1334
+ def create_flag(self, flag: str | xr.Dataset | None = None, field_name: str | None = None, default_value: str | int | None = None):
+ """
+ Create a flag dataset from the provided shapefile (or netCDF file) and shapefile field.
+ It is usually used to divide the pixels into different types, in particular to single out surging glaciers.
+ If you just want to flag the pixels inside the polygons, leave field_name to None.
+
+ :param flag: [str | xr dataset | None] [default is None] --- Path to the shapefile (.shp, .gpkg) or netCDF file, or an already opened flag dataset
+ :param field_name: [str | None] [default is None] --- Name of the shapefile field used to classify the polygons (e.g. 'surge_type' in RGI7, 'Surge_class' in the HMA inventory)
+ :param default_value: [str | int | None] [default is None] --- Value of field_name corresponding to non-surging glaciers
+
+ :return flag: [xr dataset] --- Flag dataset with dimensions 'y' and 'x' (surge-type glacier: 2, other glacier: 1, stable area: 0)
1344
+ """
1345
+
1346
+ if isinstance(flag, str):
1347
+ if flag.split(".")[-1] == "nc": # If flag is a netCDF file
1348
+ flag = xr.open_dataset(flag)
1349
+
1350
+ elif flag.split(".")[-1] in ["shp", "gpkg"]: # If flag is a shape file
1351
+ flag = geopandas.read_file(flag).to_crs(self.ds.proj4).clip(self.ds.rio.bounds())
1352
+
1353
+ # surge-type glacier: 2, other glacier: 1, stable area: 0
1354
+ if field_name is None:
1355
+ if "surge_type" in flag.columns: # RGI inventory, surge-type glacier: 2, other glacier: 0
1356
+ default_value = 0
1357
+ field_name = "surge_type"
1358
+ elif (
1359
+ "Surge_class" in flag.columns
1360
+ ): # HMA surging glacier inventory, surge-type glacier: 2, other glacier: ''
1361
+ default_value = None
1362
+ field_name = "Surge_class"
1363
+
1364
+ if field_name is not None:
1365
+ flag_id = flag[field_name].apply(lambda x: 2 if x != default_value else 1).astype("int16")
1366
+ geom_value = ((geom, value) for geom, value in zip(flag.geometry, flag_id))
1367
+ else:
1368
+ # inside the polygon: 1, outside: 0
1369
+ geom_value = ((geom, 1) for geom in flag.geometry)
1370
+
1371
+ try:
1372
+ flag = rasterio.features.rasterize(
1373
+ geom_value,
1374
+ out_shape=(self.ny, self.nx),
1375
+ transform=self.ds.rio.transform(),
1376
+ all_touched=True,
1377
+ fill=0, # background value
1378
+ dtype="int16",
1379
+ )
1380
+ except Exception:  # rasterize fails, e.g. when no geometry intersects the cube extent
1381
+ flag = np.zeros(shape=(self.ny, self.nx), dtype="int16")
1382
+
1383
+ flag = xr.Dataset(
1384
+ data_vars=dict(
1385
+ flag=(["y", "x"], flag),
1386
+ ),
1387
+ coords=dict(
1388
+ x=(["x"], self.ds.x.data),
1389
+ y=(["y"], self.ds.y.data),
1390
+ ),
1391
+ )
1392
+
1393
+ elif not isinstance(flag, xr.Dataset):
1394
+ raise ValueError("flag file must be .nc or .shp")
1395
+
1396
+ if "flags" in list(flag.variables):
1397
+ flag = flag.rename({"flags": "flag"})
1398
+
1399
+ return flag
1400
+
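# Usage sketch (illustrative): building a flag raster from a glacier inventory.
# "rgi7_outlines.shp" is a placeholder; with RGI7 outlines the 'surge_type' field is
# detected automatically, giving flag = 2 for surge-type glaciers, 1 for other
# glaciers and 0 for stable ground.
#
#     flag = cube.create_flag("rgi7_outlines.shp")
#     flag["flag"].plot()   # quick visual check of the classification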
1401
+ def filter_cube_before_inversion(
1402
+ self,
1403
+ i: int | float | None = None,
1404
+ j: int | float | None = None,
1405
+ smooth_method: str = "savgol",
1406
+ s_win: int = 3,
1407
+ t_win: int = 90,
1408
+ sigma: int = 3,
1409
+ order: int = 3,
1410
+ unit: int = 365,
1411
+ delete_outliers: str | float | None = None,
1412
+ flag: xr.Dataset | str | None = None,
1413
+ dem_file: str | None = None,
1414
+ regu: int | str = "1accelnotnull",
1415
+ solver: str = "LSMR_ini",
1416
+ proj: str = "EPSG:4326",
1417
+ velo_or_disp: str = "velo",
1418
+ select_baseline: int | None = 180,
1419
+ verbose: bool = False,
1420
+ ) -> xr.Dataset:
1421
+ """
1422
+ Filter the original data before the inversion:
1423
+ - delete outliers according to the provided criterion
+ - apply a spatio-temporal kernel smoothing to the data, which can be used as an a priori for the inversion (for regu "1accelnotnull" or "directionxy")
+ - compute the mean velocity along x and y (for solver = 'LSMR_ini', if regu is not "1accelnotnull" or "directionxy")
1426
+
1427
+ :params i, j: [int | float | None] --- Coordinates of a given pixel; if provided, the cube is cropped around this pixel
+ :param smooth_method: [str] [default is 'savgol'] --- Smoothing method to be used to smooth the data in time ('gaussian', 'median', 'ewma', 'savgol')
+ :param s_win: [int] [default is 3] --- Size of the spatial window
+ :param t_win: [int] [default is 90] --- Time window size for 'ewma' smoothing
+ :param sigma: [int] [default is 3] --- Standard deviation for 'gaussian' filter
+ :param order: [int] [default is 3] --- Order of the smoothing function
+ :param unit: [int] [default is 365] --- 365 if the unit is m/y, 1 if the unit is m/d
+ :param delete_outliers: [str | float | None] [default is None] --- If float, delete all velocities whose quality indicator is higher than delete_outliers; if str or dict, name(s) and parameters of the filtering method(s) to apply
+ :param flag: [xr dataset | str | None] [default is None] --- If not None, flag dataset (or path to it) used to identify stable areas, surging and non-surging glaciers
+ :param dem_file: [str | None] [default is None] --- Path to a DEM file (GeoTIFF), required when the 'topo_angle' filter is used
+ :param regu: [int | str] [default is "1accelnotnull"] --- Regularisation of the solver
+ :param solver: [str] [default is 'LSMR_ini'] --- Solver used to invert the system
+ :param proj: [str] [default is 'EPSG:4326'] --- EPSG of the i, j projection
+ :param velo_or_disp: [str] [default is 'velo'] --- 'disp' or 'velo' to indicate the type of the observations: 'disp' means that self contains displacement values, 'velo' means it contains velocities
+ :param select_baseline: [int | None] [default is 180] --- Threshold on the temporal baseline of the selected observations; if the number of observations is lower than 3 times the number of estimated displacements, the threshold is increased by 30 days
+ :param verbose: [bool] [default is False] --- Print information throughout the process
+
+ :return obs_filt: [xr dataset | None] --- Filtered dataset
+ :return flag: [xr dataset | None] --- Flag dataset used (None if no flag was given)
1444
+ """
1445
+
1446
+ def loop_rolling(da_arr: xr.Dataset, select_baseline: int | None = 180) -> (np.ndarray, np.ndarray): # type: ignore
1447
+ """
1448
+ A function to calculate spatial mean, resample data, and calculate smoothed velocity.
1449
+
1450
+ :param da_arr: [xr dataset] --- Original data
1451
+ :param select_baseline: [int] [default is None] --- Threshold over the temporal baselines
1452
+
1453
+ :return spatial_mean: [np array] --- smoothed velocity
1454
+ :return date_out: [np array] --- Observed dates
1455
+ """
1456
+
1457
+ # Compute the dates of the estimated displacements time series
1458
+ date_out = date_range[:-1] + np.diff(date_range) // 2
1459
+ mid_dates = self.ds["mid_date"]
1460
+
1461
+ if verbose:
1462
+ start = time.time()
1463
+ if select_baseline is not None: # select data with a temporal baseline lower than a threshold
1464
+ baseline = self.ds["temporal_baseline"].compute()
1465
+ idx = np.where(baseline < select_baseline)
1466
+ # Increase the threshold by 30 days while the number of observations is lower than 3 times the number of estimated displacements
+ while (len(idx[0]) < 3 * len(date_out)) and (select_baseline < 200):
+ select_baseline += 30
+ idx = np.where(baseline < select_baseline)
1470
+ mid_dates = mid_dates.isel(mid_date=idx[0])
1471
+ da_arr = da_arr.isel(mid_date=idx[0])
1472
+
1473
+ # Find the time axis for dask processing
1474
+ time_axis = self.ds["vx"].dims.index("mid_date")
1475
+ # Apply the selected kernel in time
1476
+ if verbose:
1477
+ with ProgressBar(): # Plot a progress bar
1478
+ filtered_in_time = dask_smooth_wrapper(
1479
+ da_arr.data,
1480
+ mid_dates,
1481
+ t_out=date_out,
1482
+ smooth_method=smooth_method,
1483
+ sigma=sigma,
1484
+ t_win=t_win,
1485
+ order=order,
1486
+ axis=time_axis,
1487
+ ).compute()
1488
+ else:
1489
+ filtered_in_time = dask_smooth_wrapper(
1490
+ da_arr.data,
1491
+ mid_dates,
1492
+ t_out=date_out,
1493
+ smooth_method=smooth_method,
1494
+ sigma=sigma,
1495
+ t_win=t_win,
1496
+ order=order,
1497
+ axis=time_axis,
1498
+ ).compute()
1499
+
1500
+ if verbose:
1501
+ print(f"[Data filtering] Smoothing observations took {round((time.time() - start), 1)} s")
1502
+
1503
+ # Spatial average
1504
+ if (
1505
+ np.min([da_arr["x"].size, da_arr["y"].size]) > s_win
1506
+ ): # The spatial average is performed only if the size of the cube is larger than s_win, the spatial window
1507
+ spatial_axis = tuple(i for i in range(3) if i != time_axis)
1508
+ pad_widths = tuple((s_win // 2, s_win // 2) if i != time_axis else (0, 0) for i in range(3))
1509
+ spatial_mean = da.nanmean(
1510
+ sliding_window_view(filtered_in_time, (s_win, s_win), axis=spatial_axis), axis=(-1, -2)
1511
+ )
1512
+ spatial_mean = da.pad(spatial_mean, pad_widths, mode="edge")
1513
+ else:
1514
+ spatial_mean = filtered_in_time
1515
+
1516
+ return spatial_mean.compute(), np.unique(date_out)
1517
+
1518
+ if np.isnan(self.ds["date1"].values).all():
1519
+ print("[Data filtering] Empty sub-cube (masked data ?)")
1520
+ return None
1521
+
1522
+ if i is not None and j is not None: # Crop the cube dataset around a given pixel
1523
+ i, j = self.convert_coordinates(i, j, proj=proj, verbose=verbose)
1524
+ if verbose:
1525
+ print(f"[Data filtering] Clipping dataset to individual pixel: (x, y) = ({i},{j})")
1526
+ buffer = (s_win + 2) * (self.ds["x"][1] - self.ds["x"][0])
1527
+ self.buffer(self.ds.proj4, [i, j, buffer])
1528
+ self.ds = self.ds.unify_chunks()
1529
+
1530
+ # The spatio-temporal smoothing should be carried on velocity, while we need displacement during inversion
1531
+ if velo_or_disp == "disp": # to provide velocity values
1532
+ self.ds["vx"] = self.ds["vx"] / self.ds["temporal_baseline"] * unit
1533
+ self.ds["vy"] = self.ds["vy"] / self.ds["temporal_baseline"] * unit
1534
+
1535
+ if flag is not None: # create a flag to identify stable areas, and possibly surges
1536
+ flag = self.create_flag(flag)
1537
+ flag.load()
1538
+
1539
+ if isinstance(regu, dict):
1540
+ regu = list(regu.values())
1541
+ else:
1542
+ raise ValueError("regu must be a dict if flag is Not None")
1543
+ else:
1544
+ if isinstance(regu, int): # if regu is an integer
1545
+ regu = [regu]
1546
+ elif isinstance(regu, str): # if regu is a string
1547
+ regu = list(regu.split())
1548
+
1549
+ start = time.time()
1550
+
1551
+ if delete_outliers is not None: # remove outliers beforehand
1552
+ slope, aspect, direction = None, None, None
1553
+ if (isinstance(delete_outliers, str) and delete_outliers == "topo_angle") or (
1554
+ isinstance(delete_outliers, dict) and "topo_angle" in delete_outliers.keys()
1555
+ ):
1556
+ if isinstance(dem_file, str):
1557
+ slope, aspect = self.compute_slo_asp(dem_file=dem_file)
1558
+ else:
1559
+ raise ValueError("dem_file must be given if delete_outliers is 'topo_angle'")
1560
+
1561
+ elif (isinstance(delete_outliers, str) and delete_outliers == "flow_angle") or (
1562
+ isinstance(delete_outliers, dict) and "flow_angle" in delete_outliers.keys()
1563
+ ):
1564
+ direction = self.compute_flow_direction(vx_file=None, vy_file=None)
1565
+ self.delete_outliers(
1566
+ delete_outliers=delete_outliers, flag=None, slope=slope, aspect=aspect, direction=direction
1567
+ )
1568
+ if verbose:
1569
+ print(f"[Data filtering] Delete outlier took {round((time.time() - start), 1)} s")
1570
+
1571
+ if "1accelnotnull" in regu or "directionxy" in regu: # compute velocity smoothed using a spatio-temporal filter
1572
+ date_range = np.sort(
1573
+ np.unique(
1574
+ np.concatenate(
1575
+ (
1576
+ self.ds["date1"].values[~np.isnan(self.ds["date1"].values)],
1577
+ self.ds["date2"].values[~np.isnan(self.ds["date2"].values)],
1578
+ ),
1579
+ axis=0,
1580
+ )
1581
+ )
1582
+ ) # dates between which the displacement should be estimated
1583
+ if verbose:
1584
+ start = time.time()
1585
+
1586
+ # spatio-temporal filter
1587
+ vx_filtered, dates_uniq = loop_rolling(
1588
+ self.ds["vx"], select_baseline=select_baseline
1589
+ ) # dates_uniq correspond to the central date of dates_range
1590
+ vy_filtered, dates_uniq = loop_rolling(self.ds["vy"], select_baseline=select_baseline)
1591
+
1592
+ # We obtain one smoothed value for each unique date in date_range
1593
+ obs_filt = xr.Dataset(
1594
+ data_vars=dict(
1595
+ vx_filt=(["x", "y", "mid_date"], vx_filtered), vy_filt=(["x", "y", "mid_date"], vy_filtered)
1596
+ ),
1597
+ coords=dict(x=(["x"], self.ds.x.data), y=(["y"], self.ds.y.data), mid_date=dates_uniq),
1598
+ attrs=dict(description="Smoothed velocity observations", units="m/y", proj4=self.ds.proj4),
1599
+ )
1600
+ del vx_filtered, vy_filtered
1601
+
1602
+ if verbose:
1603
+ print(
1604
+ "[Data filtering] Calculating smoothing mean of the observations completed in {:.2f} seconds".format(
1605
+ time.time() - start
1606
+ )
1607
+ )
1608
+
1609
+ elif (
1610
+ solver == "LSMR_ini"
1611
+ ): # The initialization is based on the averaged velocity over the period, for every pixel
1612
+ obs_filt = self.ds[["vx", "vy"]].mean(dim="mid_date")
1613
+ obs_filt.attrs["description"] = "Averaged velocity over the period"
1614
+ obs_filt.attrs["units"] = "m/y"
1615
+ else:
1616
+ obs_filt = None
1617
+
1618
+ # Unify the observations to displacement to provide displacement values during inversion
1619
+ self.ds["vx"] = self.ds["vx"] * self.ds["temporal_baseline"] / unit
1620
+ self.ds["vy"] = self.ds["vy"] * self.ds["temporal_baseline"] / unit
1621
+
1622
+ if obs_filt is not None:
1623
+ obs_filt.load()
1624
+ self.ds = self.ds.load() # Crash memory without loading
1625
+ # persist() is particularly useful when using a distributed cluster because the data will be loaded into distributed memory across your machines and be much faster to use than reading repeatedly from disk.
1626
+
1627
+ return obs_filt, flag
1628
+
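# Usage sketch (illustrative): typical pre-inversion filtering call. `cube` is assumed
# to be loaded; the returned obs_filt holds the spatio-temporally smoothed velocities
# (used as an a priori when regu is "1accelnotnull" or "directionxy"), and the cube's
# vx/vy are converted to displacements in place.
#
#     obs_filt, flag = cube.filter_cube_before_inversion(
#         smooth_method="savgol",
#         delete_outliers={"mz_score": 3.5},
#         regu="1accelnotnull",
#         solver="LSMR_ini",
#         verbose=True,
#     )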
1629
+ def split_cube(self, n_split: int = 2, dim: str | list = "x", savepath: str | None = None):
1630
+ """
1631
+ Split the cube into smaller cubes (taking less memory to load) according to the given dimensions.
1632
+
1633
+ :param n_split: [int] [default is 2] --- Number of splits to compute along each dimension in dim
+ :param dim: [str | list] [default is "x"] --- Dimension(s) along which the cube must be split
+ :param savepath: [str | None] [default is None] --- If not None, save the new cubes at this location
+
+ :return cubes: [list] --- List of the split cubes
1638
+ """
1639
+
1640
+ cubes = []
1641
+ for s in range(n_split):
1642
+ if isinstance(dim, str):
1643
+ cube = CubeDataClass(
1644
+ self,
1645
+ self.ds.isel(
1646
+ {
1647
+ dim: slice(
1648
+ s * len(self.ds[dim].values) // n_split,
1649
+ (s + 1) * len(self.ds[dim].values) // n_split,
1650
+ 1,
1651
+ )
1652
+ }
1653
+ ),
1654
+ )
1655
+ cube.update_dimension()
1656
+ if savepath is not None:
1657
+ cube.ds.to_netcdf(f"{savepath}{dim}_{s}.nc")
1658
+ print(f"Split cube saved at {savepath}{dim}_{s}.nc")
1659
+ cubes.append(cube)
1660
+ elif isinstance(dim, list):
1661
+ cube = CubeDataClass(
1662
+ self,
1663
+ self.ds.isel(
1664
+ {
1665
+ dim[0]: slice(
1666
+ s * len(self.ds[dim[0]].values) // 2, (s + 1) * len(self.ds[dim[0]].values) // 2, 1
1667
+ )
1668
+ }
1669
+ ),
1670
+ )
1671
+ if len(dim) > 1:
1672
+ cubes += cube.split_cube(n_split=n_split, dim=dim[1:], savepath=f"{savepath}{dim[0]}_{s}_" if savepath is not None else None)
1673
+ else:
1674
+ if savepath is not None:
1675
+ cube.ds.to_netcdf(f"{savepath}{dim[0]}_{s}.nc")
1676
+ print(f"Split cube saved at {savepath}{dim[0]}_{s}.nc")
1677
+ cubes.append(cube)
1678
+
1679
+ return cubes
1680
+
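# Usage sketch (illustrative): splitting a large cube into four tiles (two splits
# along x and two along y) to reduce the memory footprint; the save-path prefix is a
# placeholder. With a single dimension, the split is done along that axis only.
#
#     tiles = cube.split_cube(n_split=2, dim=["x", "y"], savepath="out/tile_")
#     halves = cube.split_cube(n_split=2, dim="x")   # no saving, just a list of cubes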
1681
+ def reproj_coord(
1682
+ self,
1683
+ new_proj: Optional[str] = None,
1684
+ new_res: Optional[float] = None,
1685
+ interp_method: str = "nearest",
1686
+ cube_to_match: Optional["CubeDataClass"] = None,
1687
+ ):
1688
+ """
1689
+ Reproject the cube to a given projection system, and (optionally) resample it to a given resolution.
1690
+ The new projection can be defined by the variable new_proj or by a cube stored in cube_to_match.
1691
+ The new resolution can be defined by the variable new_res or by a cube stored in cube_to_match.
1692
+
1693
+ :param new_proj: [str] --- EPSG code of the new projection (e.g. 'EPSG:32645')
+ :param new_res: [float] --- New resolution, in the unit of the new projection system
+ :param interp_method: [str] [default is 'nearest'] --- Resampling method used for the reprojection ('nearest' or 'bilinear')
+ :param cube_to_match: [cube_data_class] --- Cube used as a reference to reproject self
1697
+ """
1698
+ # assign coordinate system
1699
+ if cube_to_match is not None:
1700
+ cube_to_match.ds = cube_to_match.ds.rio.write_crs(cube_to_match.ds.proj4)
1701
+ self.ds = self.ds.rio.write_crs(self.ds.proj4)
1702
+ self.ds = self.ds.transpose("mid_date", "y", "x")
1703
+
1704
+ # Reproject coordinates
1705
+ if cube_to_match is not None:
1706
+ if interp_method == "nearest":
1707
+ self.ds = self.ds.rio.reproject_match(cube_to_match.ds, resampling=rasterio.enums.Resampling.nearest)
1708
+ elif interp_method == "bilinear":
1709
+ self.ds = self.ds.rio.reproject_match(cube_to_match.ds, resampling=rasterio.enums.Resampling.bilinear)
1710
+ if new_res is not None or new_proj is not None:
1711
+ print("The new projection has been defined according to cube_to_match.")
1712
+ elif new_res is None:
1713
+ self.ds = self.ds.rio.reproject(new_proj)
1714
+ else:
1715
+ self.ds = self.ds.rio.reproject(new_proj, resolution=new_res)
1716
+
1717
+ # Reject abnormal data (when the cube sizes are not the same and data are missing, the interpolation leads to infinite or nearly-infinite values)
1718
+ self.ds[["vx", "vy"]] = self.ds[["vx", "vy"]].where(
1719
+ (np.abs(self.ds["vx"].values) < 10000) | (np.abs(self.ds["vy"].values) < 10000), np.nan
1720
+ )
1721
+
1722
+ # Update of cube_data_classxr attributes
1723
+ warnings.filterwarnings("ignore", category=UserWarning, module="pyproj") # prevent to have a warning
1724
+ if new_proj is None:
1725
+ new_proj = cube_to_match.ds.proj4
1726
+ self.ds = self.ds.assign_attrs({"proj4": new_proj})
1727
+ else:
1728
+ self.ds = self.ds.assign_attrs({"proj4": CRS.from_epsg(new_proj[5:]).to_proj4()})
1729
+ self.ds = self.ds.assign_coords({"x": self.ds.x, "y": self.ds.y})
1730
+ self.update_dimension()
1731
+
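# Usage sketch (illustrative): reprojecting the cube grid, either to an explicit EPSG
# code / resolution or to the grid of another cube (placeholder names). Note that this
# only resamples the grid; reproj_vel() below handles the rotation of the velocity
# components themselves.
#
#     cube.reproj_coord(new_proj="EPSG:32645", new_res=100)   # explicit target CRS and resolution
#     cube.reproj_coord(cube_to_match=reference_cube)         # or match another cube's grid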
1732
+ def reproj_vel(
1733
+ self,
1734
+ new_proj: Optional[str] = None,
1735
+ cube_to_match: Optional["CubeDataClass"] = None,
1736
+ unit: int = 365,
1737
+ nb_cpu: int = 8,
1738
+ ):
1739
+ """
1740
+ Reproject the velocity vector in a new projection grid (i.e. the x and y variables are not changed, only vx and vy are modified).
1741
+ The new projection can be defined by the variable new_proj or by a cube stored in cube_to_match.
1742
+
1743
+ :param new_proj: [str] --- EPSG code of the new projection
1744
+ :param cube_to_match: [cube_data_class] --- cube used as a reference to reproject self
1745
+ :param unit: [int] [default is 365] --- 365 if the unit of the velocity are m/y, 1 if they are m/d
1746
+ :param nb_cpu: [int] [default is 8] --- number of CPUs used for the parallelization
1747
+ """
1748
+
1749
+ if new_proj is None:
1750
+ if cube_to_match is not None:
1751
+ new_proj = cube_to_match.ds.proj4
1752
+ transformer = Transformer.from_crs(self.ds.proj4, new_proj)
1753
+ else:
1754
+ raise ValueError("Please provide new_proj or cube_to_match")
1755
+ else:
1756
+ transformer = Transformer.from_crs(self.ds.proj4, CRS.from_epsg(new_proj[5:]).to_proj4())
1757
+
1758
+ # Prepare grid and transformer
1759
+ grid = np.meshgrid(self.ds["x"], self.ds["y"])
1760
+ grid_transformed = transformer.transform(grid[0], grid[1])
1761
+ # temp = self.temp_base_()
+ temp = np.array([30] * self.nz)  # NOTE: a constant 30-day temporal baseline is assumed here for every observation
1763
+
1764
+ def transform_slice(z, temp, grid, transformer):
1765
+ """Transform the velocity slice for a single time step."""
1766
+ # compute the coordinate for the ending point of the vector
1767
+ endx = (self.ds["vx"].isel(mid_date=z) * temp[z] / unit) + grid[0]
1768
+ endy = (self.ds["vy"].isel(mid_date=z) * temp[z] / unit) + grid[1]
1769
+
1770
+ # Transform final coordinates
1771
+ t = transformer.transform(endx, endy)
1772
+ # Compute differences in the new coordinate system
1773
+ vx = (grid_transformed[0] - t[0]) / temp[z] * unit
1774
+ vy = (t[1] - grid_transformed[1]) / temp[z] * unit
1775
+
1776
+ return vx, vy
1777
+
1778
+ results = np.array(
1779
+ Parallel(n_jobs=nb_cpu, verbose=0)(
1780
+ delayed(transform_slice)(z, temp, grid, transformer) for z in range(self.nz)
1781
+ )
1782
+ )
1783
+ # Unpack the results
1784
+ vx, vy = results[:, 0, :, :], results[:, 1, :, :]
1785
+
1786
+ # Updating DataArrays
1787
+ self.ds["vx"] = xr.DataArray(
1788
+ vx.astype("float32"),
1789
+ dims=["mid_date", "y", "x"],
1790
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
1791
+ )
1792
+ self.ds["vx"].encoding = {"vx": {"dtype": "float32", "scale_factor": 0.1, "units": "m/y"}}
1793
+
1794
+ self.ds["vy"] = xr.DataArray(
1795
+ vy.astype("float32"),
1796
+ dims=["mid_date", "y", "x"],
1797
+ coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
1798
+ )
1799
+ self.ds["vy"].encoding = {"vy": {"dtype": "float32", "scale_factor": 0.1, "units": "m/y"}}
1800
+
1801
+ del grid, transformer, temp, vx, vy
1802
+
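# Usage sketch (illustrative): the velocity components are reprojected by moving each
# pixel by (vx, vy) * dt in the source CRS, transforming both the start and end points
# to the target CRS, and differencing them again. `reference_cube` is a placeholder
# for an already loaded cube defining the target projection.
#
#     cube.reproj_vel(cube_to_match=reference_cube, unit=365, nb_cpu=4)
#     cube.reproj_vel(new_proj="EPSG:32645")   # or with an explicit EPSG code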
1803
+ def align_cube(
1804
+ self,
1805
+ cube: "CubeDataClass",
1806
+ unit: int = 365,
1807
+ reproj_vel: bool = True,
1808
+ reproj_coord: bool = True,
1809
+ interp_method: str = "nearest",
1810
+ nb_cpu: int = 8,
1811
+ ):
1812
+ """
1813
+ Reproject cube to match the resolution, projection, and region of self.
1814
+
1815
+ :param cube: Cube to align to self
1816
+ :param unit: Unit of the velocities (365 for m/y, 1 for m/d) (default is 365)
1817
+ :param reproj_vel: Whether the velocities have to be reprojected or not -> this modifies their values (default is True)
1818
+ :param reproj_coord: Whether the coordinates have to be interpolated or not (using interp_method) (default is True)
1819
+ :param interp_method: Interpolation method used to reproject cube (default is 'nearest')
1820
+ :param nb_cpu: [int] [default is 8] --- number of CPUs used for the parallelization
1821
+
1822
+ :return: Cube projected to self
1823
+ """
1824
+ # if the velocity components have to be reprojected in the new projection system
1825
+ if reproj_vel:
1826
+ cube.reproj_vel(cube_to_match=self, unit=unit, nb_cpu=nb_cpu)
1827
+
1828
+ # if the coordinates have to be reprojected in the new projection system
1829
+ if reproj_coord:
1830
+ cube.reproj_coord(cube_to_match=self)
1831
+
1832
+ cube.ds = cube.ds.assign_attrs({"author": f"{cube.ds.author} aligned"})
1833
+ cube.update_dimension()
1834
+
1835
+ return cube
1836
+
1837
+ def merge_cube(self, cube: "CubeDataClass"):
1838
+ """
1839
+ Merge another cube to the present one. It must have been aligned first (using align_cube)
1840
+
1841
+ :param cube: [cube_data_class] --- The cube to be merged to self
1842
+ """
1843
+
1844
+ # Merge the cubes (must be previously aligned before using align_cube)
1845
+ self.ds = xr.concat([self.ds, cube.ds.sel(x=self.ds["x"], y=self.ds["y"])], dim="mid_date")
1846
+
1847
+ # Update the attributes
1848
+ self.ds = self.ds.chunk(chunks={"mid_date": self.ds["mid_date"].size})
1849
+ self.nz = self.ds["mid_date"].size
1850
+ if not (
+ isinstance(self.filedir, list)
+ and isinstance(self.filename, list)
+ and isinstance(self.author, list)
+ and isinstance(self.source, list)
+ ): # wrap the metadata attributes in lists before appending those of the merged cube
1856
+ self.filedir = [self.filedir]
1857
+ self.filename = [self.filename]
1858
+ self.author = [self.author]
1859
+ self.source = [self.source]
1860
+ self.filedir.append(cube.filedir)
1861
+ self.filename.append(cube.filename)
1862
+ self.author.append(cube.author)
1863
+ self.source.append(cube.source)
1864
+
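# Usage sketch (illustrative): combining two velocity cubes. `cube_a` and `cube_b` are
# placeholders for already loaded cubes; cube_b is first aligned onto cube_a's grid
# and projection, then stacked along the time dimension.
#
#     cube_b_aligned = cube_a.align_cube(cube_b, reproj_vel=True, reproj_coord=True)
#     cube_a.merge_cube(cube_b_aligned)
#     print(cube_a.nz)   # number of observations after the merge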
1865
+ def average_cube(
1866
+ self,
1867
+ return_format: str = "geotiff",
1868
+ return_variable: list = ["vv"],
1869
+ save: bool = True,
1870
+ path_save: str | None = None,
1871
+ ):
1872
+ """
1873
+ Compute the mean velocity at each pixel of the cube.
1874
+
1875
+ :param return_format: [str] [default is 'geotiff'] --- Type of the file to be returned ('nc' or 'geotiff')
1876
+ :param return_variable: [list] [default is ['vv']] --- Which variable's mean must be returned
1877
+ :param save: [bool] [default is True] --- If True, save the file to path_save
1878
+ :param path_save: [str | None] [default is None] --- Path where to save the mean velocity file
1879
+
1880
+ :return: xr dataset, with vx_mean, the mean of vx and vy_mean the mean of vy
1881
+ """
1882
+ time_dim = "mid_date" if "mid_date" in self.ds.dims else "time"
1883
+ vx_mean = self.ds["vx"].mean(dim=time_dim)
1884
+ vy_mean = self.ds["vy"].mean(dim=time_dim)
1885
+ dico_variable = {"vx": vx_mean, "vy": vy_mean}
1886
+ if "vv" in return_variable:
1887
+ vv_mean = np.sqrt(vx_mean**2 + vy_mean**2)
1888
+ dico_variable["vv"] = vv_mean
1889
+
1890
+ if return_format == "nc":
1891
+ ds_mean = xr.Dataset({})
1892
+ coords = {"y": self.ds.y, "x": self.ds.x}
1893
+ for variable in return_variable:
1894
+ ds_mean[f"{variable}_mean"] = xr.DataArray(dico_variable[variable], dims=["y", "x"], coords=coords)
1895
+ if save:
1896
+ ds_mean.to_netcdf(path_save)
1897
+ return ds_mean
1898
+
1899
+ elif return_format == "geotiff":
1900
+ ds_mean = []
1901
+ for variable in return_variable:
1902
+ mean_v = dico_variable[variable].to_numpy().astype(np.float32)
1903
+ mean_v = np.flip(mean_v.T, axis=0)
1904
+
1905
+ if save:
1906
+ # Create the GeoTIFF file
1907
+ with rasterio.open(
1908
+ f"{path_save}/mean_velocity_{variable}.tif",
1909
+ "w",
1910
+ driver="GTiff",
1911
+ height=mean_v.shape[0],
1912
+ width=mean_v.shape[1],
1913
+ count=1,
1914
+ dtype=str(mean_v.dtype),
1915
+ crs=CRS.from_proj4(self.ds.proj4),
1916
+ transform=self.ds.rio.transform(),
1917
+ ) as dst:
1918
+ dst.write(mean_v, 1)
1919
+
1920
+ ds_mean.append(mean_v)
1921
+
1922
+ return ds_mean
1923
+ else:
1924
+ raise ValueError("Please enter geotiff or nc")
1925
+
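# Usage sketch (illustrative): exporting the mean velocity magnitude. For the
# 'geotiff' format, path_save is treated as a directory; for 'nc' it is the full file
# path (both paths below are placeholders).
#
#     cube.average_cube(return_format="geotiff", return_variable=["vv"], path_save="out")
#     ds_mean = cube.average_cube(return_format="nc", return_variable=["vx", "vy", "vv"],
#                                 save=False)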
1926
+ def compute_heatmap_moving(
1927
+ self,
1928
+ points_heatmap: pd.DataFrame,
1929
+ variable: str = "vv",
1930
+ method_interp: str = "linear",
1931
+ verbose: bool = False,
1932
+ freq: str = "MS",
1933
+ method: str = "mean",
1934
+ ) -> pd.DataFrame:
1935
+ """
1936
+ Compute a heatmap of the average monthly velocity, by averaging all the velocities overlapping a given month
1937
+
1938
+ :param points_heatmap: Points where the heatmap is to be computed
1939
+ :param variable: What variable is to be computed ('vx', 'vy' or 'vv')
1940
+ :param method_interp: Interpolation method used to determine the value at a specified point from the discrete velocities data
1941
+ :param freq: frequency used in the pandas.date_range function (default: 'MS' every first day of the month)
1942
+ :param method: 'mean' or 'median'
1943
+ :param verbose: Print information throughout the process (default is False)
1944
+
1945
+
1946
+ :return: pandas DataFrame, heatmap values where each row corresponds to a date and each column to a point along the line
1947
+ """
1948
+
1949
+ date1 = self.date1_()
1950
+ date2 = self.date2_()
1951
+ # Create a DateTimeIndex range spanning from the minimum date to the maximum date
1952
+ date_range = pd.date_range(np.nanmin(date1), np.nanmax(date2), freq=freq) # 'MS' for start of each month
1953
+ data = np.column_stack((date1, date2)) # Combine date1 and date2 into a single 2D array
1954
+ # Sort data according to the first date
1955
+ data = np.ma.array(sorted(data, key=lambda date: date[0])) # Sort according to the first date
1956
+
1957
+ # Find the index of the dates that have to be averaged, to get the heatmap
1958
+ # Each value of the heatmap corresponds to an average of all the velocities which are overlapping a given period
1959
+ save_line = [[] for _ in range(len(date_range) - 1)]
1960
+ for i_date, _ in enumerate(date_range[:-1]):
1961
+ i = 0
1962
+ while i < data.shape[0] and date_range[i_date + 1] >= data[i, 0]:
1963
+ if date_range[i_date] <= data[i, 1]:
1964
+ save_line[i_date].append(i)
1965
+ i += 1
1966
+ interval_output = pd.Series(
1967
+ [(date_range[k + 1] - date_range[k]) / np.timedelta64(1, "D") for k in range(date_range.shape[0] - 1)]
1968
+ )
1969
+ dates_c = date_range[1:] - pd.to_timedelta((interval_output / 2).astype("int"), "D")
1970
+ del interval_output, date_range, data
1971
+
1972
+ def data_temporalpoint(k: int, points_heatmap):
1973
+ """Get the data at a given spatial point contained in points_heatmap"""
1974
+
1975
+ geopoint = points_heatmap["geometry"].iloc[k]  # k-th point along the line where the heatmap is computed
1980
+
1981
+ i, j = geopoint.x, geopoint.y
1982
+ if verbose:
1983
+ print("i,j", i, j)
1984
+
1985
+ if variable == "vv":
1986
+ v = np.sqrt(
1987
+ self.ds["vx"].interp(x=i, y=j, method=method_interp).load() ** 2
1988
+ + self.ds["vy"].interp(x=i, y=j, method="linear").load() ** 2
1989
+ )
1990
+ elif variable == "vx" or variable == "vy":
1991
+ v = self.ds[variable].interp(x=i, y=j, method=method_interp).load()
1992
+
1993
+ data = np.array([date1, date2, v.values], dtype=object).T
1994
+ data = np.ma.array(sorted(data, key=lambda date: date[0])) # Sort according to the first date
1995
+
1996
+ return data[:, 2]
1997
+
1998
+ for k in range(len(points_heatmap)):
1999
+ if verbose:
2000
+ print("k", k)
2001
+
2002
+ data = data_temporalpoint(k, points_heatmap)
2003
+ vvmasked = np.ma.masked_invalid(np.ma.array(data, dtype="float"))
2004
+
2005
+ if method == "mean":
2006
+ vvmean = [np.ma.mean(vvmasked[lines]) for lines in save_line]
2007
+ elif method == "median":
2008
+ vvmean = [np.ma.median(vvmasked[lines]) for lines in save_line]
2009
+
2010
+ vvdf = pd.DataFrame(vvmean, index=dates_c, columns=[points_heatmap["distance"].iloc[k] / 1000])
2011
+
2012
+ if k > 0:
2013
+ line_df_vv = pd.concat([line_df_vv, vvdf], join="inner", axis=1)
2014
+ else:
2015
+ line_df_vv = vvdf
2016
+
2017
+ return line_df_vv
2018
+
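# Usage sketch (illustrative): monthly velocity heatmap along a flow line. `points_gdf`
# is a placeholder GeoDataFrame with a point 'geometry' column and a 'distance' column
# (in meters along the line); each column of the result is one point, each row one month.
#
#     heatmap = cube.compute_heatmap_moving(points_gdf, variable="vv", freq="MS", method="median")
#     heatmap.plot(legend=False)   # rough visualisation: one curve per point along the line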
2019
+ # @jit(nopython=True)
2020
+ def nvvc(self, nb_cpu=8, verbose=True):
2021
+ """
2022
+ Compute the Normalized Coherence of the Velocity Vectors (NCVV) for every pixel of the cube.
2023
+
2024
+ """
2025
+
2026
+ def ncvv_pixel(cube, i, j):
2027
+ return (
2028
+ np.sqrt(
2029
+ np.nansum(
2030
+ cube.ds["vx"].isel(x=i, y=j)
2031
+ / np.sqrt(cube.ds["vx"].isel(x=i, y=j) ** 2 + cube.ds["vy"].isel(x=i, y=j) ** 2)
2032
+ )
2033
+ ** 2
2034
+ + np.nansum(
2035
+ cube.ds["vy"].isel(x=i, y=j)
2036
+ / np.sqrt(cube.ds["vx"].isel(x=i, y=j) ** 2 + cube.ds["vy"].isel(x=i, y=j) ** 2)
2037
+ )
2038
+ ** 2
2039
+ )
2040
+ / cube.nz
2041
+ )
2042
+
2043
+ xy_values = itertools.product(range(self.nx), range(self.ny))
2044
+ xy_values_tqdm = tqdm(xy_values, total=self.nx * self.ny, mininterval=0.5)
2045
+
2046
+ return np.array(
2047
+ Parallel(n_jobs=nb_cpu, verbose=0)(
2048
+ delayed(ncvv_pixel)(self, i, j) for i, j in (xy_values_tqdm if verbose else xy_values)
2049
+ )
2050
+ ).reshape(self.nx, self.ny)
2051
+
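# Usage sketch (illustrative): the NCVV is the norm of the mean unit velocity vector at
# each pixel; values close to 1 indicate temporally consistent flow directions, values
# close to 0 indicate noisy or changing directions.
#
#     ncvv_map = cube.nvvc(nb_cpu=4)   # ndarray of shape (nx, ny)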
2052
+ def compute_med_stable_areas(
2053
+ self, shapefile_path, return_as="dataframe", stat_name="med", var_list=["vx", "vy"], invert=True
2054
+ ):
2055
+ """
2056
+ Compute the median of each variable per time step over a shapefile-defined (e.g. stable) area.
+
+ Parameters:
+
+ shapefile_path (str): Path to shapefile.
+ return_as (str): 'dataframe' or 'cube' (only 'dataframe' is currently implemented).
+ stat_name (str): Base variable name for the new columns.
+ var_list (list): Variables to process (e.g. ['vx', 'vy'] or ['vx', 'vy', 'vv']).
+ invert (bool): Whether to invert the shapefile mask.
+
+ Returns:
+ pd.DataFrame
2067
+ """
2068
+ # Ensure data has Dask chunks
2069
+ # self.ds = self.ds.chunk({'y': -1, 'x': -1, 'mid_date': 10})
2070
+ print(var_list)
2071
+ # Clip with shapefile
2072
+ gdf = gpd.read_file(shapefile_path)
2073
+ gdf = gdf.to_crs(self.ds.rio.crs)
2074
+ masked = self.ds.rio.clip(gdf.geometry, gdf.crs, drop=False, all_touched=True, invert=invert)
2075
+
2076
+ print("Clipped")
2077
+
2078
+ # Return as DataFrame
2079
+ if return_as == "dataframe":
2080
+ df_vx = (
2081
+ masked["vx"]
2082
+ .median(dim=["x", "y"])
2083
+ .compute()
2084
+ .to_dataframe(name=f"{stat_name}_vx")
2085
+ .reset_index()[["mid_date", f"{stat_name}_vx"]]
2086
+ )
2087
+ df_vy = (
2088
+ masked["vy"]
2089
+ .median(dim=["x", "y"])
2090
+ .compute()
2091
+ .to_dataframe(name=f"{stat_name}_vy")
2092
+ .reset_index()[["mid_date", f"{stat_name}_vy"]]
2093
+ )
2094
+ if len(var_list) == 3:
2095
+ df_v = (
2096
+ masked[var_list[2]]
2097
+ .median(dim=["x", "y"])
2098
+ .compute()
2099
+ .to_dataframe(name=f"{stat_name}_v")
2100
+ .reset_index()[["mid_date", f"{stat_name}_v"]]
2101
+ )
2102
+
2103
+ # Merge on time coordinate (e.g., 'mid_date')
2104
+ if len(var_list) == 3:
2105
+ merged_df = reduce(
2106
+ lambda left, right: pd.merge(left, right, on="mid_date", how="outer"), [df_vx, df_vy, df_v]
2107
+ )
2108
+ else:
2109
+ merged_df = pd.merge(df_vx, df_vy, on="mid_date")
2110
+
2111
+ return merged_df
2112
+
2113
+ # # Return as cube
2114
+ # elif return_as == 'cube':
2115
+ # return self.assign({f'{stat_name}_vx': mad_results['vx'], f'{stat_name}_vy': mad_results['vy']})
2116
+
2117
+ else:
2118
+ raise ValueError("return_as must be 'dataframe' or 'cube'")
2119
+
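# Usage sketch (illustrative): median velocity over presumably stable ground, often
# used as a per-date bias estimate. "glacier_outlines.shp" is a placeholder path; with
# invert=True the mask is inverted, so the statistics can be computed over the area
# outside the polygons (e.g. off-glacier ground).
#
#     med_df = cube.compute_med_stable_areas("glacier_outlines.shp", stat_name="med",
#                                            var_list=["vx", "vy"], invert=True)
#     med_df.head()   # columns: mid_date, med_vx, med_vy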
2120
+ def compute_mad(self, shapefile_path, return_as="dataframe", stat_name="mad", var_list=["vx", "vy"], invert=True):
2121
+ """
2122
+ Compute MAD per time step using Dask and apply_ufunc over a shapefile-defined area.
2123
+
2124
+ Parameters:
2125
+
2126
+ shapefile_path (str): Path to shapefile.
2127
+ return_as (str): 'dataframe' or 'cube'.
2128
+ stat_name (str): Base variable name for new data.
+ var_list (list): Variables to process (e.g. ['vx', 'vy'] or ['vx', 'vy', 'vv']).
+ invert (bool): Whether to invert the shapefile mask.
2130
+
2131
+ Returns:
2132
+ pd.DataFrame or xr.Dataset
2133
+ """
2134
+ # Ensure data has Dask chunks
2135
+ self.ds = self.ds.chunk({"y": -1, "x": -1, "mid_date": 10})
2136
+ print(var_list)
2137
+ # Clip with shapefile
2138
+ gdf = gpd.read_file(shapefile_path)
2139
+ gdf = gdf.to_crs(self.ds.rio.crs)
2140
+ masked = self.ds.rio.clip(gdf.geometry, gdf.crs, drop=False, all_touched=True, invert=invert)
2141
+
2142
+ print("Clipped")
2143
+
2144
+ # Define MAD function
2145
+ def mad_2d(arr):
2146
+ median = np.nanmedian(arr)
2147
+ return 1.483 * np.nanmedian(np.abs(arr - median))
2148
+
2149
+ mad_results = {} # Store MAD DataArrays
2150
+
2151
+ for var in var_list:
2152
+ data = masked[var]
2153
+
2154
+ mad = xr.apply_ufunc(
2155
+ mad_2d,
2156
+ data,
2157
+ input_core_dims=[["y", "x"]],
2158
+ output_core_dims=[[]],
2159
+ vectorize=True,
2160
+ dask="parallelized",
2161
+ output_dtypes=[data.dtype],
2162
+ )
2163
+
2164
+ mad.name = f"{stat_name}_{var}"
2165
+ mad_results[var] = mad
2166
+
2167
+ # Return as DataFrame
2168
+ if return_as == "dataframe":
2169
+ df_vx = (
2170
+ mad_results["vx"]
2171
+ .compute()
2172
+ .to_dataframe(name=f"{stat_name}_vx")
2173
+ .reset_index()[["mid_date", f"{stat_name}_vx"]]
2174
+ )
2175
+ df_vy = (
2176
+ mad_results["vy"]
2177
+ .compute()
2178
+ .to_dataframe(name=f"{stat_name}_vy")
2179
+ .reset_index()[["mid_date", f"{stat_name}_vy"]]
2180
+ )
2181
+ if len(var_list) == 3:
2182
+ df_v = (
2183
+ mad_results[var_list[2]]
2184
+ .compute()
2185
+ .to_dataframe(name=f"{stat_name}_v")
2186
+ .reset_index()[["mid_date", f"{stat_name}_v"]]
2187
+ )
2188
+
2189
+ # Merge on time coordinate (e.g., 'mid_date')
2190
+ if len(var_list) == 3:
2191
+ merged_df = reduce(
2192
+ lambda left, right: pd.merge(left, right, on="mid_date", how="outer"), [df_vx, df_vy, df_v]
2193
+ )
2194
+ else:
2195
+ merged_df = pd.merge(df_vx, df_vy, on="mid_date")
2196
+
2197
+ return merged_df
2198
+
2199
+ # Return as cube
2200
+ elif return_as == "cube":
2201
+ return self.assign({f"{stat_name}_vx": mad_results["vx"], f"{stat_name}_vy": mad_results["vy"]})
2202
+
2203
+ else:
2204
+ raise ValueError("return_as must be 'dataframe' or 'cube'")