ticoi 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ticoi might be problematic.
- ticoi/__about__.py +1 -0
- ticoi/__init__.py +0 -0
- ticoi/core.py +1500 -0
- ticoi/cube_data_classxr.py +2204 -0
- ticoi/cube_writer.py +741 -0
- ticoi/example.py +81 -0
- ticoi/filtering_functions.py +676 -0
- ticoi/interpolation_functions.py +236 -0
- ticoi/inversion_functions.py +1015 -0
- ticoi/mjd2date.py +31 -0
- ticoi/optimize_coefficient_functions.py +264 -0
- ticoi/pixel_class.py +1830 -0
- ticoi/seasonality_functions.py +209 -0
- ticoi/utils.py +725 -0
- ticoi-0.0.1.dist-info/METADATA +152 -0
- ticoi-0.0.1.dist-info/RECORD +18 -0
- ticoi-0.0.1.dist-info/WHEEL +4 -0
- ticoi-0.0.1.dist-info/licenses/LICENSE +165 -0
@@ -0,0 +1,2204 @@
"""
Class object to store and manipulate velocity observation data

Author : Laurane Charrier, Lei Guo, Nathan Lioret
Reference:
    Charrier, L., Yan, Y., Koeniguer, E. C., Leinss, S., & Trouvé, E. (2021). Extraction of velocity time series with an optimal temporal sampling from displacement
    observation networks. IEEE Transactions on Geoscience and Remote Sensing.
    Charrier, L., Yan, Y., Colin Koeniguer, E., Mouginot, J., Millan, R., & Trouvé, E. (2022). Fusion of multi-temporal and multi-sensor ice velocity observations.
    ISPRS annals of the photogrammetry, remote sensing and spatial information sciences, 3, 311-318.
"""

import itertools
import os
import time
import warnings
from functools import reduce
from typing import Optional

import dask
import dask.array as da
import geopandas
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio as rio
import rasterio.enums
import rasterio.warp
import xarray as xr
from dask.array.lib.stride_tricks import sliding_window_view
from dask.diagnostics import ProgressBar
from joblib import Parallel, delayed
from pyproj import CRS, Proj, Transformer
from rasterio.features import rasterize
from tqdm import tqdm

from ticoi.filtering_functions import dask_filt_warpper, dask_smooth_wrapper
from ticoi.inversion_functions import construction_dates_range_np
from ticoi.mjd2date import mjd2date

# %% ======================================================================== #
#                              CUBE DATA CLASS                                #
# =========================================================================%% #
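# Illustrative usage sketch (editor's addition, not part of the released file):
# a minimal end-to-end call sequence suggested by the methods below. The file
# path and pixel coordinates are placeholders.
#
#     from ticoi.cube_data_classxr import CubeDataClass
#
#     cube = CubeDataClass()
#     cube.load("path/to/velocity_cube.nc", proj="EPSG:4326", verbose=True)
#     data, mean, dates_range = cube.load_pixel(86.86, 27.98, proj="EPSG:4326")
#     print(cube.nx, cube.ny, cube.nz)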


class CubeDataClass:
    def __init__(self, cube=None, ds=None):
        """
        Initialisation of the main attributes, or copy cube's attributes and ds dataset if given.

        :param cube: [cube_data_class] --- Cube to copy
        :param ds: [xr dataset | None] --- New dataset. If None, copy cube's dataset
        """

        if not isinstance(cube, CubeDataClass):
            self.filedir = ""
            self.filename = ""
            self.nx = 250
            self.ny = 250
            self.nz = 0
            self.author = ""
            self.source = ""
            self.ds = xr.Dataset({})
            self.resolution = 50
            self.is_TICO = False

        else:  # load the cube information
            self.filedir = cube.filedir
            self.filename = cube.filename
            self.nx = cube.nx
            self.ny = cube.ny
            self.nz = cube.nz
            self.author = cube.author
            self.source = cube.source
            self.ds = cube.ds if ds is None else ds
            self.resolution = cube.resolution
            self.is_TICO = cube.is_TICO

    def update_dimension(self, time_dim: str = "mid_date"):
        """
        Update the attributes corresponding to cube dimensions: nx, ny, and nz

        :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
        """

        self.nx = self.ds["x"].sizes["x"]
        self.ny = self.ds["y"].sizes["y"]
        self.nz = self.ds[time_dim].sizes[time_dim]
        if len(self.ds["x"]) != 0 and len(self.ds["y"]) != 0:
            self.resolution = self.ds["x"].values[1] - self.ds["x"].values[0]
        else:
            raise ValueError("Your cube is empty, please check the subset or buffer coordinates you provided")
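
    # Illustrative sketch (editor's addition): the copy-constructor path above can
    # be used to derive a new cube that shares the metadata of an existing one.
    # The date range below is a placeholder.
    #
    #     filtered = CubeDataClass(cube, ds=cube.ds.sel(mid_date=slice("2018-01-01", "2020-01-01")))
    #     filtered.update_dimension()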

    def subset(self, proj: str, subset: list):
        """
        Crop the dataset according to 4 coordinates describing a rectangle.

        :param proj: [str] --- EPSG system of the coordinates given in subset
        :param subset: [list] --- A list of 4 floats used to give a subset of the dataset: [xmin, xmax, ymax, ymin]
        """

        if CRS(self.ds.proj4) != CRS(proj):
            transformer = Transformer.from_crs(
                CRS(proj), CRS(self.ds.proj4)
            )  # convert the coordinates from proj to self.ds.proj4
            lon1, lat1 = transformer.transform(subset[2], subset[1])
            lon2, lat2 = transformer.transform(subset[3], subset[1])
            lon3, lat3 = transformer.transform(subset[2], subset[1])
            lon4, lat4 = transformer.transform(subset[3], subset[0])
            self.ds = self.ds.sel(
                x=slice(np.min([lon1, lon2, lon3, lon4]), np.max([lon1, lon2, lon3, lon4])),
                y=slice(np.max([lat1, lat2, lat3, lat4]), np.min([lat1, lat2, lat3, lat4])),
            )
            del lon1, lon2, lon3, lon4, lat1, lat2, lat3, lat4
        else:
            self.ds = self.ds.sel(
                x=slice(np.min([subset[0], subset[1]]), np.max([subset[0], subset[1]])),
                y=slice(np.max([subset[2], subset[3]]), np.min([subset[2], subset[3]])),
            )

        if len(self.ds["x"].values) == 0 and len(self.ds["y"].values) == 0:
            print(f"[Data loading] The given subset is not part of cube {self.filename}")

    def buffer(self, proj: str, buffer: list):
        """
        Crop the dataset around a given pixel; the number of surrounding pixels kept is given by the buffer.

        :param proj: [str] --- EPSG system of the coordinates given in buffer
        :param buffer: [list] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        """

        if CRS(self.ds.proj4) != CRS(proj):  # Convert the coordinates from proj to self.ds.proj4
            transformer = Transformer.from_crs(CRS(proj), CRS(self.ds.proj4))
            i1, j1 = transformer.transform(buffer[1] + buffer[2], buffer[0] - buffer[2])
            i2, j2 = transformer.transform(buffer[1] - buffer[2], buffer[0] + buffer[2])
            i3, j3 = transformer.transform(buffer[1] + buffer[2], buffer[0] + buffer[2])
            i4, j4 = transformer.transform(buffer[1] - buffer[2], buffer[0] - buffer[2])
            self.ds = self.ds.sel(
                x=slice(np.min([i1, i2, i3, i4]), np.max([i1, i2, i3, i4])),
                y=slice(np.max([j1, j2, j3, j4]), np.min([j1, j2, j3, j4])),
            )
            del i1, i2, j1, j2, i3, i4, j3, j4
        else:
            i1, j1 = buffer[0] - buffer[2], buffer[1] + buffer[2]
            i2, j2 = buffer[0] + buffer[2], buffer[1] - buffer[2]
            self.ds = self.ds.sel(
                x=slice(np.min([i1, i2]), np.max([i1, i2])), y=slice(np.max([j1, j2]), np.min([j1, j2]))
            )
            del i1, i2, j1, j2, buffer

        if len(self.ds["x"].values) == 0 and len(self.ds["y"].values) == 0:
            print(f"[Data loading] The given pixel and its surrounding buffer are not part of cube {self.filename}")

    def determine_optimal_chunk_size(
        self,
        variable_name: str = "vx",
        x_dim: str = "x",
        y_dim: str = "y",
        time_dim: str = "mid_date",
        verbose: bool = False,
    ) -> (int, int, int):  # type: ignore
        """
        A function to determine the optimal chunk size for a given time series array based on its size.
        This function is from gtsa DOI 10.5281/zenodo.8188085.

        :param variable_name: [str] [default is 'vx'] --- Name of the variable containing the time series array
        :param x_dim: [str] [default is 'x'] --- Name of the x dimension in the array
        :param y_dim: [str] [default is 'y'] --- Name of the y dimension in the array
        :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
        :param verbose: [bool] [default is False] --- Boolean flag to control verbosity of output

        :return tc: [int] --- Chunk size along the time dimension
        :return yc: [int] --- Chunk size along the y dimension
        :return xc: [int] --- Chunk size along the x dimension
        """

        if verbose:
            print("[Data loading] Dask chunk size:")

        # set chunk size to 5 MB if single time series array < 1 MB in size, else increase to max of 1 GB chunk sizes.
        time_series_array_size = (
            self.ds[variable_name]
            .sel(
                {
                    x_dim: self.ds[variable_name][x_dim].values[0],
                    y_dim: self.ds[variable_name][y_dim].values[0],
                }
            )
            .nbytes
        )
        mb = 1048576
        if time_series_array_size < 1e6:
            chunk_size_limit = 50 * mb
        elif time_series_array_size < 1e7:
            chunk_size_limit = 100 * mb
        elif time_series_array_size < 1e8:
            chunk_size_limit = 200 * mb
        else:
            chunk_size_limit = 1000 * mb

        time_axis = self.ds[variable_name].dims.index(time_dim)
        x_axis = self.ds[variable_name].dims.index(x_dim)
        y_axis = self.ds[variable_name].dims.index(y_dim)
        axis_sizes = {i: -1 if i == time_axis else "auto" for i in range(3)}
        arr = self.ds[variable_name].data.rechunk(axis_sizes, block_size_limit=chunk_size_limit, balance=True)
        tc, yc, xc = arr.chunks[time_axis][0], arr.chunks[y_axis][0], arr.chunks[x_axis][0]
        chunksize = self.ds[variable_name][:tc, :yc, :xc].nbytes / 1e6
        if verbose:
            print("[Data loading] Chunk shape:", "(" + ",".join([str(x) for x in [tc, yc, xc]]) + ")")
            print(
                "[Data loading] Chunk size:",
                self.ds[variable_name][:tc, :yc, :xc].nbytes,
                "(" + str(round(chunksize, 1)) + "MB)",
            )
        return tc, yc, xc
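
    # Illustrative sketch (editor's addition; variable and dimension names are the
    # defaults assumed by the method above):
    #
    #     tc, yc, xc = cube.determine_optimal_chunk_size(variable_name="vx", time_dim="mid_date", verbose=True)
    #     cube.ds = cube.ds.chunk({"mid_date": tc, "y": yc, "x": xc})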

    # %% ==================================================================== #
    #                          CUBE LOADING METHODS                           #
    # =====================================================================%% #

    def load_itslive(
        self,
        filepath: str,
        conf: bool = False,
        subset: list | None = None,
        buffer: list | None = None,
        pick_date: list | None = None,
        pick_sensor: list | None = None,
        pick_temp_bas: list | None = None,
        proj: str = "EPSG:4326",
        verbose: bool = False,
    ):
        """
        Load a cube dataset written by ITS_LIVE.

        :param filepath: [str] --- Filepath of the dataset
        :param conf: [bool] [default is False] --- If True, convert the error into a confidence between 0 and 1
        :param subset: [list | None] [default is None] --- A list of 4 floats used to give a subset of the dataset, in the form [xmin, xmax, ymin, ymax]
        :param buffer: [list | None] [default is None] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
        :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
        :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline is between these two integers
        :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
        :param verbose: [bool] [default is False] --- Print information throughout the process
        """

        if verbose:
            print(f"[Data loading] Path to cube file : {filepath}")

        self.filedir = os.path.dirname(filepath)  # Path where the netcdf file is stored
        self.filename = os.path.basename(filepath)  # Name of the netcdf file
        self.ds = self.ds.assign_attrs({"proj4": self.ds["mapping"].proj4text})
        self.author = self.ds.author.split(", a NASA")[0]
        self.source = self.ds.url

        if subset is not None:  # Crop according to 4 coordinates
            self.subset(proj, subset)
        elif buffer is not None:  # Crop the dataset around a given pixel, according to a given buffer
            self.buffer(proj, buffer)
        if pick_date is not None:
            self.ds = self.ds.where(
                (
                    (self.ds["acquisition_date_img1"] >= np.datetime64(pick_date[0]))
                    & (self.ds["acquisition_date_img2"] <= np.datetime64(pick_date[1]))
                ).compute(),
                drop=True,
            )

        self.update_dimension()  # Update self.nx, self.ny, self.nz

        if conf:
            minconfx = np.nanmin(self.ds["vx_error"].values[:])
            maxconfx = np.nanmax(self.ds["vx_error"].values[:])
            minconfy = np.nanmin(self.ds["vy_error"].values[:])
            maxconfy = np.nanmax(self.ds["vy_error"].values[:])

        date1 = np.array([np.datetime64(date_str, "D") for date_str in self.ds["acquisition_date_img1"].values])
        date2 = np.array([np.datetime64(date_str, "D") for date_str in self.ds["acquisition_date_img2"].values])

        # np.char.strip is used to remove the null character ('�') from each element and np.core.defchararray.add to
        # concatenate arrays of different types
        try:
            sensor = np._core.defchararray.add(
                np.char.strip(self.ds["mission_img1"].values.astype(str), "�"),
                np.char.strip(self.ds["satellite_img1"].values.astype(str), "�"),
            ).astype("U10")
        except AttributeError:  # in old numpy versions, module 'numpy._core' has no attribute 'defchararray'
            sensor = np.core.defchararray.add(
                np.char.strip(self.ds["mission_img1"].values.astype(str), "�"),
                np.char.strip(self.ds["satellite_img1"].values.astype(str), "�"),
            ).astype("U10")
        sensor[sensor == "L7"] = "Landsat-7"
        sensor[sensor == "L8"] = "Landsat-8"
        sensor[sensor == "L9"] = "Landsat-9"
        sensor[np.isin(sensor, ["S1A", "S1B"])] = "Sentinel-1"
        sensor[np.isin(sensor, ["S2A", "S2B"])] = "Sentinel-2"

        if conf:  # Normalize the error between 0 and 1, and convert error into confidence
            errorx = 1 - (self.ds["vx_error"].values - minconfx) / (maxconfx - minconfx)
            errory = 1 - (self.ds["vy_error"].values - minconfy) / (maxconfy - minconfy)
        else:
            errorx = self.ds["vx_error"].values
            errory = self.ds["vy_error"].values

        # Drop variables not in the specified list
        variables_to_keep = ["vx", "vy", "mid_date", "x", "y"]
        self.ds = self.ds.drop_vars([var for var in self.ds.variables if var not in variables_to_keep])
        # Drop attributes not in the specified list
        attributes_to_keep = ["date_created", "mapping", "author", "proj4"]
        self.ds.attrs = {attr: self.ds.attrs[attr] for attr in attributes_to_keep if attr in self.ds.attrs}

        # self.ds = self.ds.unify_chunks()  # to avoid error ValueError: Object has inconsistent chunks along
        # dimension mid_date. This can be fixed by calling unify_chunks(). Create new variables and chunk them
        self.ds["sensor"] = xr.DataArray(sensor, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
        self.ds = self.ds.unify_chunks()
        self.ds["date1"] = xr.DataArray(date1, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
        self.ds = self.ds.unify_chunks()
        self.ds["date2"] = xr.DataArray(date2, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
        self.ds = self.ds.unify_chunks()
        self.ds["source"] = xr.DataArray(["ITS_LIVE"] * self.nz, dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds = self.ds.unify_chunks()
        self.ds["errorx"] = xr.DataArray(errorx, dims=["mid_date"], coords={"mid_date": self.ds.mid_date}).chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds = self.ds.unify_chunks()
        self.ds["errory"] = xr.DataArray(errory, dims=["mid_date"], coords={"mid_date": self.ds.mid_date}).chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )

        if pick_sensor is not None:
            self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
        if pick_temp_bas is not None:
            temp = (self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")
            self.ds = self.ds.where(((pick_temp_bas[0] < temp) & (temp < pick_temp_bas[1])).compute(), drop=True)
            del temp
        self.ds = self.ds.unify_chunks()
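
    # Illustrative sketch (editor's addition): the confidence conversion used when
    # conf=True is a min-max normalisation flipped so that a low error gives a high
    # confidence, e.g. with errors of 1, 3 and 5 m/y:
    #
    #     errors = np.array([1.0, 3.0, 5.0])
    #     conf = 1 - (errors - errors.min()) / (errors.max() - errors.min())  # -> [1.0, 0.5, 0.0]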

    def load_millan(
        self,
        filepath: str,
        conf: bool = False,
        subset: list | None = None,
        buffer: list | None = None,
        pick_date: list | None = None,
        pick_sensor: list | None = None,
        pick_temp_bas: list | None = None,
        proj: str = "EPSG:4326",
        verbose: bool = False,
    ):
        """
        Load a cube dataset written by R. Millan et al.

        :param filepath: [str] --- Filepath of the dataset
        :param conf: [bool] [default is False] --- If True, convert the error into a confidence between 0 and 1
        :param subset: [list | None] [default is None] --- A list of 4 floats used to give a subset of the dataset, in the form [xmin, xmax, ymin, ymax]
        :param buffer: [list | None] [default is None] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
        :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
        :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline is between these two integers
        :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
        :param verbose: [bool] [default is False] --- Print information throughout the process
        """

        if verbose:
            print(f"[Data loading] Path to cube file : {filepath}")

        self.filedir = os.path.dirname(filepath)
        self.filename = os.path.basename(filepath)  # name of the netcdf file
        self.author = "IGE"  # name of the author
        self.source = self.ds.source
        del filepath

        # self.split_cube(n_split=2, dim=['x', 'y'], savepath=f"{self.filedir}/{self.filename[:-3]}_")

        if subset is not None:  # Crop according to 4 coordinates
            self.subset(proj, subset)
        elif buffer is not None:  # Crop the dataset around a given pixel, according to a given buffer
            self.buffer(proj, buffer)

        # Uniformization of the name and format of the time coordinate
        self.ds = self.ds.rename({"z": "mid_date"})

        date1 = [mjd2date(date_str) for date_str in self.ds["date1"].values]  # conversion to dates
        date2 = [mjd2date(date_str) for date_str in self.ds["date2"].values]
        self.ds = self.ds.unify_chunks()
        self.ds["date1"] = xr.DataArray(np.array(date1).astype("datetime64[ns]"), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds = self.ds.unify_chunks()
        self.ds["date2"] = xr.DataArray(np.array(date2).astype("datetime64[ns]"), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds = self.ds.unify_chunks()
        del date1, date2

        # Temporal subset between two dates
        if pick_date is not None:
            self.ds = self.ds.where(
                (
                    (self.ds["date1"] >= np.datetime64(pick_date[0]))
                    & (self.ds["date2"] <= np.datetime64(pick_date[1]))
                ).compute(),
                drop=True,
            )
        del pick_date

        self.ds = self.ds.assign_coords(
            mid_date=np.array(self.ds["date1"] + (self.ds["date2"] - self.ds["date1"]) // 2)
        )
        self.update_dimension()

        if conf and "confx" not in self.ds.data_vars:  # convert the errors into confidence indicators between 0 and 1
            minconfx = np.nanmin(self.ds["error_vx"].values[:])
            maxconfx = np.nanmax(self.ds["error_vx"].values[:])
            minconfy = np.nanmin(self.ds["error_vy"].values[:])
            maxconfy = np.nanmax(self.ds["error_vy"].values[:])
            errorx = 1 - (self.ds["error_vx"].values - minconfx) / (maxconfx - minconfx)
            errory = 1 - (self.ds["error_vy"].values - minconfy) / (maxconfy - minconfy)
        else:
            errorx = self.ds["error_vx"].values[:]
            errory = self.ds["error_vy"].values[:]

        # Homogenize sensor names
        sensor = np.char.strip(
            self.ds["sensor"].values.astype(str), "�"
        )  # np.char.strip is used to remove the null character ('�') from each element
        sensor[np.isin(sensor, ["S1"])] = "Sentinel-1"
        sensor[np.isin(sensor, ["S2"])] = "Sentinel-2"
        sensor[np.isin(sensor, ["landsat-8", "L8", "L8. "])] = "Landsat-8"

        # Drop variables not in the specified list
        self.ds = self.ds.drop_vars(
            [var for var in self.ds.variables if var not in ["vx", "vy", "mid_date", "x", "y", "date1", "date2"]]
        )
        self.ds = self.ds.transpose("mid_date", "y", "x")

        # Store the variables in the xarray dataset
        self.ds["sensor"] = xr.DataArray(sensor, dims="mid_date").chunk({"mid_date": self.ds.chunks["mid_date"]})
        del sensor
        self.ds = self.ds.unify_chunks()
        self.ds["source"] = xr.DataArray(["IGE"] * self.nz, dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds = self.ds.unify_chunks()
        self.ds["errorx"] = xr.DataArray(
            np.tile(errorx[:, np.newaxis, np.newaxis], (1, self.ny, self.nx)),
            dims=["mid_date", "y", "x"],
            coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
        ).chunk(chunks=self.ds.chunks)
        self.ds = self.ds.unify_chunks()
        self.ds["errory"] = xr.DataArray(
            np.tile(errory[:, np.newaxis, np.newaxis], (1, self.ny, self.nx)),
            dims=["mid_date", "y", "x"],
            coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
        ).chunk(chunks=self.ds.chunks)
        del errorx, errory

        # Pick sensors or temporal baselines
        if pick_sensor is not None:
            self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
        if pick_temp_bas is not None:
            self.ds = self.ds.sel(
                mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
                & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
            )
        self.ds = self.ds.unify_chunks()

    def load_ducasse(
        self,
        filepath: str,
        conf: bool = False,
        subset: list | None = None,
        buffer: list | None = None,
        pick_date: list | None = None,
        pick_sensor: list | None = None,
        pick_temp_bas: list | None = None,
        proj: str = "EPSG:4326",
        verbose: bool = False,
    ):
        """
        Load a cube dataset written by E. Ducasse et al. (Pleiades data).

        :param filepath: [str] --- Filepath of the dataset
        :param conf: [bool] [default is False] --- If True, convert the error into a confidence between 0 and 1
        :param subset: [list | None] [default is None] --- A list of 4 floats used to give a subset of the dataset, in the form [xmin, xmax, ymin, ymax]
        :param buffer: [list | None] [default is None] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
        :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
        :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline is between these two integers
        :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
        :param verbose: [bool] [default is False] --- Print information throughout the process
        """

        if verbose:
            print(f"[Data loading] Path to cube file : {filepath}")

        self.ds = self.ds.chunk({"x": 125, "y": 125, "time": 2000})  # set chunks
        self.filedir = os.path.dirname(filepath)
        self.filename = os.path.basename(filepath)  # name of the netcdf file
        self.author = "IGE"  # name of the author
        del filepath

        # Spatial subset
        if subset is not None:  # crop according to 4 coordinates
            self.subset(proj, subset)
        elif buffer is not None:  # crop the dataset around a given pixel, according to a given buffer
            self.buffer(proj, buffer)

        # Uniformization of the name and format of the time coordinate
        self.ds = self.ds.rename({"time": "mid_date"})

        date1 = [date_str.split(" ")[0] for date_str in self.ds["mid_date"].values]
        date2 = [date_str.split(" ")[1] for date_str in self.ds["mid_date"].values]
        self.ds["date1"] = xr.DataArray(np.array(date1).astype("datetime64[ns]"), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds["date2"] = xr.DataArray(np.array(date2).astype("datetime64[ns]"), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        del date1, date2

        # Temporal subset between two dates
        if pick_date is not None:
            self.ds = self.ds.where(
                (
                    (self.ds["date1"] >= np.datetime64(pick_date[0]))
                    & (self.ds["date2"] <= np.datetime64(pick_date[1]))
                ).compute(),
                drop=True,
            )
        del pick_date

        self.ds = self.ds.assign_coords(
            mid_date=np.array(self.ds["date1"] + (self.ds["date2"] - self.ds["date1"]) // 2)
        )
        self.update_dimension()  # update self.nx, self.ny and self.nz

        # Drop variables not in the specified list
        variables_to_keep = ["vx", "vy", "mid_date", "x", "y", "date1", "date2"]
        self.ds = self.ds.drop_vars([var for var in self.ds.variables if var not in variables_to_keep])
        self.ds = self.ds.transpose("mid_date", "y", "x")

        # Store the variables in the xarray dataset
        self.ds["sensor"] = xr.DataArray(["Pleiades"] * self.nz, dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds["source"] = xr.DataArray(["IGE"] * self.nz, dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds["vy"] = -self.ds["vy"]

        # Pick sensors or temporal baselines
        if pick_sensor is not None:
            self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
        if pick_temp_bas is not None:
            self.ds = self.ds.sel(
                mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
                & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
            )

        # Set errors equal to one (no information on the error here)
        self.ds["errorx"] = xr.DataArray(np.ones(self.ds["mid_date"].size), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )
        self.ds["errory"] = xr.DataArray(np.ones(self.ds["mid_date"].size), dims="mid_date").chunk(
            {"mid_date": self.ds.chunks["mid_date"]}
        )

    def load_charrier(
        self,
        filepath: str,
        conf: bool = False,
        subset: list | None = None,
        buffer: list | None = None,
        pick_date: list | None = None,
        pick_sensor: list | None = None,
        pick_temp_bas: list | None = None,
        proj: str = "EPSG:4326",
        verbose: bool = False,
    ):
        """
        Load a cube dataset written by L. Charrier et al.

        :param filepath: [str] --- Filepath of the dataset
        :param conf: [bool] [default is False] --- If True, convert the error into a confidence between 0 and 1
        :param subset: [list | None] [default is None] --- A list of 4 floats used to give a subset of the dataset, in the form [xmin, xmax, ymin, ymax]
        :param buffer: [list | None] [default is None] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
        :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
        :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline is between these two integers
        :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
        :param verbose: [bool] [default is False] --- Print information throughout the process
        """

        if verbose:
            print(f"[Data loading] Path to cube file {'(TICO cube)' if self.is_TICO else ''} : {filepath}")

        # Information about the cube
        self.filedir = os.path.dirname(filepath)
        self.filename = os.path.basename(filepath)  # Name of the netcdf file
        if self.ds.author == "J. Mouginot, R.Millan, A.Derkacheva_aligned":
            self.author = "IGE"  # Name of the author
        else:
            self.author = self.ds.author
        self.source = self.ds.source
        del filepath

        # Select specific data within the cube
        if subset is not None:  # Crop according to 4 coordinates
            self.subset(proj, subset)
        elif buffer is not None:  # Crop the dataset around a given pixel, according to a given buffer
            self.buffer(proj, buffer)

        time_dim = "mid_date" if not self.is_TICO else "second_date"  # 'second_date' if we load TICO data
        self.update_dimension(time_dim)

        # Temporal subset between two dates
        if pick_date is not None:
            if not self.is_TICO:
                self.ds = self.ds.where(
                    (
                        (self.ds["date1"] >= np.datetime64(pick_date[0]))
                        & (self.ds["date2"] <= np.datetime64(pick_date[1]))
                    ).compute(),
                    drop=True,
                )
            else:
                self.ds = self.ds.where(
                    (
                        (self.ds["second_date"] >= np.datetime64(pick_date[0]))
                        & (self.ds["second_date"] <= np.datetime64(pick_date[1]))
                    ).compute(),
                    drop=True,
                )
        del pick_date

        self.update_dimension(time_dim)

        # Pick sensors or temporal baselines
        if pick_sensor is not None:
            if not self.is_TICO:
                self.ds = self.ds.sel(mid_date=self.ds["sensor"].isin(pick_sensor))
            else:
                self.ds = self.ds.sel(second_date=self.ds["sensor"].isin(pick_sensor))

        # The following properties are not available for TICO cubes
        if not self.is_TICO:
            # Pick specific temporal baselines
            if pick_temp_bas is not None:
                self.ds = self.ds.sel(
                    mid_date=(pick_temp_bas[0] < ((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")))
                    & (((self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")) < pick_temp_bas[1])
                )

            # Convert the errors into confidence indicators between 0 and 1
            if conf and "confx" not in self.ds.data_vars:
                minconfx = np.nanmin(self.ds["errorx"].values[:])
                maxconfx = np.nanmax(self.ds["errorx"].values[:])
                minconfy = np.nanmin(self.ds["errory"].values[:])
                maxconfy = np.nanmax(self.ds["errory"].values[:])
                errorx = 1 - (self.ds["errorx"].values - minconfx) / (maxconfx - minconfx)
                errory = 1 - (self.ds["errory"].values - minconfy) / (maxconfy - minconfy)
                self.ds["errorx"] = xr.DataArray(
                    errorx,
                    dims=["mid_date", "y", "x"],
                    coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
                ).chunk(chunks=self.ds.chunks)
                self.ds["errory"] = xr.DataArray(
                    errory,
                    dims=["mid_date", "y", "x"],
                    coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
                ).chunk(chunks=self.ds.chunks)

            # For cubes written with write_result_TICOI
            if "source" not in self.ds.variables:
                self.ds["source"] = xr.DataArray([self.ds.author] * self.nz, dims="mid_date").chunk(
                    {"mid_date": self.ds.chunks["mid_date"]}
                )
            if "sensor" not in self.ds.variables:
                self.ds["sensor"] = xr.DataArray([self.ds.sensor] * self.nz, dims="mid_date").chunk(
                    {"mid_date": self.ds.chunks["mid_date"]}
                )

    def load(
        self,
        filepath: list | str,
        chunks: dict | str | int = {},
        conf: bool = False,
        subset: str | None = None,
        buffer: str | None = None,
        pick_date: str | None = None,
        pick_sensor: str | None = None,
        pick_temp_bas: str | None = None,
        proj: str = "EPSG:4326",
        mask: str | xr.DataArray = None,
        reproj_coord: bool = False,
        reproj_vel: bool = False,
        verbose: bool = False,
    ):
        """
        Load a cube dataset from a file in netcdf (.nc) or zarr format. The data are directly stored within the present object.

        :param filepath: [list | str] --- Filepath of the dataset; if a list of filepaths is given, load all the cubes and merge them
        :param chunks: [dict] --- Dictionary with the size of chunks for each dimension; if chunks=-1, load the dataset with dask using a single chunk for all arrays.
                       chunks={} loads the dataset with dask using the engine's preferred chunks if exposed by the backend, otherwise with a single chunk for all arrays;
                       chunks='auto' will use dask auto chunking, taking into account the engine's preferred chunks.
        :param conf: [bool] [default is False] --- If True, convert the error into a confidence between 0 and 1
        :param subset: [list | None] [default is None] --- A list of 4 floats used to give a subset of the dataset, in the form [xmin, xmax, ymin, ymax]
        :param buffer: [list | None] [default is None] --- A list of 3 floats: the first two are the longitude and the latitude of the central point, the last one is the buffer size
        :param pick_date: [list | None] [default is None] --- A list of 2 strings yyyy-mm-dd; pick the data between these two dates
        :param pick_sensor: [list | None] [default is None] --- A list of strings; pick only the corresponding sensors
        :param pick_temp_bas: [list | None] [default is None] --- A list of 2 integers; pick only the data whose temporal baseline is between these two integers
        :param proj: [str] [default is 'EPSG:4326'] --- Projection of the buffer or subset which is given
        :param mask: [str | xr dataarray | None] [default is None] --- Mask some of the data of the cube; either a dataarray with 0 and 1, or a path to a dataarray or a .shp file
        :param reproj_coord: [bool] [default is False] --- If True, reproject the second cube of the list filepath to the grid coordinates of the first cube
        :param reproj_vel: [bool] [default is False] --- If True, reproject the velocity components to match the coordinate system of the first cube
        :param verbose: [bool] [default is False] --- Print information throughout the process
        """
        self.__init__()

        assert isinstance(filepath, list) or isinstance(filepath, str), (
            f"The filepath must be a string (path to the cube file) or a list of strings, not {type(filepath)}."
        )

        time_dim_name = {
            "ITS_LIVE, a NASA MEaSUREs project (its-live.jpl.nasa.gov)": "mid_date",
            "J. Mouginot, R.Millan, A.Derkacheva": "z",
            "J. Mouginot, R.Millan, A.Derkacheva_aligned": "mid_date",
            "L. Charrier, L. Guo": "mid_date",
            "L. Charrier": "mid_date",
            "E. Ducasse": "time",
            "S. Leinss, L. Charrier": "mid_date",
            "IGE": "mid_date",
        }  # dictionary to set the name of the time dimension for a given author

        if isinstance(filepath, list):  # Merge several cubes
            self.load(
                filepath[0],
                chunks=chunks,
                conf=conf,
                subset=subset,
                buffer=buffer,
                pick_date=pick_date,
                pick_sensor=pick_sensor,
                pick_temp_bas=pick_temp_bas,
                proj=proj,
                mask=mask,
                verbose=verbose,
            )

            for n in range(1, len(filepath)):
                cube2 = CubeDataClass()
                sub = [
                    self.ds["x"].min().values,
                    self.ds["x"].max().values,
                    self.ds["y"].min().values,
                    self.ds["y"].max().values,
                ]
                cube2.load(
                    filepath[n],
                    chunks=chunks,
                    conf=conf,
                    subset=sub,
                    pick_date=pick_date,
                    pick_sensor=pick_sensor,
                    pick_temp_bas=pick_temp_bas,
                    proj=self.ds.proj4,
                    mask=mask,
                    verbose=verbose,
                )
                # Align the new cube to the main one (interpolate the coordinates and/or reproject it)
                if reproj_vel or reproj_coord:
                    cube2 = self.align_cube(
                        cube2, reproj_vel=reproj_vel, reproj_coord=reproj_coord, interp_method="nearest"
                    )
                self.merge_cube(cube2)  # Merge the new cube into the main one
                del cube2
            if chunks == {}:  # Rechunk with optimal chunk size
                var_name = "vx" if not self.is_TICO else "dx"
                time_dim = time_dim_name[self.ds.author] if not self.is_TICO else "second_date"
                tc, yc, xc = self.determine_optimal_chunk_size(
                    variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
                )
                self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})

        else:  # Load one cube
            with dask.config.set(**{"array.slicing.split_large_chunks": False}):  # To avoid creating large chunks
                if filepath.split(".")[-1] == "nc":
                    try:
                        self.ds = xr.open_dataset(filepath, engine="h5netcdf", chunks=chunks)
                    except NotImplementedError:  # Cannot use auto rechunking with object dtype; we are unable to estimate the size in bytes of object data
                        chunks = {}
                        self.ds = xr.open_dataset(filepath, engine="h5netcdf", chunks=chunks)  # Set no chunks

                    if "Author" in self.ds.attrs:  # Uniformization of the attribute Author to author
                        self.ds.attrs["author"] = self.ds.attrs.pop("Author")

                    self.is_TICO = False if time_dim_name[self.ds.author] in self.ds.dims else True
                    time_dim = time_dim_name[self.ds.author] if not self.is_TICO else "second_date"
                    var_name = "vx" if not self.is_TICO else "dx"

                    if chunks == {}:  # Rechunk with optimal chunk size
                        tc, yc, xc = self.determine_optimal_chunk_size(
                            variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
                        )
                        self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})

                elif filepath.split(".")[-1] == "zarr":  # the zarr cube is not rechunked
                    if chunks == {}:
                        chunks = "auto"  # Change the default value to auto
                    self.ds = xr.open_dataset(
                        filepath, decode_timedelta=False, engine="zarr", consolidated=True, chunks=chunks
                    )
                    self.is_TICO = False
                    var_name = "vx"

                if verbose:
                    print("[Data loading] File open")

                dico_load = {
                    "ITS_LIVE, a NASA MEaSUREs project (its-live.jpl.nasa.gov)": self.load_itslive,
                    "J. Mouginot, R.Millan, A.Derkacheva": self.load_millan,
                    "J. Mouginot, R.Millan, A.Derkacheva_aligned": self.load_charrier,
                    "L. Charrier, L. Guo": self.load_charrier,
                    "L. Charrier": self.load_charrier,
                    "E. Ducasse": self.load_ducasse,
                    "S. Leinss, L. Charrier": self.load_charrier,
                }
                dico_load[self.ds.author](
                    filepath,
                    pick_date=pick_date,
                    subset=subset,
                    conf=conf,
                    pick_sensor=pick_sensor,
                    pick_temp_bas=pick_temp_bas,
                    buffer=buffer,
                    proj=proj,
                )

            time_dim = "mid_date" if not self.is_TICO else "second_date"

            # Rechunk again if the size of the cube has changed
            if any(x is not None for x in [pick_date, subset, buffer, pick_sensor, pick_temp_bas]):
                tc, yc, xc = self.determine_optimal_chunk_size(
                    variable_name=var_name, x_dim="x", y_dim="y", time_dim=time_dim, verbose=verbose
                )
                self.ds = self.ds.chunk({time_dim: tc, "x": xc, "y": yc})

            # Reorder the coordinates to keep the consistency
            self.ds = self.ds.copy().sortby(time_dim).transpose("x", "y", time_dim)
            self.standardize_cube_for_processing(time_dim)

            if mask is not None:
                self.mask_cube(mask)

            if self.ds.rio.crs is None:
                self.ds.rio.write_crs(self.ds.proj4)

            if verbose:
                print(f"[Data loading] Author : {self.ds.author}")

    def standardize_cube_for_processing(self, time_dim="mid_date"):
        """
        Prepare the xarray dataset for the processing: transpose the dimensions, add a temporal_baseline variable and errors if they do not exist.

        :param time_dim: [str] [default is 'mid_date'] --- Name of the z dimension within the original dataset self.ds
        """

        self.ds = self.ds.unify_chunks()
        if self.ds.chunksizes[time_dim] != (self.nz,):  # no chunk in time
            self.ds = self.ds.chunk({time_dim: self.nz})

        if not self.is_TICO:
            # Create a variable for the temporal baseline
            self.ds["temporal_baseline"] = xr.DataArray(
                (self.ds["date2"] - self.ds["date1"]).dt.days.values, dims="mid_date"
            )

            # Add errors if they are not already there
            if "errorx" not in self.ds.variables:
                self.ds["errorx"] = ("mid_date", np.ones(len(self.ds["mid_date"])))
                self.ds["errory"] = ("mid_date", np.ones(len(self.ds["mid_date"])))

        if self.ds.rio.write_crs:
            self.ds = self.ds.rio.write_crs(self.ds.proj4)  # add the crs to the xarray dataset if missing

    def prepare_interpolation_date(
        self,
    ) -> (np.datetime64, np.datetime64):  # type: ignore
        """
        Define the first and last dates required for the interpolation, as the first and last dates in the observations.
        The purpose is to have homogenized results.

        :param cube: dataset

        :return: first and last date required for the interpolation
        """

        # Prepare interpolation dates
        cube_date1 = self.date1_().tolist()
        cube_date1 = cube_date1 + self.date2_().tolist()
        cube_date1.remove(np.min(cube_date1))
        first_date_interpol = np.min(cube_date1)
        last_date_interpol = np.max(self.date2_())

        return first_date_interpol, last_date_interpol
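
    # Illustrative sketch (editor's addition): the interpolation bounds returned
    # above can be reused to build a regular date grid, e.g.
    #
    #     first_date, last_date = cube.prepare_interpolation_date()
    #     grid = np.arange(np.datetime64(first_date), np.datetime64(last_date), np.timedelta64(30, "D"))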

    # %% ==================================================================== #
    #                                ACCESSORS                                #
    # =====================================================================%% #

    def sensor_(self) -> list:
        """
        Accessor to the sensors which captured the data.

        :return: [list] --- List of sensors
        """

        return self.ds["sensor"].values.tolist()

    def source_(self) -> list:
        """
        Accessor to the source of the data.

        :return: [list] --- List of sources
        """

        return self.ds["source"].values.tolist()

    def temp_base_(self, return_list: bool = True, format_date: str = "float") -> list | np.ndarray:
        """
        Get the temporal baseline of the dataset.

        :param return_list: [bool] [default is True] --- If True return a list of dates, else return a np array
        :param format_date: [str] [default is 'float'] --- 'float' or 'D', format of the dates in the output

        :return: [list | np array] --- List of the temporal baselines
        """

        if format_date == "D":
            temp = self.ds["date2"] - self.ds["date1"]
        elif format_date == "float":
            # temp = (self.ds['date2'].values-self.ds['date1'].values).astype('timedelta64[D]') / np.timedelta64(1, 'D')
            temp = (self.ds["date2"] - self.ds["date1"]) / np.timedelta64(1, "D")
        else:
            raise NameError("Please enter format as float or D")
        if return_list:
            return temp.values.tolist()
        else:
            return temp.values

    def date1_(self) -> np.array:
        """
        Accessor to the first dates of acquisition.

        :return: [np array] --- np array of date1
        """

        return np.asarray(self.ds["date1"]).astype("datetime64[D]")

    def date2_(self) -> np.array:
        """
        Accessor to the second dates of acquisition.

        :return: [np array] --- np array of date2
        """

        return np.asarray(self.ds["date2"]).astype("datetime64[D]")

    def datec_(self) -> np.array:
        """
        Accessor to the central dates of the data.

        :return: [np array] --- np array of central dates
        """

        return (self.date1_() + self.temp_base_(return_list=False, format_date="D") // 2).astype("datetime64[D]")

    def vv_(self) -> np.array:
        """
        Accessor to the magnitude of the velocities.

        :return: [np array] --- np array of velocity magnitudes
        """

        return np.sqrt(self.ds["vx"] ** 2 + self.ds["vy"] ** 2)

    def EPSG_code_(self) -> int:
        """
        Accessor to the EPSG code of the dataset.
        """

        return self.ds.rio.crs.to_epsg()
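
    # Illustrative sketch (editor's addition): the accessors return plain numpy
    # or list views of the cube, e.g.
    #
    #     baselines = cube.temp_base_(return_list=False)  # temporal baselines in days
    #     magnitude = cube.vv_()                          # velocity magnitude, sqrt(vx**2 + vy**2)
    #     print(cube.datec_()[:5], cube.EPSG_code_())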

    # %% ==================================================================== #
    #                          PIXEL LOADING METHODS                          #
    # =====================================================================%% #

    def convert_coordinates(self, i: int | float, j: int | float, proj: str, verbose: bool = False) -> (float, float):  # type: ignore
        """
        Convert the coordinates (i, j), which are in projection proj, to the projection of the cube dataset.

        :params i, j: [int | float] --- Coordinates to be converted
        :param proj: [str] --- Projection of the (i, j) coordinates
        :param verbose: [bool] [default is False] --- If True, print some text

        :return i, j: [int | float] --- Converted (i, j)
        """

        # Convert coordinates if needed
        if proj == "EPSG:4326":
            myproj = Proj(self.ds.proj4)
            i, j = myproj(i, j)
            if verbose:
                print(f"[Data loading] Converted to projection {self.ds.proj4}: {i, j}")
        else:
            if CRS(self.ds.proj4) != CRS(proj):
                transformer = Transformer.from_crs(CRS(proj), CRS(self.ds.proj4))
                i, j = transformer.transform(i, j)
                if verbose:
                    print(f"[Data loading] Converted to projection {self.ds.proj4}: {i, j}")
        return i, j
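
    # Illustrative sketch (editor's addition; lon/lat values are placeholders):
    #
    #     x, y = cube.convert_coordinates(86.86, 27.98, proj="EPSG:4326", verbose=True)
    #     # x, y are now expressed in the cube's own projection (cube.ds.proj4)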
|
|
1025
|
+
|
|
1026
|
+
def load_pixel(
|
|
1027
|
+
self,
|
|
1028
|
+
i: int | float,
|
|
1029
|
+
j: int | float,
|
|
1030
|
+
unit: int = 365,
|
|
1031
|
+
regu: int | str = "1accelnotnull",
|
|
1032
|
+
coef: int = 100,
|
|
1033
|
+
flag: xr.Dataset | None = None,
|
|
1034
|
+
solver: str = "LSMR_ini",
|
|
1035
|
+
interp: str = "nearest",
|
|
1036
|
+
proj: str = "EPSG:4326",
|
|
1037
|
+
rolling_mean: xr.Dataset | None = None,
|
|
1038
|
+
visual: bool = False,
|
|
1039
|
+
output_format="np",
|
|
1040
|
+
) -> (Optional[list], Optional[list], Optional[np.array], Optional[np.array], Optional[np.array]): # type: ignore
|
|
1041
|
+
"""
|
|
1042
|
+
Load data at pixel (i, j) and compute prior to inversion (rolling mean, mean, dates range...).
|
|
1043
|
+
|
|
1044
|
+
:params i, j: [int | float] --- Coordinates to be converted
|
|
1045
|
+
:param unit: [int] [default is 365] --- 1 for m/d, 365 for m/y
|
|
1046
|
+
:param regu: [int | str] [default is '1accelnotnull'] --- Type of regularization
|
|
1047
|
+
:param coef: [int] [default is 100] --- Coef of Tikhonov regularisation
|
|
1048
|
+
:param flag: [xr dataset | None] [default is None] --- If not None, the values of the coefficient used for stable areas, surge glacier and non surge glacier
|
|
1049
|
+
:param solver: [str] [default is 'LSMR_ini'] --- Solver of the inversion: 'LSMR', 'LSMR_ini', 'LS', 'LS_bounded', 'LSQR'
|
|
1050
|
+
:param interp: [str] [default is 'nearest'] --- Interpolation method used to load the pixel when it is not in the dataset ('nearest' or 'linear')
|
|
1051
|
+
:param proj: [str] [default is 'EPSG:4326'] --- Projection of (i, j) coordinates
|
|
1052
|
+
:param rolling_mean: [xr dataset | None] [default is None] --- Filtered dataset (e.g. rolling mean)
|
|
1053
|
+
:param visual: [bool] [default is False] --- Return additional information (sensor and source) for future plots
|
|
1054
|
+
:param output_format [str] [default is np] --- Format of the output data (np for numpy or df for pandas dataframe)
|
|
1055
|
+
|
|
1056
|
+
:return data: [list | None] --- A list 2 elements : the first one is np.ndarray with the observed
|
|
1057
|
+
:return mean: [list | None] --- A list with average vx and vy if solver=LSMR_ini, but the regularization do not require an apriori on the acceleration
|
|
1058
|
+
:return dates_range: [list | None] --- Dates between which the displacements will be inverted
|
|
1059
|
+
:return regu: [np array | Nothing] --- If flag is not None, regularisation method to be used for each pixel
|
|
1060
|
+
:return coef: [np array | Nothing] --- If flag is not None, regularisation coefficient to be used for each pixel
|
|
1061
|
+
"""
|
|
1062
|
+
|
|
1063
|
+
# Variables to keep
|
|
1064
|
+
var_to_keep = (
|
|
1065
|
+
["date1", "date2", "vx", "vy", "errorx", "errory", "temporal_baseline"]
|
|
1066
|
+
if not visual
|
|
1067
|
+
else ["date1", "date2", "vx", "vy", "errorx", "errory", "temporal_baseline", "sensor", "source"]
|
|
1068
|
+
)
|
|
1069
|
+
|
|
1070
|
+
if proj == "int":
|
|
1071
|
+
data = self.ds.isel(x=i, y=j)[var_to_keep]
|
|
1072
|
+
else:
|
|
1073
|
+
i, j = self.convert_coordinates(i, j, proj=proj) # convert the coordinates to the projection of the cube
|
|
1074
|
+
# Interpolate only necessary variables and drop NaN values
|
|
1075
|
+
if interp == "nearest":
|
|
1076
|
+
data = self.ds.sel(x=i, y=j, method="nearest")[var_to_keep]
|
|
1077
|
+
data = data.dropna(dim="mid_date")
|
|
1078
|
+
else:
|
|
1079
|
+
data = self.ds.interp(x=i, y=j, method=interp)[var_to_keep].dropna(
|
|
1080
|
+
dim="mid_date"
|
|
1081
|
+
) # 282 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
|
|
1082
|
+
|
|
1083
|
+
if flag is not None:
|
|
1084
|
+
if isinstance(regu, dict) and isinstance(coef, dict):
|
|
1085
|
+
flag = np.round(flag["flag"].sel(x=i, y=j, method="nearest").values)
|
|
1086
|
+
regu = regu[flag]
|
|
1087
|
+
coef = coef[flag]
|
|
1088
|
+
else:
|
|
1089
|
+
raise ValueError("regu must be a dict if assign_flag is True!")
|
|
1090
|
+
|
|
1091
|
+
data_dates = data[["date1", "date2"]].to_array().values.T
|
|
1092
|
+
if data_dates.dtype == "<M8[ns]": # Convert to days if needed
|
|
1093
|
+
data_dates = data_dates.astype("datetime64[D]")
|
|
1094
|
+
|
|
1095
|
+
if (solver == "LSMR_ini" or regu == "1accelnotnull" or regu == "directionxy") and rolling_mean is not None:
|
|
1096
|
+
if len(rolling_mean.sizes) == 3:  # if regu == '1accelnotnull', rolling_mean has a time dimension
|
|
1097
|
+
# Load rolling mean for the given pixel, only on the dates available
|
|
1098
|
+
dates_range = construction_dates_range_np(data_dates)
|
|
1099
|
+
mean = rolling_mean.sel(
|
|
1100
|
+
mid_date=dates_range[:-1] + np.diff(dates_range) // 2, x=i, y=j, method="nearest"
|
|
1101
|
+
)[["vx_filt", "vy_filt"]]
|
|
1102
|
+
mean = [mean[i].values / unit for i in ["vx_filt", "vy_filt"]] # Convert it to m/day
|
|
1103
|
+
|
|
1104
|
+
else:  # else if solver == 'LSMR_ini', rolling_mean is a temporal average per pixel
|
|
1105
|
+
mean = rolling_mean.sel(x=i, y=j, method="nearest")[["vx", "vy"]]
|
|
1106
|
+
mean = [mean[i].values / unit for i in ["vx", "vy"]] # Convert it to m/day
|
|
1107
|
+
dates_range = None
|
|
1108
|
+
|
|
1109
|
+
else: # If there is no apriori and no initialization
|
|
1110
|
+
mean = None
|
|
1111
|
+
dates_range = None
|
|
1112
|
+
|
|
1113
|
+
# data_values is composed of vx, vy, errorx, errory, temporal baseline
|
|
1114
|
+
if visual:
|
|
1115
|
+
if output_format == "np":
|
|
1116
|
+
data_str = data[["sensor", "source"]].to_array().values.T
|
|
1117
|
+
data_values = data.drop_vars(["date1", "date2", "sensor", "source"]).to_array().values.T
|
|
1118
|
+
data = [data_dates, data_values, data_str]
|
|
1119
|
+
elif output_format == "df":
|
|
1120
|
+
data = data.to_pandas()
|
|
1121
|
+
else:
|
|
1122
|
+
raise ValueError(
|
|
1123
|
+
"Please enter np if you want to have as output a numpy array, and df if you want a pandas dataframe"
|
|
1124
|
+
)
|
|
1125
|
+
else:
|
|
1126
|
+
data_values = data.drop_vars(["date1", "date2"]).to_array().values.T
|
|
1127
|
+
data = [data_dates, data_values]
|
|
1128
|
+
|
|
1129
|
+
if flag is not None:
|
|
1130
|
+
return data, mean, dates_range, regu, coef
|
|
1131
|
+
else:
|
|
1132
|
+
return data, mean, dates_range
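# --- Hedged usage sketch (added, not part of the original module) ---
# Unpacking the values returned by the pixel-loading method documented above.
# The method name `load_pixel`, the coordinates and the `cube` instance are
# assumptions for illustration (the def line lies outside this excerpt); the
# unpacking itself follows the return statements above.
#
#     data, mean, dates_range = cube.load_pixel(i, j, proj="EPSG:4326", solver="LSMR_ini")
#     dates, values = data[0], data[1]   # (N, 2) date pairs and (N, 5) vx/vy/errorx/errory/baseline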
|
|
1133
|
+
|
|
1134
|
+
# %% ==================================================================== #
|
|
1135
|
+
# CUBE PROCESSING #
|
|
1136
|
+
# =====================================================================%% #
|
|
1137
|
+
|
|
1138
|
+
def delete_outliers(
|
|
1139
|
+
self,
|
|
1140
|
+
delete_outliers: str | float,
|
|
1141
|
+
flag: xr.Dataset | None = None,
|
|
1142
|
+
slope: xr.Dataset | None = None,
|
|
1143
|
+
aspect: xr.Dataset | None = None,
|
|
1144
|
+
direction: xr.Dataset | None = None,
|
|
1145
|
+
**kwargs,
|
|
1146
|
+
):
|
|
1147
|
+
"""
|
|
1148
|
+
Delete outliers according to a certain criterion.
|
|
1149
|
+
|
|
1150
|
+
:param delete_outliers: [str | float] --- If a number, delete all velocities with a quality indicator higher than delete_outliers; if 'median_angle', delete outliers whose direction is more than 45° away from the average vector
|
|
1151
|
+
:param flag: [xr dataset | None] [default is None] --- If not None, the values of the coefficient used for stable areas, surge glacier and non surge glacier
|
|
1152
|
+
"""
|
|
1153
|
+
|
|
1154
|
+
axis = self.ds["vx"].dims.index("mid_date")  # time axis, needed below when flag is not None
if isinstance(delete_outliers, (int, float)) or isinstance(delete_outliers, str):
|
|
1155
|
+
if isinstance(delete_outliers, (int, float)):  # filter according to the maximal error
|
|
1156
|
+
inlier_mask = dask_filt_warpper(
|
|
1157
|
+
self.ds["vx"], self.ds["vy"], filt_method="error", error_thres=delete_outliers
|
|
1158
|
+
)
|
|
1159
|
+
|
|
1160
|
+
elif isinstance(delete_outliers, str):  # filter according to vvc_angle, z_score, median_angle...
|
|
1161
|
+
axis = self.ds["vx"].dims.index("mid_date")
|
|
1162
|
+
inlier_mask = dask_filt_warpper(
|
|
1163
|
+
self.ds["vx"],
|
|
1164
|
+
self.ds["vy"],
|
|
1165
|
+
filt_method=delete_outliers,
|
|
1166
|
+
slope=slope,
|
|
1167
|
+
aspect=aspect,
|
|
1168
|
+
direction=direction,
|
|
1169
|
+
axis=axis,
|
|
1170
|
+
**kwargs,
|
|
1171
|
+
)
|
|
1172
|
+
|
|
1173
|
+
if flag is not None:
|
|
1174
|
+
if delete_outliers != "vvc_angle":
|
|
1175
|
+
flag = flag["flag"].values if flag["flag"].shape[0] == self.nx else flag["flag"].values.T
|
|
1176
|
+
flag_condition = flag == 0
|
|
1177
|
+
flag_condition = np.expand_dims(flag_condition, axis=axis)
|
|
1178
|
+
inlier_mask = np.logical_or(inlier_mask, flag_condition)
|
|
1179
|
+
|
|
1180
|
+
inlier_flag = xr.DataArray(inlier_mask, dims=self.ds["vx"].dims)
|
|
1181
|
+
for var in ["vx", "vy"]:
|
|
1182
|
+
self.ds[var] = self.ds[var].where(inlier_flag)
|
|
1183
|
+
|
|
1184
|
+
self.ds = self.ds.persist()
|
|
1185
|
+
|
|
1186
|
+
elif isinstance(delete_outliers, dict):
|
|
1187
|
+
for method in delete_outliers.keys():
|
|
1188
|
+
if method == "error":
|
|
1189
|
+
if delete_outliers["error"] is None:
|
|
1190
|
+
self.delete_outliers("error", flag)
|
|
1191
|
+
else:
|
|
1192
|
+
self.delete_outliers(delete_outliers["error"], flag)
|
|
1193
|
+
elif method == "magnitude":
|
|
1194
|
+
if delete_outliers["magnitude"] is None:
|
|
1195
|
+
self.delete_outliers("magnitude", flag)
|
|
1196
|
+
else:
|
|
1197
|
+
self.delete_outliers("magnitude", flag, magnitude_thres=delete_outliers["magnitude"])
|
|
1198
|
+
elif method == "median_magnitude":
|
|
1199
|
+
if delete_outliers["median_magnitude"] is None:
|
|
1200
|
+
self.delete_outliers("median_magnitude", flag)
|
|
1201
|
+
else:
|
|
1202
|
+
self.delete_outliers(
|
|
1203
|
+
"median_magnitude", flag, median_magnitude_thres=delete_outliers["median_magnitude"]
|
|
1204
|
+
)
|
|
1205
|
+
elif method == "z_score":
|
|
1206
|
+
if delete_outliers["z_score"] is None:
|
|
1207
|
+
self.delete_outliers("z_score", flag)
|
|
1208
|
+
else:
|
|
1209
|
+
self.delete_outliers("z_score", flag, z_thres=delete_outliers["z_score"])
|
|
1210
|
+
|
|
1211
|
+
elif method == "median_angle":
|
|
1212
|
+
if delete_outliers["median_angle"] is None:
|
|
1213
|
+
self.delete_outliers("median_angle", flag)
|
|
1214
|
+
else:
|
|
1215
|
+
self.delete_outliers("median_angle", flag, z_thres=delete_outliers["median_angle"])
|
|
1216
|
+
|
|
1217
|
+
elif method == "vvc_angle":
|
|
1218
|
+
if delete_outliers["vvc_angle"] is None:
|
|
1219
|
+
self.delete_outliers("vvc_angle", flag)
|
|
1220
|
+
else:
|
|
1221
|
+
self.delete_outliers("vvc_angle", flag, **delete_outliers["vvc_angle"])
|
|
1222
|
+
elif method == "topo_angle":
|
|
1223
|
+
self.delete_outliers("topo_angle", flag, slope=slope, aspect=aspect)
|
|
1224
|
+
elif method == "flow_angle":
|
|
1225
|
+
self.delete_outliers("flow_angle", flag, direction=direction)
|
|
1226
|
+
elif method == "mz_score":
|
|
1227
|
+
if delete_outliers["mz_score"] is None:
|
|
1228
|
+
self.delete_outliers("mz_score", flag)
|
|
1229
|
+
else:
|
|
1230
|
+
self.delete_outliers("mz_score", flag, z_thres=delete_outliers["mz_score"])
|
|
1231
|
+
else:
|
|
1232
|
+
raise ValueError(
|
|
1233
|
+
"Filtering method should be either 'median_angle', 'vvc_angle', 'topo_angle', 'z_score','mz_score', 'magnitude', 'median_magnitude' or 'error'."
|
|
1234
|
+
)
|
|
1235
|
+
else:
|
|
1236
|
+
raise ValueError("delete_outliers must be a int, a string or a dict, not {type(delete_outliers)}")
|
|
1237
|
+
|
|
1238
|
+
def mask_cube(self, mask: xr.DataArray | str):
|
|
1239
|
+
"""
|
|
1240
|
+
Mask some of the data of the cube (putting it to np.nan).
|
|
1241
|
+
|
|
1242
|
+
:param mask: [str | xr dataarray] --- Either a DataArray with 1 the data to keep and 0 the ones to remove, or a path to a file containing a DataArray or a shapefile to be rasterized
|
|
1243
|
+
"""
|
|
1244
|
+
|
|
1245
|
+
if type(mask) is str:
|
|
1246
|
+
if (
|
|
1247
|
+
mask[-3:] == "shp" or mask[-4:] == "gpkg"
|
|
1248
|
+
): # Convert the shp file or geopackage to an xarray dataset (rasterize the shapefile)
|
|
1249
|
+
polygon = geopandas.read_file(mask).to_crs(CRS(self.ds.proj4))
|
|
1250
|
+
raster = rasterize(
|
|
1251
|
+
[polygon.geometry[0]],
|
|
1252
|
+
out_shape=self.ds.rio.shape,
|
|
1253
|
+
transform=self.ds.rio.transform(),
|
|
1254
|
+
fill=0,
|
|
1255
|
+
dtype="int16",
|
|
1256
|
+
)
|
|
1257
|
+
mask = xr.DataArray(data=raster.T, dims=["x", "y"], coords=self.ds[["x", "y"]].coords)
|
|
1258
|
+
else:
|
|
1259
|
+
mask = xr.open_dataarray(mask)
|
|
1260
|
+
mask.load()
|
|
1261
|
+
|
|
1262
|
+
# Mask the velocities and the errors
|
|
1263
|
+
if not self.is_TICO:
|
|
1264
|
+
self.ds[["vx", "vy", "errorx", "errory"]] = (
|
|
1265
|
+
self.ds[["vx", "vy", "errorx", "errory"]]
|
|
1266
|
+
.where(mask.sel(x=self.ds.x, y=self.ds.y, method="nearest") == 1)
|
|
1267
|
+
.astype("float32")
|
|
1268
|
+
)
|
|
1269
|
+
else:
|
|
1270
|
+
self.ds[["dx", "dy", "xcount_x", "xcount_y"]] = (
|
|
1271
|
+
self.ds[["dx", "dy", "xcount_x", "xcount_y"]]
|
|
1272
|
+
.where(mask.sel(x=self.ds.x, y=self.ds.y, method="nearest") == 1)
|
|
1273
|
+
.astype("float32")
|
|
1274
|
+
)
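# --- Hedged usage sketch (added, not part of the original module) ---
# Masking the cube with a polygon file; the path is hypothetical. The shapefile
# is rasterized onto the cube grid and data outside the polygon is set to NaN.
#
#     cube.mask_cube("glacier_outline.shp")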
|
|
1275
|
+
|
|
1276
|
+
def reproject_geotiff_to_cube(self, file_path):
|
|
1277
|
+
"""
|
|
1278
|
+
Reproject the geotiff file to the same geometry of the cube
|
|
1279
|
+
:param: file_path: [str] --- path of the geotiff file to be warped
|
|
1280
|
+
:return: warped data [np.ndarray] --- warped data with same shape and resolution as the cube
|
|
1281
|
+
"""
|
|
1282
|
+
if file_path.split(".")[-1] == "tif":
|
|
1283
|
+
with rio.open(file_path) as src:
|
|
1284
|
+
src_data = src.read(1)
|
|
1285
|
+
|
|
1286
|
+
dst_data = np.empty(shape=self.ds.rio.shape, dtype=np.float32)
|
|
1287
|
+
dst_data, _ = rio.warp.reproject(
|
|
1288
|
+
source=src_data,
|
|
1289
|
+
destination=dst_data,
|
|
1290
|
+
src_transform=src.transform,
|
|
1291
|
+
src_crs=src.crs,
|
|
1292
|
+
dst_crs=CRS.from_proj4(self.ds.proj4),
|
|
1293
|
+
dst_transform=self.ds.rio.transform(),
|
|
1294
|
+
dst_shape=self.ds.rio.shape,
|
|
1295
|
+
resampling=rio.warp.Resampling.bilinear,
|
|
1296
|
+
)
|
|
1297
|
+
dst_data[dst_data == src.nodata] = np.nan
|
|
1298
|
+
return dst_data
|
|
1299
|
+
|
|
1300
|
+
def compute_flow_direction(self, vx_file: str | None = None, vy_file: str | None = None) -> xr.Dataset:
|
|
1301
|
+
"""
|
|
1302
|
+
Compute the average flow direction from the input vx and vy files or just from the observations
|
|
1303
|
+
:params vx_file, vy_file: [str] --- paths of the flow velocity files, should be in geotiff format
|
|
1304
|
+
:return: direction: [xr.Dataset] --- computed average flow direction at each pixel
|
|
1305
|
+
"""
|
|
1306
|
+
if vx_file is not None and vy_file is not None:
|
|
1307
|
+
vx = self.reproject_geotiff_to_cube(vx_file)
|
|
1308
|
+
vy = self.reproject_geotiff_to_cube(vy_file)
|
|
1309
|
+
else:
|
|
1310
|
+
vx = self.ds["vx"].values
|
|
1311
|
+
vy = self.ds["vy"].values
|
|
1312
|
+
|
|
1313
|
+
temporal_baseline = self.ds["temporal_baseline"].values
|
|
1314
|
+
temporal_baseline = temporal_baseline[np.newaxis, np.newaxis, :]
|
|
1315
|
+
vx_weighted = np.nansum(vx * temporal_baseline, axis=2) / np.nansum(temporal_baseline, axis=2)
|
|
1316
|
+
vy_weighted = np.nansum(vy * temporal_baseline, axis=2) / np.nansum(temporal_baseline, axis=2)
|
|
1317
|
+
|
|
1318
|
+
v_mean_weighted = np.sqrt(vx_weighted**2 + vy_weighted**2)
|
|
1319
|
+
|
|
1320
|
+
direction = np.arctan2(vx_weighted, vy_weighted)
|
|
1321
|
+
direction = (np.rad2deg(direction) + 360) % 360
|
|
1322
|
+
|
|
1323
|
+
direction = np.where(v_mean_weighted < 1, np.nan, direction)
|
|
1324
|
+
|
|
1325
|
+
direction = xr.Dataset(
|
|
1326
|
+
data_vars=dict(
|
|
1327
|
+
direction=(["y", "x"], np.array(direction.T)),
|
|
1328
|
+
),
|
|
1329
|
+
coords=dict(x=(["x"], self.ds.x.data), y=(["y"], self.ds.y.data)),
|
|
1330
|
+
)
|
|
1331
|
+
|
|
1332
|
+
return direction
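# --- Hedged usage sketch (added, not part of the original module) ---
# Computing the mean flow direction from the observations themselves (no
# external geotiffs) and using it to filter outliers; `cube` is hypothetical.
#
#     direction = cube.compute_flow_direction()          # xr.Dataset with a 'direction' variable
#     cube.delete_outliers("flow_angle", direction=direction)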
|
|
1333
|
+
|
|
1334
|
+
def create_flag(self, flag: str | xr.Dataset | None = None, field_name: str | None = None, default_value: str | int | None = None):
|
|
1335
|
+
"""
|
|
1336
|
+
Create a flag dataset based on the provided shapefile and shapefile field.
|
|
1337
|
+
Which is usually used to divide the pixels into different types, especially for surging glaciers.
|
|
1338
|
+
If you just want to divide by polygon, set field_name to None
|
|
1339
|
+
|
|
1340
|
+
:param flag (str | xr.Dataset, optional): Path to a shapefile/geopackage or netCDF file, or an already-built flag dataset. Defaults to None.
|
|
1341
|
+
:param field_name (str, optional): The name of the shapefile field. Defaults to 'surge_type' (used in RGI7).
|
|
1342
|
+
:param default_value (str | int | None, optional): The default value for the shapefile field. Defaults to None.
|
|
1343
|
+
:Returns flag: xr.Dataset, The flag dataset with dimensions 'y' and 'x'.
|
|
1344
|
+
"""
|
|
1345
|
+
|
|
1346
|
+
if isinstance(flag, str):
|
|
1347
|
+
if flag.split(".")[-1] == "nc": # If flag is a netCDF file
|
|
1348
|
+
flag = xr.open_dataset(flag)
|
|
1349
|
+
|
|
1350
|
+
elif flag.split(".")[-1] in ["shp", "gpkg"]: # If flag is a shape file
|
|
1351
|
+
flag = geopandas.read_file(flag).to_crs(self.ds.proj4).clip(self.ds.rio.bounds())
|
|
1352
|
+
|
|
1353
|
+
# surge-type glacier: 2, other glacier: 1, stable area: 0
|
|
1354
|
+
if field_name is None:
|
|
1355
|
+
if "surge_type" in flag.columns: # RGI inventory, surge-type glacier: 2, other glacier: 0
|
|
1356
|
+
default_value = 0
|
|
1357
|
+
field_name = "surge_type"
|
|
1358
|
+
elif (
|
|
1359
|
+
"Surge_class" in flag.columns
|
|
1360
|
+
): # HMA surging glacier inventory, surge-type glacier: 2, other glacier: ''
|
|
1361
|
+
default_value = None
|
|
1362
|
+
field_name = "Surge_class"
|
|
1363
|
+
|
|
1364
|
+
if field_name is not None:
|
|
1365
|
+
flag_id = flag[field_name].apply(lambda x: 2 if x != default_value else 1).astype("int16")
|
|
1366
|
+
geom_value = ((geom, value) for geom, value in zip(flag.geometry, flag_id))
|
|
1367
|
+
else:
|
|
1368
|
+
# inside the polygon: 1, outside: 0
|
|
1369
|
+
geom_value = ((geom, 1) for geom in flag.geometry)
|
|
1370
|
+
|
|
1371
|
+
try:
|
|
1372
|
+
flag = rasterio.features.rasterize(
|
|
1373
|
+
geom_value,
|
|
1374
|
+
out_shape=(self.ny, self.nx),
|
|
1375
|
+
transform=self.ds.rio.transform(),
|
|
1376
|
+
all_touched=True,
|
|
1377
|
+
fill=0, # background value
|
|
1378
|
+
dtype="int16",
|
|
1379
|
+
)
|
|
1380
|
+
except Exception:  # rasterization failed (e.g. no geometry left after clipping)
|
|
1381
|
+
flag = np.zeros(shape=(self.ny, self.nx), dtype="int16")
|
|
1382
|
+
|
|
1383
|
+
flag = xr.Dataset(
|
|
1384
|
+
data_vars=dict(
|
|
1385
|
+
flag=(["y", "x"], flag),
|
|
1386
|
+
),
|
|
1387
|
+
coords=dict(
|
|
1388
|
+
x=(["x"], self.ds.x.data),
|
|
1389
|
+
y=(["y"], self.ds.y.data),
|
|
1390
|
+
),
|
|
1391
|
+
)
|
|
1392
|
+
|
|
1393
|
+
elif not isinstance(flag, xr.Dataset):
|
|
1394
|
+
raise ValueError("flag file must be .nc or .shp")
|
|
1395
|
+
|
|
1396
|
+
if "flags" in list(flag.variables):
|
|
1397
|
+
flag = flag.rename({"flags": "flag"})
|
|
1398
|
+
|
|
1399
|
+
return flag
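# --- Hedged usage sketch (added, not part of the original module) ---
# Rasterizing a glacier inventory into a flag grid (stable area: 0, glacier: 1,
# surge-type glacier: 2, following the convention coded above). The file name
# and the per-flag dictionaries are illustrative assumptions.
#
#     flag = cube.create_flag("rgi7_outlines.gpkg")
#     regu = {0: 1, 1: "1accelnotnull", 2: "1accelnotnull"}   # regularization per flag value
#     coef = {0: 500, 1: 100, 2: 100}                         # Tikhonov coefficient per flag value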
|
|
1400
|
+
|
|
1401
|
+
def filter_cube_before_inversion(
|
|
1402
|
+
self,
|
|
1403
|
+
i: int | float | None = None,
|
|
1404
|
+
j: int | float | None = None,
|
|
1405
|
+
smooth_method: str = "savgol",
|
|
1406
|
+
s_win: int = 3,
|
|
1407
|
+
t_win: int = 90,
|
|
1408
|
+
sigma: int = 3,
|
|
1409
|
+
order: int = 3,
|
|
1410
|
+
unit: int = 365,
|
|
1411
|
+
delete_outliers: str | float | None = None,
|
|
1412
|
+
flag: xr.Dataset | str | None = None,
|
|
1413
|
+
dem_file: str | None = None,
|
|
1414
|
+
regu: int | str = "1accelnotnull",
|
|
1415
|
+
solver: str = "LSMR_ini",
|
|
1416
|
+
proj: str = "EPSG:4326",
|
|
1417
|
+
velo_or_disp: str = "velo",
|
|
1418
|
+
select_baseline: int | None = 180,
|
|
1419
|
+
verbose: bool = False,
|
|
1420
|
+
) -> xr.Dataset:
|
|
1421
|
+
"""
|
|
1422
|
+
Filter the original data before the inversion:
|
|
1423
|
+
-delete outliers according to the provided criterion
|
|
1424
|
+
-compute a spatio-temporal kernel of the data, which can be used as apriori for the inversion (for "1accelnotnull" or "directionxy" )
|
|
1425
|
+
-compute mean velocity along x and y ( for solver = 'LSMR_ini' if regu is not "1accelnotnull" or "directionxy" )
|
|
1426
|
+
|
|
1427
|
+
:params i, j: [int | float] --- Coordinates to be converted
|
|
1428
|
+
:param smooth_method: [str] [default is 'savgol'] --- Smoothing method to be used to smooth the data in time ('gaussian', 'median', 'ewma', 'savgol')
|
|
1429
|
+
:param s_win: [int] [default is 3] --- Size of the spatial window
|
|
1430
|
+
:param t_win: [int] [default is 90] --- Time window size for 'ewma' smoothing
|
|
1431
|
+
:param sigma: [int] [default is 3] --- Standard deviation for 'gaussian' filter
|
|
1432
|
+
:param order: [int] [default is 3] --- Order of the smoothing function
|
|
1433
|
+
:param unit: [int] [default is 365] --- 365 if the unit is m/y, 1 if the unit is m/d
|
|
1434
|
+
:param delete_outliers: [str | float | None] [default is None] --- If a number, delete all velocities with a quality indicator higher than delete_outliers
|
|
1435
|
+
:param flag: [xr dataset | None] [default is None] --- If not None, the values of the coefficient used for stable areas, surge glacier and non surge glacier
|
|
1436
|
+
:param regu: [int | str] [default is "1accelnotnull"] --- Regularisation of the solver
|
|
1437
|
+
:param solver: [str] [default is 'LSMR_ini'] --- Solver used to invert the system
|
|
1438
|
+
:param proj: [str] [default is 'EPSG:4326'] --- EPSG of i,j projection
|
|
1439
|
+
:param velo_or_disp: [str] [default is 'velo'] --- 'disp' or 'velo' to indicate the type of the observations : 'disp' mean that self contain displacements values and 'velo' mean it contains velocity
|
|
1440
|
+
:param select_baseline: [int | None] [default is 180] --- threshold on the temporal baseline to select; if the number of observations is lower than 3 times the number of estimated displacements with this threshold, it is increased by 30 days
|
|
1441
|
+
:param verbose: [bool] [default is False] --- Print information throughout the process
|
|
1442
|
+
|
|
1443
|
+
:return obs_filt, flag: [xr dataset | None] --- Filtered dataset, and the flag dataset (None if no flag was provided)
|
|
1444
|
+
"""
|
|
1445
|
+
|
|
1446
|
+
def loop_rolling(da_arr: xr.Dataset, select_baseline: int | None = 180) -> (np.ndarray, np.ndarray): # type: ignore
|
|
1447
|
+
"""
|
|
1448
|
+
A function to calculate spatial mean, resample data, and calculate smoothed velocity.
|
|
1449
|
+
|
|
1450
|
+
:param da_arr: [xr dataset] --- Original data
|
|
1451
|
+
:param select_baseline: [int] [default is None] --- Threshold over the temporal baselines
|
|
1452
|
+
|
|
1453
|
+
:return spatial_mean: [np array] --- smoothed velocity
|
|
1454
|
+
:return date_out: [np array] --- Observed dates
|
|
1455
|
+
"""
|
|
1456
|
+
|
|
1457
|
+
# Compute the dates of the estimated displacements time series
|
|
1458
|
+
date_out = date_range[:-1] + np.diff(date_range) // 2
|
|
1459
|
+
mid_dates = self.ds["mid_date"]
|
|
1460
|
+
|
|
1461
|
+
if verbose:
|
|
1462
|
+
start = time.time()
|
|
1463
|
+
if select_baseline is not None: # select data with a temporal baseline lower than a threshold
|
|
1464
|
+
baseline = self.ds["temporal_baseline"].compute()
|
|
1465
|
+
idx = np.where(baseline < select_baseline)
|
|
1466
|
+
while (
|
|
1467
|
+
(len(idx[0]) < 3 * len(date_out)) and (select_baseline < 200)
|
|
1468
|
+
): # Increase the threshold by 30, if the number of observation is lower than 3 times the number of estimated displacement
|
|
1469
|
+
select_baseline += 30
|
|
1470
|
+
mid_dates = mid_dates.isel(mid_date=idx[0])
|
|
1471
|
+
da_arr = da_arr.isel(mid_date=idx[0])
|
|
1472
|
+
|
|
1473
|
+
# Find the time axis for dask processing
|
|
1474
|
+
time_axis = self.ds["vx"].dims.index("mid_date")
|
|
1475
|
+
# Apply the selected kernel in time
|
|
1476
|
+
if verbose:
|
|
1477
|
+
with ProgressBar(): # Plot a progress bar
|
|
1478
|
+
filtered_in_time = dask_smooth_wrapper(
|
|
1479
|
+
da_arr.data,
|
|
1480
|
+
mid_dates,
|
|
1481
|
+
t_out=date_out,
|
|
1482
|
+
smooth_method=smooth_method,
|
|
1483
|
+
sigma=sigma,
|
|
1484
|
+
t_win=t_win,
|
|
1485
|
+
order=order,
|
|
1486
|
+
axis=time_axis,
|
|
1487
|
+
).compute()
|
|
1488
|
+
else:
|
|
1489
|
+
filtered_in_time = dask_smooth_wrapper(
|
|
1490
|
+
da_arr.data,
|
|
1491
|
+
mid_dates,
|
|
1492
|
+
t_out=date_out,
|
|
1493
|
+
smooth_method=smooth_method,
|
|
1494
|
+
sigma=sigma,
|
|
1495
|
+
t_win=t_win,
|
|
1496
|
+
order=order,
|
|
1497
|
+
axis=time_axis,
|
|
1498
|
+
).compute()
|
|
1499
|
+
|
|
1500
|
+
if verbose:
|
|
1501
|
+
print(f"[Data filtering] Smoothing observations took {round((time.time() - start), 1)} s")
|
|
1502
|
+
|
|
1503
|
+
# Spatial average
|
|
1504
|
+
if (
|
|
1505
|
+
np.min([da_arr["x"].size, da_arr["y"].size]) > s_win
|
|
1506
|
+
): # The spatial average is performed only if the size of the cube is larger than s_win, the spatial window
|
|
1507
|
+
spatial_axis = tuple(i for i in range(3) if i != time_axis)
|
|
1508
|
+
pad_widths = tuple((s_win // 2, s_win // 2) if i != time_axis else (0, 0) for i in range(3))
|
|
1509
|
+
spatial_mean = da.nanmean(
|
|
1510
|
+
sliding_window_view(filtered_in_time, (s_win, s_win), axis=spatial_axis), axis=(-1, -2)
|
|
1511
|
+
)
|
|
1512
|
+
spatial_mean = da.pad(spatial_mean, pad_widths, mode="edge")
|
|
1513
|
+
else:
|
|
1514
|
+
spatial_mean = filtered_in_time
|
|
1515
|
+
|
|
1516
|
+
return spatial_mean.compute(), np.unique(date_out)
|
|
1517
|
+
|
|
1518
|
+
if np.isnan(self.ds["date1"].values).all():
|
|
1519
|
+
print("[Data filtering] Empty sub-cube (masked data ?)")
|
|
1520
|
+
return None
|
|
1521
|
+
|
|
1522
|
+
if i is not None and j is not None: # Crop the cube dataset around a given pixel
|
|
1523
|
+
i, j = self.convert_coordinates(i, j, proj=proj, verbose=verbose)
|
|
1524
|
+
if verbose:
|
|
1525
|
+
print(f"[Data filtering] Clipping dataset to individual pixel: (x, y) = ({i},{j})")
|
|
1526
|
+
buffer = (s_win + 2) * (self.ds["x"][1] - self.ds["x"][0])
|
|
1527
|
+
self.buffer(self.ds.proj4, [i, j, buffer])
|
|
1528
|
+
self.ds = self.ds.unify_chunks()
|
|
1529
|
+
|
|
1530
|
+
# The spatio-temporal smoothing should be carried on velocity, while we need displacement during inversion
|
|
1531
|
+
if velo_or_disp == "disp": # to provide velocity values
|
|
1532
|
+
self.ds["vx"] = self.ds["vx"] / self.ds["temporal_baseline"] * unit
|
|
1533
|
+
self.ds["vy"] = self.ds["vy"] / self.ds["temporal_baseline"] * unit
|
|
1534
|
+
|
|
1535
|
+
if flag is not None:  # create a flag, to identify stable areas, and eventually surges
|
|
1536
|
+
flag = self.create_flag(flag)
|
|
1537
|
+
flag.load()
|
|
1538
|
+
|
|
1539
|
+
if isinstance(regu, dict):
|
|
1540
|
+
regu = list(regu.values())
|
|
1541
|
+
else:
|
|
1542
|
+
raise ValueError("regu must be a dict if flag is Not None")
|
|
1543
|
+
else:
|
|
1544
|
+
if isinstance(regu, int): # if regu is an integer
|
|
1545
|
+
regu = [regu]
|
|
1546
|
+
elif isinstance(regu, str): # if regu is a string
|
|
1547
|
+
regu = list(regu.split())
|
|
1548
|
+
|
|
1549
|
+
start = time.time()
|
|
1550
|
+
|
|
1551
|
+
if delete_outliers is not None: # remove outliers beforehand
|
|
1552
|
+
slope, aspect, direction = None, None, None
|
|
1553
|
+
if (isinstance(delete_outliers, str) and delete_outliers == "topo_angle") or (
|
|
1554
|
+
isinstance(delete_outliers, dict) and "topo_angle" in delete_outliers.keys()
|
|
1555
|
+
):
|
|
1556
|
+
if isinstance(dem_file, str):
|
|
1557
|
+
slope, aspect = self.compute_slo_asp(dem_file=dem_file)
|
|
1558
|
+
else:
|
|
1559
|
+
raise ValueError("dem_file must be given if delete_outliers is 'topo_angle'")
|
|
1560
|
+
|
|
1561
|
+
elif (isinstance(delete_outliers, str) and delete_outliers == "flow_angle") or (
|
|
1562
|
+
isinstance(delete_outliers, dict) and "flow_angle" in delete_outliers.keys()
|
|
1563
|
+
):
|
|
1564
|
+
direction = self.compute_flow_direction(vx_file=None, vy_file=None)
|
|
1565
|
+
self.delete_outliers(
|
|
1566
|
+
delete_outliers=delete_outliers, flag=None, slope=slope, aspect=aspect, direction=direction
|
|
1567
|
+
)
|
|
1568
|
+
if verbose:
|
|
1569
|
+
print(f"[Data filtering] Delete outlier took {round((time.time() - start), 1)} s")
|
|
1570
|
+
|
|
1571
|
+
if "1accelnotnull" in regu or "directionxy" in regu: # compute velocity smoothed using a spatio-temporal filter
|
|
1572
|
+
date_range = np.sort(
|
|
1573
|
+
np.unique(
|
|
1574
|
+
np.concatenate(
|
|
1575
|
+
(
|
|
1576
|
+
self.ds["date1"].values[~np.isnan(self.ds["date1"].values)],
|
|
1577
|
+
self.ds["date2"].values[~np.isnan(self.ds["date2"].values)],
|
|
1578
|
+
),
|
|
1579
|
+
axis=0,
|
|
1580
|
+
)
|
|
1581
|
+
)
|
|
1582
|
+
) # dates between which the displacement should be estimated
|
|
1583
|
+
if verbose:
|
|
1584
|
+
start = time.time()
|
|
1585
|
+
|
|
1586
|
+
# spatio-temporal filter
|
|
1587
|
+
vx_filtered, dates_uniq = loop_rolling(
|
|
1588
|
+
self.ds["vx"], select_baseline=select_baseline
|
|
1589
|
+
) # dates_uniq correspond to the central date of dates_range
|
|
1590
|
+
vy_filtered, dates_uniq = loop_rolling(self.ds["vy"], select_baseline=select_baseline)
|
|
1591
|
+
|
|
1592
|
+
# We obtain one smoothed value for each unique date in date_range
|
|
1593
|
+
obs_filt = xr.Dataset(
|
|
1594
|
+
data_vars=dict(
|
|
1595
|
+
vx_filt=(["x", "y", "mid_date"], vx_filtered), vy_filt=(["x", "y", "mid_date"], vy_filtered)
|
|
1596
|
+
),
|
|
1597
|
+
coords=dict(x=(["x"], self.ds.x.data), y=(["y"], self.ds.y.data), mid_date=dates_uniq),
|
|
1598
|
+
attrs=dict(description="Smoothed velocity observations", units="m/y", proj4=self.ds.proj4),
|
|
1599
|
+
)
|
|
1600
|
+
del vx_filtered, vy_filtered
|
|
1601
|
+
|
|
1602
|
+
if verbose:
|
|
1603
|
+
print(
|
|
1604
|
+
"[Data filtering] Calculating smoothing mean of the observations completed in {:.2f} seconds".format(
|
|
1605
|
+
time.time() - start
|
|
1606
|
+
)
|
|
1607
|
+
)
|
|
1608
|
+
|
|
1609
|
+
elif (
|
|
1610
|
+
solver == "LSMR_ini"
|
|
1611
|
+
): # The initialization is based on the averaged velocity over the period, for every pixel
|
|
1612
|
+
obs_filt = self.ds[["vx", "vy"]].mean(dim="mid_date")
|
|
1613
|
+
obs_filt.attrs["description"] = "Averaged velocity over the period"
|
|
1614
|
+
obs_filt.attrs["units"] = "m/y"
|
|
1615
|
+
else:
|
|
1616
|
+
obs_filt = None
|
|
1617
|
+
|
|
1618
|
+
# Unify the observations to displacement to provide displacement values during inversion
|
|
1619
|
+
self.ds["vx"] = self.ds["vx"] * self.ds["temporal_baseline"] / unit
|
|
1620
|
+
self.ds["vy"] = self.ds["vy"] * self.ds["temporal_baseline"] / unit
|
|
1621
|
+
|
|
1622
|
+
if obs_filt is not None:
|
|
1623
|
+
obs_filt.load()
|
|
1624
|
+
self.ds = self.ds.load()  # loading here avoids memory crashes later on
|
|
1625
|
+
# persist() is particularly useful when using a distributed cluster because the data will be loaded into distributed memory across your machines and be much faster to use than reading repeatedly from disk.
|
|
1626
|
+
|
|
1627
|
+
return obs_filt, flag
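# --- Hedged usage sketch (added, not part of the original module) ---
# Typical pre-inversion filtering call; the keyword values mirror the defaults
# documented above and `cube` is a hypothetical, already-loaded instance.
#
#     obs_filt, flag = cube.filter_cube_before_inversion(
#         smooth_method="savgol", t_win=90, regu="1accelnotnull",
#         solver="LSMR_ini", delete_outliers="median_angle", velo_or_disp="velo",
#     )
#     # obs_filt holds the spatio-temporally smoothed velocities used as apriori,
#     # while cube.ds now stores displacements (vx * temporal_baseline / unit).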
|
|
1628
|
+
|
|
1629
|
+
def split_cube(self, n_split: int = 2, dim: str | list = "x", savepath: str | None = None):
|
|
1630
|
+
"""
|
|
1631
|
+
Split the cube into smaller cubes (taking less memory to load) according to the given dimensions.
|
|
1632
|
+
|
|
1633
|
+
:param n_split: [int] [default is 2] --- Number of splits to compute along each dimension in dim
|
|
1634
|
+
:param dim: [str | list] [default is "x"] --- Dimension(s) along which the cube must be split
|
|
1635
|
+
:param savepath: [str | None] [default is None] --- If not None, save the new cubes at this location
|
|
1636
|
+
|
|
1637
|
+
:return cubes: [list] --- List of the split cubes (the saved file names describe the position of each cube)
|
|
1638
|
+
"""
|
|
1639
|
+
|
|
1640
|
+
cubes = []
|
|
1641
|
+
for s in range(n_split):
|
|
1642
|
+
if isinstance(dim, str):
|
|
1643
|
+
cube = CubeDataClass(
|
|
1644
|
+
self,
|
|
1645
|
+
self.ds.isel(
|
|
1646
|
+
{
|
|
1647
|
+
dim: slice(
|
|
1648
|
+
s * len(self.ds[dim].values) // n_split,
|
|
1649
|
+
(s + 1) * len(self.ds[dim].values) // n_split,
|
|
1650
|
+
1,
|
|
1651
|
+
)
|
|
1652
|
+
}
|
|
1653
|
+
),
|
|
1654
|
+
)
|
|
1655
|
+
cube.update_dimension()
|
|
1656
|
+
if savepath is not None:
|
|
1657
|
+
cube.ds.to_netcdf(f"{savepath}{dim}_{s}.nc")
|
|
1658
|
+
print(f"Split cube saved at {savepath}{dim}_{s}.nc")
|
|
1659
|
+
cubes.append(cube)
|
|
1660
|
+
elif isinstance(dim, list):
|
|
1661
|
+
cube = CubeDataClass(
|
|
1662
|
+
self,
|
|
1663
|
+
self.ds.isel(
|
|
1664
|
+
{
|
|
1665
|
+
dim[0]: slice(
|
|
1666
|
+
s * len(self.ds[dim[0]].values) // n_split, (s + 1) * len(self.ds[dim[0]].values) // n_split, 1
|
|
1667
|
+
)
|
|
1668
|
+
}
|
|
1669
|
+
),
|
|
1670
|
+
)
|
|
1671
|
+
if len(dim) > 1:
|
|
1672
|
+
cubes += cube.split_cube(n_split=n_split, dim=dim[1:], savepath=f"{savepath}{dim[0]}_{s}_")
|
|
1673
|
+
else:
|
|
1674
|
+
if savepath is not None:
|
|
1675
|
+
cube.ds.to_netcdf(f"{savepath}{dim[0]}_{s}.nc")
|
|
1676
|
+
print(f"Split cube saved at {savepath}{dim[0]}_{s}.nc")
|
|
1677
|
+
cubes.append(cube)
|
|
1678
|
+
|
|
1679
|
+
return cubes
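# --- Hedged usage sketch (added, not part of the original module) ---
# Splitting a large cube into four tiles along x and y; the save path is
# hypothetical. Each tile is also written to disk when savepath is given.
#
#     tiles = cube.split_cube(n_split=2, dim=["x", "y"], savepath="/tmp/ticoi_split_")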
|
|
1680
|
+
|
|
1681
|
+
def reproj_coord(
|
|
1682
|
+
self,
|
|
1683
|
+
new_proj: Optional[str] = None,
|
|
1684
|
+
new_res: Optional[float] = None,
|
|
1685
|
+
interp_method: str = "nearest",
|
|
1686
|
+
cube_to_match: Optional["CubeDataClass"] = None,
|
|
1687
|
+
):
|
|
1688
|
+
"""
|
|
1689
|
+
Reproject the cube to a given projection system, and (optionally) resample this cube to a given resolution.
|
|
1690
|
+
The new projection can be defined by the variable new_proj or by a cube stored in cube_to_match.
|
|
1691
|
+
The new resolution can be defined by the variable new_res or by a cube stored in cube_to_match.
|
|
1692
|
+
|
|
1693
|
+
:param new_proj: [str] --- EPSG code of the new projection
|
|
1694
|
+
:param new_res: [float] --- new resolution in the unit of the new projection system
|
|
1695
|
+
:param interp_method: [str] --- Interpolation method used for the reprojection ('nearest' or 'bilinear')
|
|
1696
|
+
:param cube_to_match: [cube_data_class] --- cube used as a reference to reproject self
|
|
1697
|
+
"""
|
|
1698
|
+
# assign coordinate system
|
|
1699
|
+
if cube_to_match is not None:
|
|
1700
|
+
cube_to_match.ds = cube_to_match.ds.rio.write_crs(cube_to_match.ds.proj4)
|
|
1701
|
+
self.ds = self.ds.rio.write_crs(self.ds.proj4)
|
|
1702
|
+
self.ds = self.ds.transpose("mid_date", "y", "x")
|
|
1703
|
+
|
|
1704
|
+
# Reproject coordinates
|
|
1705
|
+
if cube_to_match is not None:
|
|
1706
|
+
if interp_method == "nearest":
|
|
1707
|
+
self.ds = self.ds.rio.reproject_match(cube_to_match.ds, resampling=rasterio.enums.Resampling.nearest)
|
|
1708
|
+
elif interp_method == "bilinear":
|
|
1709
|
+
self.ds = self.ds.rio.reproject_match(cube_to_match.ds, resampling=rasterio.enums.Resampling.bilinear)
|
|
1710
|
+
if new_res is not None or new_proj is not None:
|
|
1711
|
+
print("The new projection has been defined according to cube_to_match.")
|
|
1712
|
+
elif new_res is None:
|
|
1713
|
+
self.ds = self.ds.rio.reproject(new_proj)
|
|
1714
|
+
else:
|
|
1715
|
+
self.ds = self.ds.rio.reproject(new_proj, resolution=new_res)
|
|
1716
|
+
|
|
1717
|
+
# Reject abnormal data (when the cube sizes are not the same and data are missing, the interpolation leads to infinite or nearly-infinite values)
|
|
1718
|
+
self.ds[["vx", "vy"]] = self.ds[["vx", "vy"]].where(
|
|
1719
|
+
(np.abs(self.ds["vx"].values) < 10000) | (np.abs(self.ds["vy"].values) < 10000), np.nan
|
|
1720
|
+
)
|
|
1721
|
+
|
|
1722
|
+
# Update of cube_data_classxr attributes
|
|
1723
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="pyproj") # prevent to have a warning
|
|
1724
|
+
if new_proj is None:
|
|
1725
|
+
new_proj = cube_to_match.ds.proj4
|
|
1726
|
+
self.ds = self.ds.assign_attrs({"proj4": new_proj})
|
|
1727
|
+
else:
|
|
1728
|
+
self.ds = self.ds.assign_attrs({"proj4": CRS.from_epsg(new_proj[5:]).to_proj4()})
|
|
1729
|
+
self.ds = self.ds.assign_coords({"x": self.ds.x, "y": self.ds.y})
|
|
1730
|
+
self.update_dimension()
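# --- Hedged usage sketch (added, not part of the original module) ---
# Reprojecting and resampling the cube; the target EPSG code and resolution
# are illustrative assumptions.
#
#     cube.reproj_coord(new_proj="EPSG:32632", new_res=100, interp_method="bilinear")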
|
|
1731
|
+
|
|
1732
|
+
def reproj_vel(
|
|
1733
|
+
self,
|
|
1734
|
+
new_proj: Optional[str] = None,
|
|
1735
|
+
cube_to_match: Optional["CubeDataClass"] = None,
|
|
1736
|
+
unit: int = 365,
|
|
1737
|
+
nb_cpu: int = 8,
|
|
1738
|
+
):
|
|
1739
|
+
"""
|
|
1740
|
+
Reproject the velocity vector in a new projection grid (i.e. the x and y variables are not changed, only vx and vy are modified).
|
|
1741
|
+
The new projection can be defined by the variable new_proj or by a cube stored in cube_to_match.
|
|
1742
|
+
|
|
1743
|
+
:param new_proj: [str] --- EPSG code of the new projection
|
|
1744
|
+
:param cube_to_match: [cube_data_class] --- cube used as a reference to reproject self
|
|
1745
|
+
:param unit: [int] [default is 365] --- 365 if the unit of the velocity are m/y, 1 if they are m/d
|
|
1746
|
+
:param nb_cpu: [int] [default is 8] --- number of CPUs used for the parallelization
|
|
1747
|
+
"""
|
|
1748
|
+
|
|
1749
|
+
if new_proj is None:
|
|
1750
|
+
if cube_to_match is not None:
|
|
1751
|
+
new_proj = cube_to_match.ds.proj4
|
|
1752
|
+
transformer = Transformer.from_crs(self.ds.proj4, new_proj)
|
|
1753
|
+
else:
|
|
1754
|
+
raise ValueError("Please provide new_proj or cube_to_match")
|
|
1755
|
+
else:
|
|
1756
|
+
transformer = Transformer.from_crs(self.ds.proj4, CRS.from_epsg(new_proj[5:]).to_proj4())
|
|
1757
|
+
|
|
1758
|
+
# Prepare grid and transformer
|
|
1759
|
+
grid = np.meshgrid(self.ds["x"], self.ds["y"])
|
|
1760
|
+
grid_transformed = transformer.transform(grid[0], grid[1])
|
|
1761
|
+
# temp = self.temp_base_()
|
|
1762
|
+
temp = np.array([30] * self.nz)
|
|
1763
|
+
|
|
1764
|
+
def transform_slice(z, temp, grid, transformer):
|
|
1765
|
+
"""Transform the velocity slice for a single time step."""
|
|
1766
|
+
# compute the coordinate for the ending point of the vector
|
|
1767
|
+
endx = (self.ds["vx"].isel(mid_date=z) * temp[z] / unit) + grid[0]
|
|
1768
|
+
endy = (self.ds["vy"].isel(mid_date=z) * temp[z] / unit) + grid[1]
|
|
1769
|
+
|
|
1770
|
+
# Transform final coordinates
|
|
1771
|
+
t = transformer.transform(endx, endy)
|
|
1772
|
+
# Compute differences in the new coordinate system
|
|
1773
|
+
vx = (grid_transformed[0] - t[0]) / temp[z] * unit
|
|
1774
|
+
vy = (t[1] - grid_transformed[1]) / temp[z] * unit
|
|
1775
|
+
|
|
1776
|
+
return vx, vy
|
|
1777
|
+
|
|
1778
|
+
results = np.array(
|
|
1779
|
+
Parallel(n_jobs=nb_cpu, verbose=0)(
|
|
1780
|
+
delayed(transform_slice)(z, temp, grid, transformer) for z in range(self.nz)
|
|
1781
|
+
)
|
|
1782
|
+
)
|
|
1783
|
+
# Unpack the results
|
|
1784
|
+
vx, vy = results[:, 0, :, :], results[:, 1, :, :]
|
|
1785
|
+
|
|
1786
|
+
# Updating DataArrays
|
|
1787
|
+
self.ds["vx"] = xr.DataArray(
|
|
1788
|
+
vx.astype("float32"),
|
|
1789
|
+
dims=["mid_date", "y", "x"],
|
|
1790
|
+
coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
|
|
1791
|
+
)
|
|
1792
|
+
self.ds["vx"].encoding = {"vx": {"dtype": "float32", "scale_factor": 0.1, "units": "m/y"}}
|
|
1793
|
+
|
|
1794
|
+
self.ds["vy"] = xr.DataArray(
|
|
1795
|
+
vy.astype("float32"),
|
|
1796
|
+
dims=["mid_date", "y", "x"],
|
|
1797
|
+
coords={"mid_date": self.ds.mid_date, "y": self.ds.y, "x": self.ds.x},
|
|
1798
|
+
)
|
|
1799
|
+
self.ds["vy"].encoding = {"vy": {"dtype": "float32", "scale_factor": 0.1, "units": "m/y"}}
|
|
1800
|
+
|
|
1801
|
+
del grid, transformer, temp, vx, vy
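# Note (added for clarity, not part of the original module): the reprojection
# above transforms each velocity vector by converting its start point (the
# pixel grid) and its end point (grid + v * dt / unit) to the new CRS, then
# re-deriving vx and vy from the transformed difference. It is normally called
# through align_cube (below) rather than directly.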
|
|
1802
|
+
|
|
1803
|
+
def align_cube(
|
|
1804
|
+
self,
|
|
1805
|
+
cube: "CubeDataClass",
|
|
1806
|
+
unit: int = 365,
|
|
1807
|
+
reproj_vel: bool = True,
|
|
1808
|
+
reproj_coord: bool = True,
|
|
1809
|
+
interp_method: str = "nearest",
|
|
1810
|
+
nb_cpu: int = 8,
|
|
1811
|
+
):
|
|
1812
|
+
"""
|
|
1813
|
+
Reproject cube to match the resolution, projection, and region of self.
|
|
1814
|
+
|
|
1815
|
+
:param cube: Cube to align to self
|
|
1816
|
+
:param unit: Unit of the velocities (365 for m/y, 1 for m/d) (default is 365)
|
|
1817
|
+
:param reproj_vel: Whether the velocities have to be reprojected or not -> it will modify their values (default is True)
|
|
1818
|
+
:param reproj_coord: Whether the coordinates have to be interpolated or not (using interp_method) (default is True)
|
|
1819
|
+
:param interp_method: Interpolation method used to reproject cube (default is 'nearest')
|
|
1820
|
+
:param nb_cpu: [int] [default is 8] --- number of CPUs used for the parallelization
|
|
1821
|
+
|
|
1822
|
+
:return: Cube projected to self
|
|
1823
|
+
"""
|
|
1824
|
+
# if the velocity components have to be reprojected in the new projection system
|
|
1825
|
+
if reproj_vel:
|
|
1826
|
+
cube.reproj_vel(cube_to_match=self, unit=unit, nb_cpu=nb_cpu)
|
|
1827
|
+
|
|
1828
|
+
# if the coordinates have to be reprojected in the new projection system
|
|
1829
|
+
if reproj_coord:
|
|
1830
|
+
cube.reproj_coord(cube_to_match=self)
|
|
1831
|
+
|
|
1832
|
+
cube.ds = cube.ds.assign_attrs({"author": f"{cube.ds.author} aligned"})
|
|
1833
|
+
cube.update_dimension()
|
|
1834
|
+
|
|
1835
|
+
return cube
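# --- Hedged usage sketch (added, not part of the original module) ---
# Aligning a second cube onto this one and merging it (merge_cube below
# requires a prior align_cube call); `cube2` is a hypothetical second instance.
#
#     cube2 = cube.align_cube(cube2, reproj_vel=True, reproj_coord=True)
#     cube.merge_cube(cube2)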
|
|
1836
|
+
|
|
1837
|
+
def merge_cube(self, cube: "CubeDataClass"):
|
|
1838
|
+
"""
|
|
1839
|
+
Merge another cube to the present one. It must have been aligned first (using align_cube)
|
|
1840
|
+
|
|
1841
|
+
:param cube: [cube_data_class] --- The cube to be merged to self
|
|
1842
|
+
"""
|
|
1843
|
+
|
|
1844
|
+
# Merge the cubes (must be previously aligned before using align_cube)
|
|
1845
|
+
self.ds = xr.concat([self.ds, cube.ds.sel(x=self.ds["x"], y=self.ds["y"])], dim="mid_date")
|
|
1846
|
+
|
|
1847
|
+
# Update the attributes
|
|
1848
|
+
self.ds = self.ds.chunk(chunks={"mid_date": self.ds["mid_date"].size})
|
|
1849
|
+
self.nz = self.ds["mid_date"].size
|
|
1850
|
+
if (
|
|
1851
|
+
isinstance(self.filedir, list)
|
|
1852
|
+
and isinstance(self.filename, list)
|
|
1853
|
+
and isinstance(self.author, list)
|
|
1854
|
+
and isinstance(self.source, list)
|
|
1855
|
+
):
|
|
1856
|
+
self.filedir = [self.filedir]
|
|
1857
|
+
self.filename = [self.filename]
|
|
1858
|
+
self.author = [self.author]
|
|
1859
|
+
self.source = [self.source]
|
|
1860
|
+
self.filedir.append(cube.filedir)
|
|
1861
|
+
self.filename.append(cube.filename)
|
|
1862
|
+
self.author.append(cube.author)
|
|
1863
|
+
self.source.append(cube.source)
|
|
1864
|
+
|
|
1865
|
+
def average_cube(
|
|
1866
|
+
self,
|
|
1867
|
+
return_format: str = "geotiff",
|
|
1868
|
+
return_variable: list = ["vv"],
|
|
1869
|
+
save: bool = True,
|
|
1870
|
+
path_save: str | None = None,
|
|
1871
|
+
):
|
|
1872
|
+
"""
|
|
1873
|
+
Compute the mean velocity at each pixel of the cube.
|
|
1874
|
+
|
|
1875
|
+
:param return_format: [str] [default is 'geotiff'] --- Type of the file to be returned ('nc' or 'geotiff')
|
|
1876
|
+
:param return_variable: [list] [default is ['vv']] --- Which variable's mean must be returned
|
|
1877
|
+
:param save: [bool] [default is True] --- If True, save the file to path_save
|
|
1878
|
+
:param path_save: [str | None] [default is None] --- Path where to save the mean velocity file
|
|
1879
|
+
|
|
1880
|
+
:return: xr dataset, with vx_mean, the mean of vx and vy_mean the mean of vy
|
|
1881
|
+
"""
|
|
1882
|
+
time_dim = "mid_date" if "mid_date" in self.ds.dims else "time"
|
|
1883
|
+
vx_mean = self.ds["vx"].mean(dim=time_dim)
|
|
1884
|
+
vy_mean = self.ds["vy"].mean(dim=time_dim)
|
|
1885
|
+
dico_variable = {"vx": vx_mean, "vy": vy_mean}
|
|
1886
|
+
if "vv" in return_variable:
|
|
1887
|
+
vv_mean = np.sqrt(vx_mean**2 + vy_mean**2)
|
|
1888
|
+
dico_variable["vv"] = vv_mean
|
|
1889
|
+
|
|
1890
|
+
if return_format == "nc":
|
|
1891
|
+
ds_mean = xr.Dataset({})
|
|
1892
|
+
coords = {"y": self.ds.y, "x": self.ds.x}
|
|
1893
|
+
for variable in return_variable:
|
|
1894
|
+
ds_mean[f"{variable}_mean"] = xr.DataArray(dico_variable[variable], dims=["y", "x"], coords=coords)
|
|
1895
|
+
if save:
|
|
1896
|
+
ds_mean.to_netcdf(path_save)
|
|
1897
|
+
return ds_mean
|
|
1898
|
+
|
|
1899
|
+
elif return_format == "geotiff":
|
|
1900
|
+
ds_mean = []
|
|
1901
|
+
for variable in return_variable:
|
|
1902
|
+
mean_v = dico_variable[variable].to_numpy().astype(np.float32)
|
|
1903
|
+
mean_v = np.flip(mean_v.T, axis=0)
|
|
1904
|
+
|
|
1905
|
+
if save:
|
|
1906
|
+
# Create the GeoTIFF file
|
|
1907
|
+
with rasterio.open(
|
|
1908
|
+
f"{path_save}/mean_velocity_{variable}.tif",
|
|
1909
|
+
"w",
|
|
1910
|
+
driver="GTiff",
|
|
1911
|
+
height=mean_v.shape[0],
|
|
1912
|
+
width=mean_v.shape[1],
|
|
1913
|
+
count=1,
|
|
1914
|
+
dtype=str(mean_v.dtype),
|
|
1915
|
+
crs=CRS.from_proj4(self.ds.proj4),
|
|
1916
|
+
transform=self.ds.rio.transform(),
|
|
1917
|
+
) as dst:
|
|
1918
|
+
dst.write(mean_v, 1)
|
|
1919
|
+
|
|
1920
|
+
ds_mean.append(mean_v)
|
|
1921
|
+
|
|
1922
|
+
return ds_mean
|
|
1923
|
+
else:
|
|
1924
|
+
raise ValueError("Please enter geotiff or nc")
|
|
1925
|
+
|
|
1926
|
+
def compute_heatmap_moving(
|
|
1927
|
+
self,
|
|
1928
|
+
points_heatmap: pd.DataFrame,
|
|
1929
|
+
variable: str = "vv",
|
|
1930
|
+
method_interp: str = "linear",
|
|
1931
|
+
verbose: bool = False,
|
|
1932
|
+
freq: str = "MS",
|
|
1933
|
+
method: str = "mean",
|
|
1934
|
+
) -> pd.DataFrame:
|
|
1935
|
+
"""
|
|
1936
|
+
Compute a heatmap of the average monthly velocity, by averaging all the velocities which overlap a given month
|
|
1937
|
+
|
|
1938
|
+
:param points_heatmap: Points where the heatmap is to be computed
|
|
1939
|
+
:param variable: What variable is to be computed ('vx', 'vy' or 'vv')
|
|
1940
|
+
:param method_interp: Interpolation method used to determine the value at a specified point from the discrete velocities data
|
|
1941
|
+
:param freq: frequency used in the pandas.date_range function (default: 'MS' every first day of the month)
|
|
1942
|
+
:param method: 'mean' or 'median'
|
|
1943
|
+
:param verbose: Print information throughout the process (default is False)
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
:return: pandas DataFrame, heatmap values where each line corresponds to a date and each row to a point of the line
|
|
1947
|
+
"""
|
|
1948
|
+
|
|
1949
|
+
date1 = self.date1_()
|
|
1950
|
+
date2 = self.date2_()
|
|
1951
|
+
# Create a DateTimeIndex range spanning from the minimum date to the maximum date
|
|
1952
|
+
date_range = pd.date_range(np.nanmin(date1), np.nanmax(date2), freq=freq) # 'MS' for start of each month
|
|
1953
|
+
data = np.column_stack((date1, date2)) # Combine date1 and date2 into a single 2D array
|
|
1954
|
+
# Sort data according to the first date
|
|
1955
|
+
data = np.ma.array(sorted(data, key=lambda date: date[0])) # Sort according to the first date
|
|
1956
|
+
|
|
1957
|
+
# Find the index of the dates that have to be averaged, to get the heatmap
|
|
1958
|
+
# Each value of the heatmap corresponds to an average of all the velocities which are overlapping a given period
|
|
1959
|
+
save_line = [[] for _ in range(len(date_range) - 1)]
|
|
1960
|
+
for i_date, _ in enumerate(date_range[:-1]):
|
|
1961
|
+
i = 0
|
|
1962
|
+
while i < data.shape[0] and date_range[i_date + 1] >= data[i, 0]:
|
|
1963
|
+
if date_range[i_date] <= data[i, 1]:
|
|
1964
|
+
save_line[i_date].append(i)
|
|
1965
|
+
i += 1
|
|
1966
|
+
interval_output = pd.Series(
|
|
1967
|
+
[(date_range[k + 1] - date_range[k]) / np.timedelta64(1, "D") for k in range(date_range.shape[0] - 1)]
|
|
1968
|
+
)
|
|
1969
|
+
dates_c = date_range[1:] - pd.to_timedelta((interval_output / 2).astype("int"), "D")
|
|
1970
|
+
del interval_output, date_range, data
|
|
1971
|
+
|
|
1972
|
+
def data_temporalpoint(k: int, points_heatmap):
|
|
1973
|
+
"""Get the data at a given spatial point contained in points_heatmap"""
|
|
1974
|
+
|
|
1975
|
+
geopoint = points_heatmap[
|
|
1976
|
+
"geometry"
|
|
1977
|
+
].iloc[
|
|
1978
|
+
k
|
|
1979
|
+
]  # k-th point of the profile (points_heatmap holds points sampled along the line, with their distance)
|
|
1980
|
+
|
|
1981
|
+
i, j = geopoint.x, geopoint.y
|
|
1982
|
+
if verbose:
|
|
1983
|
+
print("i,j", i, j)
|
|
1984
|
+
|
|
1985
|
+
if variable == "vv":
|
|
1986
|
+
v = np.sqrt(
|
|
1987
|
+
self.ds["vx"].interp(x=i, y=j, method=method_interp).load() ** 2
|
|
1988
|
+
+ self.ds["vy"].interp(x=i, y=j, method="linear").load() ** 2
|
|
1989
|
+
)
|
|
1990
|
+
elif variable == "vx" or variable == "vy":
|
|
1991
|
+
v = self.ds[variable].interp(x=i, y=j, method=method_interp).load()
|
|
1992
|
+
|
|
1993
|
+
data = np.array([date1, date2, v.values], dtype=object).T
|
|
1994
|
+
data = np.ma.array(sorted(data, key=lambda date: date[0]))  # Sort according to the first date
|
|
1995
|
+
|
|
1996
|
+
return data[:, 2]
|
|
1997
|
+
|
|
1998
|
+
for k in range(len(points_heatmap)):
|
|
1999
|
+
if verbose:
|
|
2000
|
+
print("k", k)
|
|
2001
|
+
|
|
2002
|
+
data = data_temporalpoint(k, points_heatmap)
|
|
2003
|
+
vvmasked = np.ma.masked_invalid(np.ma.array(data, dtype="float"))
|
|
2004
|
+
|
|
2005
|
+
if method == "mean":
|
|
2006
|
+
vvmean = [np.ma.mean(vvmasked[lines]) for lines in save_line]
|
|
2007
|
+
elif method == "median":
|
|
2008
|
+
vvmean = [np.ma.median(vvmasked[lines]) for lines in save_line]
|
|
2009
|
+
|
|
2010
|
+
vvdf = pd.DataFrame(vvmean, index=dates_c, columns=[points_heatmap["distance"].iloc[k] / 1000])
|
|
2011
|
+
|
|
2012
|
+
if k > 0:
|
|
2013
|
+
line_df_vv = pd.concat([line_df_vv, vvdf], join="inner", axis=1)
|
|
2014
|
+
else:
|
|
2015
|
+
line_df_vv = vvdf
|
|
2016
|
+
|
|
2017
|
+
return line_df_vv
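# --- Hedged usage sketch (added, not part of the original module) ---
# Monthly heatmap of the velocity magnitude along a profile; `points_heatmap`
# is assumed to be a GeoDataFrame with 'geometry' (points) and 'distance'
# columns, as used above.
#
#     heatmap = cube.compute_heatmap_moving(points_heatmap, variable="vv", freq="MS", method="median")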
|
|
2018
|
+
|
|
2019
|
+
# @jit(nopython=True)
|
|
2020
|
+
def nvvc(self, nb_cpu=8, verbose=True):
|
|
2021
|
+
"""
|
|
2022
|
+
Compute the Normalized Coherence of the Velocity Vectors (NCVV) for every pixel of the cube.
|
|
2023
|
+
|
|
2024
|
+
"""
|
|
2025
|
+
|
|
2026
|
+
def ncvv_pixel(cube, i, j):
|
|
2027
|
+
return (
|
|
2028
|
+
np.sqrt(
|
|
2029
|
+
np.nansum(
|
|
2030
|
+
cube.ds["vx"].isel(x=i, y=j)
|
|
2031
|
+
/ np.sqrt(cube.ds["vx"].isel(x=i, y=j) ** 2 + cube.ds["vy"].isel(x=i, y=j) ** 2)
|
|
2032
|
+
)
|
|
2033
|
+
** 2
|
|
2034
|
+
+ np.nansum(
|
|
2035
|
+
cube.ds["vy"].isel(x=i, y=j)
|
|
2036
|
+
/ np.sqrt(cube.ds["vx"].isel(x=i, y=j) ** 2 + cube.ds["vy"].isel(x=i, y=j) ** 2)
|
|
2037
|
+
)
|
|
2038
|
+
** 2
|
|
2039
|
+
)
|
|
2040
|
+
/ cube.nz
|
|
2041
|
+
)
|
|
2042
|
+
|
|
2043
|
+
xy_values = itertools.product(range(self.nx), range(self.ny))
|
|
2044
|
+
xy_values_tqdm = tqdm(xy_values, total=self.nx * self.ny, mininterval=0.5)
|
|
2045
|
+
|
|
2046
|
+
return np.array(
|
|
2047
|
+
Parallel(n_jobs=nb_cpu, verbose=0)(
|
|
2048
|
+
delayed(ncvv_pixel)(self, i, j) for i, j in (xy_values_tqdm if verbose else xy_values)
|
|
2049
|
+
)
|
|
2050
|
+
).reshape(self.nx, self.ny)
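# Note (added for clarity, not part of the original module): per pixel the
# value computed above is
#     NCVV = sqrt( (sum_t vx_t / |v_t|)^2 + (sum_t vy_t / |v_t|)^2 ) / N,
# i.e. the norm of the mean unit velocity vector over the N observations; it
# is close to 1 for consistent flow directions and tends to 0 for random ones.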
|
|
2051
|
+
|
|
2052
|
+
def compute_med_stable_areas(
|
|
2053
|
+
self, shapefile_path, return_as="dataframe", stat_name="med", var_list=["vx", "vy"], invert=True
|
|
2054
|
+
):
|
|
2055
|
+
"""
|
|
2056
|
+
Compute the median of the selected variables per time step over a shapefile-defined area (e.g. stable ground), using Dask.
|
|
2057
|
+
|
|
2058
|
+
Parameters:
|
|
2059
|
+
|
|
2060
|
+
shapefile_path (str): Path to shapefile.
|
|
2061
|
+
return_as (str): 'dataframe' or 'cube'.
|
|
2062
|
+
stat_name (str): Base variable name for new data.
|
|
2063
|
+
invert (bool): Whether to invert the shapefile mask.
|
|
2064
|
+
|
|
2065
|
+
Returns:
|
|
2066
|
+
pd.DataFrame or xr.Dataset
|
|
2067
|
+
"""
|
|
2068
|
+
# Ensure data has Dask chunks
|
|
2069
|
+
# self.ds = self.ds.chunk({'y': -1, 'x': -1, 'mid_date': 10})
|
|
2070
|
+
print(var_list)
|
|
2071
|
+
# Clip with shapefile
|
|
2072
|
+
gdf = gpd.read_file(shapefile_path)
|
|
2073
|
+
gdf = gdf.to_crs(self.ds.rio.crs)
|
|
2074
|
+
masked = self.ds.rio.clip(gdf.geometry, gdf.crs, drop=False, all_touched=True, invert=invert)
|
|
2075
|
+
|
|
2076
|
+
print("Clipped")
|
|
2077
|
+
|
|
2078
|
+
# Return as DataFrame
|
|
2079
|
+
if return_as == "dataframe":
|
|
2080
|
+
df_vx = (
|
|
2081
|
+
masked["vx"]
|
|
2082
|
+
.median(dim=["x", "y"])
|
|
2083
|
+
.compute()
|
|
2084
|
+
.to_dataframe(name=f"{stat_name}_vx")
|
|
2085
|
+
.reset_index()[["mid_date", f"{stat_name}_vx"]]
|
|
2086
|
+
)
|
|
2087
|
+
df_vy = (
|
|
2088
|
+
masked["vy"]
|
|
2089
|
+
.median(dim=["x", "y"])
|
|
2090
|
+
.compute()
|
|
2091
|
+
.to_dataframe(name=f"{stat_name}_vy")
|
|
2092
|
+
.reset_index()[["mid_date", f"{stat_name}_vy"]]
|
|
2093
|
+
)
|
|
2094
|
+
if len(var_list) == 3:
|
|
2095
|
+
df_v = (
|
|
2096
|
+
masked[var_list[2]]
|
|
2097
|
+
.median(dim=["x", "y"])
|
|
2098
|
+
.compute()
|
|
2099
|
+
.to_dataframe(name=f"{stat_name}_v")
|
|
2100
|
+
.reset_index()[["mid_date", f"{stat_name}_v"]]
|
|
2101
|
+
)
|
|
2102
|
+
|
|
2103
|
+
# Merge on time coordinate (e.g., 'mid_date')
|
|
2104
|
+
if len(var_list) == 3:
|
|
2105
|
+
merged_df = reduce(
|
|
2106
|
+
lambda left, right: pd.merge(left, right, on="mid_date", how="outer"), [df_vx, df_vy, df_v]
|
|
2107
|
+
)
|
|
2108
|
+
else:
|
|
2109
|
+
merged_df = pd.merge(df_vx, df_vy, on="mid_date")
|
|
2110
|
+
|
|
2111
|
+
return merged_df
|
|
2112
|
+
|
|
2113
|
+
# # Return as cube
|
|
2114
|
+
# elif return_as == 'cube':
|
|
2115
|
+
# return self.assign({f'{stat_name}_vx': mad_results['vx'], f'{stat_name}_vy': mad_results['vy']})
|
|
2116
|
+
|
|
2117
|
+
else:
|
|
2118
|
+
raise ValueError("return_as must be 'dataframe' or 'cube'")
|
|
2119
|
+
|
|
2120
|
+
def compute_mad(self, shapefile_path, return_as="dataframe", stat_name="mad", var_list=["vx", "vy"], invert=True):
|
|
2121
|
+
"""
|
|
2122
|
+
Compute MAD per time step using Dask and apply_ufunc over a shapefile-defined area.
|
|
2123
|
+
|
|
2124
|
+
Parameters:
|
|
2125
|
+
|
|
2126
|
+
shapefile_path (str): Path to shapefile.
|
|
2127
|
+
return_as (str): 'dataframe' or 'cube'.
|
|
2128
|
+
stat_name (str): Base variable name for new data.
|
|
2129
|
+
invert (bool): Whether to invert the shapefile mask.
|
|
2130
|
+
|
|
2131
|
+
Returns:
|
|
2132
|
+
pd.DataFrame or xr.Dataset
|
|
2133
|
+
"""
|
|
2134
|
+
# Ensure data has Dask chunks
|
|
2135
|
+
self.ds = self.ds.chunk({"y": -1, "x": -1, "mid_date": 10})
|
|
2136
|
+
print(var_list)
|
|
2137
|
+
# Clip with shapefile
|
|
2138
|
+
gdf = gpd.read_file(shapefile_path)
|
|
2139
|
+
gdf = gdf.to_crs(self.ds.rio.crs)
|
|
2140
|
+
masked = self.ds.rio.clip(gdf.geometry, gdf.crs, drop=False, all_touched=True, invert=invert)
|
|
2141
|
+
|
|
2142
|
+
print("Clipped")
|
|
2143
|
+
|
|
2144
|
+
# Define MAD function
|
|
2145
|
+
def mad_2d(arr):
|
|
2146
|
+
median = np.nanmedian(arr)
|
|
2147
|
+
return 1.483 * np.nanmedian(np.abs(arr - median))
|
|
2148
|
+
|
|
2149
|
+
mad_results = {} # Store MAD DataArrays
|
|
2150
|
+
|
|
2151
|
+
for var in var_list:
|
|
2152
|
+
data = masked[var]
|
|
2153
|
+
|
|
2154
|
+
mad = xr.apply_ufunc(
|
|
2155
|
+
mad_2d,
|
|
2156
|
+
data,
|
|
2157
|
+
input_core_dims=[["y", "x"]],
|
|
2158
|
+
output_core_dims=[[]],
|
|
2159
|
+
vectorize=True,
|
|
2160
|
+
dask="parallelized",
|
|
2161
|
+
output_dtypes=[data.dtype],
|
|
2162
|
+
)
|
|
2163
|
+
|
|
2164
|
+
mad.name = f"{stat_name}_{var}"
|
|
2165
|
+
mad_results[var] = mad
|
|
2166
|
+
|
|
2167
|
+
# Return as DataFrame
|
|
2168
|
+
if return_as == "dataframe":
|
|
2169
|
+
df_vx = (
|
|
2170
|
+
mad_results["vx"]
|
|
2171
|
+
.compute()
|
|
2172
|
+
.to_dataframe(name=f"{stat_name}_vx")
|
|
2173
|
+
.reset_index()[["mid_date", f"{stat_name}_vx"]]
|
|
2174
|
+
)
|
|
2175
|
+
df_vy = (
|
|
2176
|
+
mad_results["vy"]
|
|
2177
|
+
.compute()
|
|
2178
|
+
.to_dataframe(name=f"{stat_name}_vy")
|
|
2179
|
+
.reset_index()[["mid_date", f"{stat_name}_vy"]]
|
|
2180
|
+
)
|
|
2181
|
+
if len(var_list) == 3:
|
|
2182
|
+
df_v = (
|
|
2183
|
+
mad_results[var_list[2]]
|
|
2184
|
+
.compute()
|
|
2185
|
+
.to_dataframe(name=f"{stat_name}_v")
|
|
2186
|
+
.reset_index()[["mid_date", f"{stat_name}_v"]]
|
|
2187
|
+
)
|
|
2188
|
+
|
|
2189
|
+
# Merge on time coordinate (e.g., 'mid_date')
|
|
2190
|
+
if len(var_list) == 3:
|
|
2191
|
+
merged_df = reduce(
|
|
2192
|
+
lambda left, right: pd.merge(left, right, on="mid_date", how="outer"), [df_vx, df_vy, df_v]
|
|
2193
|
+
)
|
|
2194
|
+
else:
|
|
2195
|
+
merged_df = pd.merge(df_vx, df_vy, on="mid_date")
|
|
2196
|
+
|
|
2197
|
+
return merged_df
|
|
2198
|
+
|
|
2199
|
+
# Return as cube
|
|
2200
|
+
elif return_as == "cube":
|
|
2201
|
+
return self.assign({f"{stat_name}_vx": mad_results["vx"], f"{stat_name}_vy": mad_results["vy"]})
|
|
2202
|
+
|
|
2203
|
+
else:
|
|
2204
|
+
raise ValueError("return_as must be 'dataframe' or 'cube'")
|