tfv-get-tools 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tfv_get_tools/__init__.py +4 -0
- tfv_get_tools/_standard_attrs.py +107 -0
- tfv_get_tools/atmos.py +167 -0
- tfv_get_tools/cli/_cli_base.py +173 -0
- tfv_get_tools/cli/atmos_cli.py +192 -0
- tfv_get_tools/cli/ocean_cli.py +204 -0
- tfv_get_tools/cli/tide_cli.py +118 -0
- tfv_get_tools/cli/wave_cli.py +183 -0
- tfv_get_tools/fvc/__init__.py +3 -0
- tfv_get_tools/fvc/_atmos.py +230 -0
- tfv_get_tools/fvc/_fvc.py +218 -0
- tfv_get_tools/fvc/_ocean.py +171 -0
- tfv_get_tools/fvc/_tide.py +195 -0
- tfv_get_tools/ocean.py +170 -0
- tfv_get_tools/providers/__init__.py +0 -0
- tfv_get_tools/providers/_custom_conversions.py +34 -0
- tfv_get_tools/providers/_downloader.py +566 -0
- tfv_get_tools/providers/_merger.py +520 -0
- tfv_get_tools/providers/_utilities.py +255 -0
- tfv_get_tools/providers/atmos/barra2.py +209 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_c2.yaml +52 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_r2.yaml +85 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_re2.yaml +70 -0
- tfv_get_tools/providers/atmos/cfgs/cfsr.yaml +68 -0
- tfv_get_tools/providers/atmos/cfgs/era5.yaml +77 -0
- tfv_get_tools/providers/atmos/cfgs/era5_gcp.yaml +77 -0
- tfv_get_tools/providers/atmos/cfsr.py +207 -0
- tfv_get_tools/providers/atmos/era5.py +20 -0
- tfv_get_tools/providers/atmos/era5_gcp.py +20 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_blk.yaml +64 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_glo.yaml +67 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_nws.yaml +62 -0
- tfv_get_tools/providers/ocean/cfgs/hycom.yaml +73 -0
- tfv_get_tools/providers/ocean/copernicus_ocean.py +457 -0
- tfv_get_tools/providers/ocean/hycom.py +611 -0
- tfv_get_tools/providers/wave/cawcr.py +166 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_aus_10m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_aus_4m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_glob_24m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_pac_10m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_pac_4m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/copernicus_glo.yaml +56 -0
- tfv_get_tools/providers/wave/cfgs/copernicus_nws.yaml +51 -0
- tfv_get_tools/providers/wave/cfgs/era5.yaml +48 -0
- tfv_get_tools/providers/wave/cfgs/era5_gcp.yaml +48 -0
- tfv_get_tools/providers/wave/copernicus_wave.py +38 -0
- tfv_get_tools/providers/wave/era5.py +232 -0
- tfv_get_tools/providers/wave/era5_gcp.py +169 -0
- tfv_get_tools/tide/__init__.py +2 -0
- tfv_get_tools/tide/_nodestring.py +214 -0
- tfv_get_tools/tide/_tidal_base.py +568 -0
- tfv_get_tools/utilities/_tfv_bc.py +78 -0
- tfv_get_tools/utilities/horizontal_padding.py +89 -0
- tfv_get_tools/utilities/land_masking.py +93 -0
- tfv_get_tools/utilities/parsers.py +44 -0
- tfv_get_tools/utilities/warnings.py +38 -0
- tfv_get_tools/wave.py +179 -0
- tfv_get_tools-0.2.0.dist-info/METADATA +286 -0
- tfv_get_tools-0.2.0.dist-info/RECORD +62 -0
- tfv_get_tools-0.2.0.dist-info/WHEEL +5 -0
- tfv_get_tools-0.2.0.dist-info/entry_points.txt +5 -0
- tfv_get_tools-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
_BASE_URL: "https://thredds.rda.ucar.edu/thredds/dodsC/files/g"
|
|
2
|
+
_INFO_URL: https://climatedataguide.ucar.edu/climate-data/climate-forecast-system-reanalysis-cfsr
|
|
3
|
+
|
|
4
|
+
_SOURCE_XLIMS: [-180.0, 359.5]
|
|
5
|
+
_SOURCE_YLIMS: [-90.0, 90.0]
|
|
6
|
+
_SOURCE_TIMELIMS: ["1979-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default variables to extract if no `variables` arg is provided.
|
|
11
|
+
_VARIABLES: ["wnd10m", "prmsl", "dlwsfc", "dswsfc", "tmp2m", "rh2m", "prate"]
|
|
12
|
+
|
|
13
|
+
# This CFSR dataset mapping links the dataset id and system name to the valid date.
|
|
14
|
+
# The full opendap url takes the form <base_url>/<dset_id>/<year>/<var>.<sys>.<datestr>.grb2
|
|
15
|
+
_DATASETS: {
|
|
16
|
+
1979-01-01: { d093001-gdas: default },
|
|
17
|
+
2011-01-01: { d094001-gdas: default },
|
|
18
|
+
2011-04-01: { d094001-cdas1: default },
|
|
19
|
+
} # Entries above, in order: CFSR Selected Hourly; CFSv2 Selected Hourly on GDAS; CFSv2 Selected Hourly on CDAS1
|
|
20
|
+
|
|
21
|
+
# For CFSR, the merged grid will be based on the u10 variable
|
|
22
|
+
u10:
|
|
23
|
+
source_var: "u-component_of_wind_height_above_ground"
|
|
24
|
+
tfv_var: "W10_GRID"
|
|
25
|
+
bc_scale: 1
|
|
26
|
+
bc_offset: 0
|
|
27
|
+
|
|
28
|
+
v10:
|
|
29
|
+
source_var: "v-component_of_wind_height_above_ground"
|
|
30
|
+
tfv_var: "W10_GRID"
|
|
31
|
+
bc_scale: 1
|
|
32
|
+
bc_offset: 0
|
|
33
|
+
|
|
34
|
+
mslp:
|
|
35
|
+
source_var: "Pressure_reduced_to_MSL_msl"
|
|
36
|
+
tfv_var: "MSLP_GRID"
|
|
37
|
+
bc_scale: 0.01
|
|
38
|
+
bc_offset: 0
|
|
39
|
+
|
|
40
|
+
dlwrf:
|
|
41
|
+
source_var: "Downward_Long-Wave_Radp_Flux_surface_Mixed_intervals_Average"
|
|
42
|
+
tfv_var: "LW_RAD_GRID"
|
|
43
|
+
bc_scale: 1
|
|
44
|
+
bc_offset: 0
|
|
45
|
+
|
|
46
|
+
dswrf:
|
|
47
|
+
source_var: "Downward_Short-Wave_Radiation_Flux_surface_Mixed_intervals_Average"
|
|
48
|
+
tfv_var: "SW_RAD_GRID"
|
|
49
|
+
bc_scale: 1
|
|
50
|
+
bc_offset: 0
|
|
51
|
+
|
|
52
|
+
t2m:
|
|
53
|
+
source_var: "Temperature_height_above_ground"
|
|
54
|
+
tfv_var: "AIR_TEMP_GRID"
|
|
55
|
+
bc_scale: 1
|
|
56
|
+
bc_offset: -273.15
|
|
57
|
+
|
|
58
|
+
relhum:
|
|
59
|
+
source_var: "Relative_humidity_height_above_ground"
|
|
60
|
+
tfv_var: "REL_HUM_GRID"
|
|
61
|
+
bc_scale: 1
|
|
62
|
+
bc_offset: 0
|
|
63
|
+
|
|
64
|
+
prate:
|
|
65
|
+
source_var: "Precipitation_rate_surface_Mixed_intervals_Average"
|
|
66
|
+
tfv_var: PRECIP_GRID
|
|
67
|
+
bc_scale: 86.4 # Convert from kg/m2/s (= mm/s) to m/day (x 86400 s/day, / 1000 mm/m)
|
|
68
|
+
bc_offset: 0
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
_BASE_URL: "N/A"
|
|
2
|
+
_INFO_URL: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels
|
|
3
|
+
|
|
4
|
+
_SOURCE_XLIMS: [-180, 360]
|
|
5
|
+
_SOURCE_YLIMS: [-90, 90]
|
|
6
|
+
_SOURCE_TIMELIMS: ["1940-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default variables to extract if no `variables` arg is provided.
|
|
11
|
+
_VARIABLES:
|
|
12
|
+
[
|
|
13
|
+
"10m_u_component_of_wind",
|
|
14
|
+
"10m_v_component_of_wind",
|
|
15
|
+
"mean_sea_level_pressure",
|
|
16
|
+
"mean_surface_downward_long_wave_radiation_flux",
|
|
17
|
+
"mean_surface_downward_short_wave_radiation_flux",
|
|
18
|
+
"2m_temperature",
|
|
19
|
+
"mean_total_precipitation_rate",
|
|
20
|
+
"2m_dewpoint_temperature",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
# ERA5 uses only one dataset: reanalysis-era5-single-levels.
|
|
24
|
+
_DATASETS: { 1940-01-01: { reanalysis-era5-single-levels: default } }
|
|
25
|
+
|
|
26
|
+
# KEY: The standard NC Variable for merged data (and per _standard_attrs.py)
|
|
27
|
+
# source var == var as in the original dataset
|
|
28
|
+
# tfv_var == standard tuflow fv var name (for FVC writer template)
|
|
29
|
+
|
|
30
|
+
u10:
|
|
31
|
+
source_var: "u10"
|
|
32
|
+
tfv_var: "W10_GRID"
|
|
33
|
+
bc_scale: 1
|
|
34
|
+
bc_offset: 0
|
|
35
|
+
|
|
36
|
+
v10:
|
|
37
|
+
source_var: "v10"
|
|
38
|
+
tfv_var: "W10_GRID"
|
|
39
|
+
bc_scale: 1
|
|
40
|
+
bc_offset: 0
|
|
41
|
+
|
|
42
|
+
mslp:
|
|
43
|
+
source_var: "msl"
|
|
44
|
+
tfv_var: "MSLP_GRID"
|
|
45
|
+
bc_scale: 0.01
|
|
46
|
+
bc_offset: 0
|
|
47
|
+
|
|
48
|
+
dlwrf:
|
|
49
|
+
source_var: "msdwlwrf"
|
|
50
|
+
tfv_var: LW_RAD_GRID
|
|
51
|
+
bc_scale: 1
|
|
52
|
+
bc_offset: 0
|
|
53
|
+
|
|
54
|
+
dswrf:
|
|
55
|
+
source_var: "msdwswrf"
|
|
56
|
+
tfv_var: SW_RAD_GRID
|
|
57
|
+
bc_scale: 1
|
|
58
|
+
bc_offset: 0
|
|
59
|
+
|
|
60
|
+
t2m:
|
|
61
|
+
source_var: "t2m"
|
|
62
|
+
tfv_var: AIR_TEMP_GRID
|
|
63
|
+
bc_scale: 1
|
|
64
|
+
bc_offset: 0
|
|
65
|
+
|
|
66
|
+
prate:
|
|
67
|
+
source_var: "mtpr"
|
|
68
|
+
tfv_var: PRECIP_GRID
|
|
69
|
+
bc_scale: 86.4 # Convert from kg/m2/s (= mm/s) to m/day (x 86400 s/day, / 1000 mm/m)
|
|
70
|
+
bc_offset: 0
|
|
71
|
+
|
|
72
|
+
relhum:
|
|
73
|
+
source_var: d2m
|
|
74
|
+
tfv_var: "relative_humidity"
|
|
75
|
+
bc_scale: 1
|
|
76
|
+
bc_offset: 0
|
|
77
|
+
pre_process: dewpt_to_relhum(d2m, t2m)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
_BASE_URL: "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3"
|
|
2
|
+
_INFO_URL: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels
|
|
3
|
+
|
|
4
|
+
_SOURCE_XLIMS: [-180, 360]
|
|
5
|
+
_SOURCE_YLIMS: [-90, 90]
|
|
6
|
+
_SOURCE_TIMELIMS: ["1940-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default variables to extract if no `variables` arg is provided.
|
|
11
|
+
_VARIABLES:
|
|
12
|
+
[
|
|
13
|
+
"10m_u_component_of_wind",
|
|
14
|
+
"10m_v_component_of_wind",
|
|
15
|
+
"mean_sea_level_pressure",
|
|
16
|
+
"mean_surface_downward_long_wave_radiation_flux",
|
|
17
|
+
"mean_surface_downward_short_wave_radiation_flux",
|
|
18
|
+
"2m_temperature",
|
|
19
|
+
"mean_total_precipitation_rate",
|
|
20
|
+
"2m_dewpoint_temperature",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
# ERA5 uses only one dataset: reanalysis-era5-single-levels.
|
|
24
|
+
_DATASETS: { 1940-01-01: { reanalysis-era5-single-levels: default } }
|
|
25
|
+
|
|
26
|
+
# KEY: The standard NC Variable for merged data (and per _standard_attrs.py)
|
|
27
|
+
# source var == var as in the original dataset
|
|
28
|
+
# tfv_var == standard tuflow fv var name (for FVC writer template)
|
|
29
|
+
|
|
30
|
+
u10:
|
|
31
|
+
source_var: "u10"
|
|
32
|
+
tfv_var: "W10_GRID"
|
|
33
|
+
bc_scale: 1
|
|
34
|
+
bc_offset: 0
|
|
35
|
+
|
|
36
|
+
v10:
|
|
37
|
+
source_var: "v10"
|
|
38
|
+
tfv_var: "W10_GRID"
|
|
39
|
+
bc_scale: 1
|
|
40
|
+
bc_offset: 0
|
|
41
|
+
|
|
42
|
+
mslp:
|
|
43
|
+
source_var: "msl"
|
|
44
|
+
tfv_var: "MSLP_GRID"
|
|
45
|
+
bc_scale: 0.01
|
|
46
|
+
bc_offset: 0
|
|
47
|
+
|
|
48
|
+
dlwrf:
|
|
49
|
+
source_var: "msdwlwrf"
|
|
50
|
+
tfv_var: LW_RAD_GRID
|
|
51
|
+
bc_scale: 1
|
|
52
|
+
bc_offset: 0
|
|
53
|
+
|
|
54
|
+
dswrf:
|
|
55
|
+
source_var: "msdwswrf"
|
|
56
|
+
tfv_var: SW_RAD_GRID
|
|
57
|
+
bc_scale: 1
|
|
58
|
+
bc_offset: 0
|
|
59
|
+
|
|
60
|
+
t2m:
|
|
61
|
+
source_var: "t2m"
|
|
62
|
+
tfv_var: AIR_TEMP_GRID
|
|
63
|
+
bc_scale: 1
|
|
64
|
+
bc_offset: 0
|
|
65
|
+
|
|
66
|
+
prate:
|
|
67
|
+
source_var: "mtpr"
|
|
68
|
+
tfv_var: PRECIP_GRID
|
|
69
|
+
bc_scale: 86.4 # Convert from kg/m2/s (= mm/s) to m/day (x 86400 s/day, / 1000 mm/m)
|
|
70
|
+
bc_offset: 0
|
|
71
|
+
|
|
72
|
+
relhum:
|
|
73
|
+
source_var: d2m
|
|
74
|
+
tfv_var: "relative_humidity"
|
|
75
|
+
bc_scale: 1
|
|
76
|
+
bc_offset: 0
|
|
77
|
+
pre_process: dewpt_to_relhum(d2m, t2m)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFSR/CFSv2
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import xarray as xr
|
|
11
|
+
from pandas.tseries.offsets import MonthEnd
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
|
|
14
|
+
from tfv_get_tools.providers._downloader import BaseDownloader
|
|
15
|
+
from tfv_get_tools.providers._merger import BaseMerger
|
|
16
|
+
from tfv_get_tools.providers._utilities import todstr
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DownloadCFSRAtmos(BaseDownloader):
    """CFSR/CFSv2 atmospheric downloader - only the source-specific parts.

    Everything generic is inherited from ``BaseDownloader``; this class sets
    the source/mode identifiers, builds the per-month OPeNDAP URLs and
    performs the spatial subsetting of each remote file.
    """

    def _init_specific(self, **kwargs):
        """Set source and mode, then load the matching configuration."""
        self.source = "CFSR"
        self.mode = "ATMOS"
        self._load_config()

    def _get_output_filename(self, ts: pd.Timestamp, te: pd.Timestamp, var: str) -> Path:
        """Return the output path ``<outdir>/<prefix>_<var>_<start>_<end>.nc``."""
        return self.outdir / f"{self.prefix}_{var}_{todstr(ts)}_{todstr(te)}.nc"

    def _construct_opendap_url(self, date: pd.Timestamp, var: str) -> str:
        """Build the OPeNDAP URL for ``var`` in the month containing ``date``.

        The URL has the form
        ``<base_url>/<dataset_id>/<year>/<var>.<system>.<YYYYMM>.grb2``.

        Raises:
            IndexError: if ``date`` precedes every key of ``self.dsmap``.
        """
        dataset_dates = list(self.dsmap.keys())
        # Pick the last mapping entry whose start date is on or before `date`.
        # NOTE(review): this relies on the dsmap keys being in ascending order.
        idx = np.where([date.date() >= x for x in dataset_dates])[0][-1]
        dataset_time = dataset_dates[idx]

        # The entry key is "<dataset_id>-<system>", e.g. "d093001-gdas".
        ds_entry = list(self.dsmap[dataset_time].keys())[0]
        dataset_id, system = ds_entry.split("-")

        datestr = date.strftime("%Y%m")
        year = date.year

        fname = f"{var}.{system}.{datestr}.grb2"
        return f"{self.base_url}/{dataset_id}/{year}/{fname}"

    def _download_single_file(self, fname: Path, url: str) -> bool:
        """Open ``url``, subset it to ``xlims``/``ylims`` and write it to ``fname``.

        Returns:
            True when the subset was written, False on any failure (the error
            is printed when ``self.verbose`` is set).
        """
        try:
            # The context manager closes the remote dataset even if the
            # subsetting or the write fails, so no handle is leaked.
            with xr.open_dataset(url) as ds:
                lon = ds["lon"]
                lat = ds["lat"]

                # Handle longitude selection (CFSR-specific logic)
                if self.xlims[0] < self.xlims[1]:
                    lon_idx = np.hstack(
                        np.where((self.xlims[0] <= lon) & (lon <= self.xlims[1]))
                    )
                else:
                    # Limits wrap around the end of the longitude axis: take
                    # the low-end points first, then the high-end points.
                    lon_idx = np.hstack(
                        (
                            np.where(lon <= self.xlims[1])[0],
                            np.where(lon >= self.xlims[0])[0],
                        )
                    )

                # Explicit raise instead of `assert`: asserts vanish under -O.
                if lon_idx.size <= 1:
                    raise ValueError("Fewer than two longitude points selected!")

                # Select latitude indices
                lat_idx = np.hstack(
                    np.where((self.ylims[0] <= lat) & (lat <= self.ylims[1]))
                )
                if lat_idx.size <= 1:
                    raise ValueError("Fewer than two latitude points selected!")

                # Subset and save
                ds.isel(lon=lon_idx, lat=lat_idx).to_netcdf(fname)
            return True

        except Exception as e:
            # Deliberate catch-all: a failed file is reported through the
            # return value rather than aborting the whole download run.
            if self.verbose:
                print(f"Failed to download {url}: {e}")
            return False

    def download(self):
        """Yield one download task per (start time, variable) pair.

        Each task is a dict with the keys ``file_path``, ``url``,
        ``timestamp``, ``variable`` and ``download_func`` (a zero-argument
        callable that performs the download and returns a bool).
        """
        # The base class handles result tracking, progress, etc.
        # We just need to yield the download tasks

        for ts in self.times:
            te = ts + MonthEnd() + pd.Timedelta("23.9h")

            for var in self.variables:
                out_file = self._get_output_filename(ts, te, var)
                url = self._construct_opendap_url(ts, var)

                # Let base class handle the file existence check, timing, etc.
                yield {
                    'file_path': out_file,
                    'url': url,
                    'timestamp': ts,
                    'variable': var,
                    # Defaults bind the current values, avoiding late binding.
                    'download_func': lambda f=out_file, u=url: self._download_single_file(f, u),
                }
|
|
110
|
+
|
|
111
|
+
class MergeCFSRAtmos(BaseMerger):
    """Merger for downloaded CFSR/CFSv2 atmospheric NetCDF files.

    Files are grouped by start date, the variables of each group are merged
    on a common grid, and the groups are then concatenated along time.
    """

    def _init_specific(self) -> None:
        """Set source and mode, then load the matching configuration."""
        self.source = "CFSR"
        self.mode = "ATMOS"
        self._load_config()

    def _extract_target_coordinates(self, datasets: List[xr.Dataset]) -> Tuple[np.ndarray, np.ndarray]:
        """Extract appropriate wind coordinates, preferring CFSv2 over CFSR.

        Only datasets holding the u-wind variable are considered. A dataset
        whose first timestamp is on or after 2011-01-01 counts as CFSv2.

        Returns:
            ``(lon, lat)`` arrays of the chosen dataset. If no dataset holds
            the u-wind variable, the coordinates of ``datasets[0]`` are used.
        """
        cfsv2_coords = None
        cfsr_coords = None

        for ds in datasets:
            if ds is None or 'u-component_of_wind_height_above_ground' not in ds:
                continue

            time_values = pd.to_datetime(ds['time'].values)
            first_time = time_values[0] if len(time_values) > 0 else pd.Timestamp.min

            if first_time >= pd.Timestamp(2011, 1, 1):
                cfsv2_coords = (ds['lon'].values, ds['lat'].values)
            else:
                cfsr_coords = (ds['lon'].values, ds['lat'].values)

        # Prefer CFSv2, fallback to CFSR, then any available
        return cfsv2_coords or cfsr_coords or (datasets[0]['lon'].values, datasets[0]['lat'].values)

    def _process_file(self, file_path: Path) -> Optional[xr.Dataset]:
        """Load a file and drop time steps that fall on minute 30.

        Returns:
            The filtered dataset, or None when the file could not be opened.
        """
        ds = self._open_subset_netcdf(file_path, chunks=dict(time=24))
        if ds is None:
            return None

        # Vectorised minute test; integer positions keep isel well-typed even
        # when nothing (or everything) is selected.
        minutes = pd.to_datetime(ds['time'].values).minute
        return ds.isel(time=np.flatnonzero(minutes != 30))

    def merge_files(self, file_list: List[Path]) -> Tuple[xr.Dataset, List[Path]]:
        """Merge CFSR files: group by startdate, merge variables, then concat time.

        Returns:
            ``(merged, skipped_files)`` - the merged dataset with coordinates
            renamed to ``longitude``/``latitude``, and the files that could
            not be loaded.

        Raises:
            ValueError: if ``file_list`` is empty or no file could be loaded.
        """
        if not file_list:
            raise ValueError("No files provided for merging")

        # Group files by start date (second-to-last "_" field of the stem)
        startdates = [f.stem.split('_')[-2] for f in file_list]
        grouped_files = {date: [] for date in np.unique(startdates)}

        # Load files and group them
        all_datasets = []
        skipped_files = []

        progress = tqdm(file_list, disable=not self.verbose)
        for file_path, startdate in zip(progress, startdates):
            ds = self._process_file(file_path)
            if ds is None:
                skipped_files.append(file_path)
                continue
            grouped_files[startdate].append(ds)
            all_datasets.append(ds)

        if not all_datasets:
            raise ValueError("No valid datasets could be loaded")

        # Extract target coordinates for interpolation
        target_lon, target_lat = self._extract_target_coordinates(all_datasets)

        if self.verbose:
            print("Concatenating and interpolating xarray dataset")

        # Merge variables for each start date group
        merged_by_date = []
        for date_group in grouped_files.values():
            if not date_group:
                continue
            # Interpolate all datasets to common grid
            interpolated = [
                ds.interp(lon=target_lon, lat=target_lat, method='linear',
                          kwargs=dict(fill_value='extrapolate'))
                for ds in date_group
            ]
            merged_by_date.append(xr.merge(interpolated, compat='override'))

        # Concatenate along time dimension
        merged = xr.concat(merged_by_date, dim="time", combine_attrs="override",
                           data_vars="minimal", coords="minimal", compat="override")

        # Final cleanup. The height dimension only exists when a variable
        # defined at a height above ground was merged; reducing over a missing
        # dimension raises, so collapse it only when it is present.
        if 'height_above_ground' in merged.dims:
            merged = merged.mean(dim='height_above_ground', skipna=True)
        merged = merged.drop_vars(['reftime', 'time_bounds', 'GaussLatLon_Projection',
                                   'LatLon_Projection', 'height_above_ground'], errors='ignore')

        # Sort and remove duplicates
        merged = merged.sortby('time')
        _, unique_idx = np.unique(merged['time'], return_index=True)
        merged = merged.isel(time=unique_idx)

        # Fill gaps and standardise coordinates
        merged = merged.bfill('time', limit=3).ffill('time', limit=3)
        merged = merged.rename({'lon': 'longitude', 'lat': 'latitude'})

        return merged, skipped_files
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This ERA5 ATMOS model has been branched from the ERA5 Wave Downloader
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from tfv_get_tools.providers.wave.era5 import DownloadERA5Wave, MergeERA5Wave
|
|
6
|
+
|
|
7
|
+
class DownloadERA5Atmos(DownloadERA5Wave):
    """ERA5 atmospheric downloader, reusing the ERA5 wave downloader logic."""

    def _init_specific(self):
        """Tag the instance as ERA5/ATMOS and load the matching config."""
        self.source, self.mode = "ERA5", "ATMOS"
        self._load_config()

        # User login check not yet performed
        self._logged_in = False
|
|
15
|
+
|
|
16
|
+
class MergeERA5Atmos(MergeERA5Wave):
    """ERA5 atmospheric merger, reusing the ERA5 wave merger logic."""

    def _init_specific(self):
        """Tag the instance as ERA5/ATMOS and load the matching config."""
        self.source, self.mode = "ERA5", "ATMOS"
        self._load_config()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This ERA5 ATMOS model has been branched from the ERA5 Wave Downloader
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from tfv_get_tools.providers.wave.era5 import DownloadERA5Wave, MergeERA5Wave
|
|
6
|
+
|
|
7
|
+
class DownloadERA5AtmosGCP(DownloadERA5Wave):
    """ERA5 (GCP) atmospheric downloader built on the ERA5 wave downloader.

    NOTE(review): the package also ships ``providers/wave/era5_gcp.py``;
    confirm that inheriting from ``wave.era5`` rather than that module is
    intended.
    """

    def _init_specific(self):
        """Tag the instance as ERA5_GCP/ATMOS and load the matching config."""
        self.source, self.mode = "ERA5_GCP", "ATMOS"
        self._load_config()

        # User login check not yet performed
        self._logged_in = False
|
|
15
|
+
|
|
16
|
+
class MergeERA5Atmos(MergeERA5Wave):
    """ERA5 atmospheric merger, reusing the ERA5 wave merger logic.

    NOTE(review): this module's downloader uses source 'ERA5_GCP', while this
    merger keeps source 'ERA5' and the class name used in ``era5.py`` -
    confirm that this is intentional.
    """

    def _init_specific(self):
        """Tag the instance as ERA5/ATMOS and load the matching config."""
        self.source, self.mode = "ERA5", "ATMOS"
        self._load_config()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
_BASE_URL: "N/A"
|
|
2
|
+
|
|
3
|
+
_SOURCE_XLIMS: [27.37, 41.96]
|
|
4
|
+
_SOURCE_YLIMS: [40.86, 46.8]
|
|
5
|
+
_SOURCE_ZLIMS: [0, 2300] # Approx 2248m is max in BLKSEA Forecast.
|
|
6
|
+
_SOURCE_TIMELIMS: ["1993-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default Copernicus ocean variables for the Black Sea (BLK) datasets
|
|
11
|
+
_VARIABLES: ["zos", "uo", "vo", "so", "thetao"]
|
|
12
|
+
|
|
13
|
+
# Variable mapping for individual datasets
|
|
14
|
+
_DATASETS:
|
|
15
|
+
{
|
|
16
|
+
1993-01-01:
|
|
17
|
+
{
|
|
18
|
+
cmems_mod_blk_phy-cur_my_2.5km_P1D-m: ["uo", "vo"],
|
|
19
|
+
cmems_mod_blk_phy-sal_my_2.5km_P1D-m: ["so"],
|
|
20
|
+
cmems_mod_blk_phy-tem_my_2.5km_P1D-m: ["thetao"],
|
|
21
|
+
cmems_mod_blk_phy-ssh_my_2.5km_P1D-m: ["zos"],
|
|
22
|
+
},
|
|
23
|
+
forecast:
|
|
24
|
+
{
|
|
25
|
+
cmems_mod_blk_phy-cur_anfc_2.5km_P1D-m: ["uo", "vo"],
|
|
26
|
+
cmems_mod_blk_phy-sal_anfc_2.5km_P1D-m: ["so"],
|
|
27
|
+
cmems_mod_blk_phy-tem_anfc_2.5km_P1D-m: ["thetao"],
|
|
28
|
+
cmems_mod_blk_phy-ssh_anfc_2.5km_P1D-m: ["zos"],
|
|
29
|
+
},
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
# KEY: The standard NC Variable for merged data (and per _standard_attrs.py)
|
|
33
|
+
# source var == var as in the original dataset
|
|
34
|
+
# tfv_var == standard tuflow fv var name (for FVC writer template)
|
|
35
|
+
|
|
36
|
+
surf_el:
|
|
37
|
+
source_var: "zos"
|
|
38
|
+
tfv_var: "H"
|
|
39
|
+
bc_scale: 1
|
|
40
|
+
bc_offset: 0
|
|
41
|
+
|
|
42
|
+
water_u:
|
|
43
|
+
source_var: "uo"
|
|
44
|
+
tfv_var: "V_x"
|
|
45
|
+
bc_scale: 1
|
|
46
|
+
bc_offset: 0
|
|
47
|
+
|
|
48
|
+
water_v:
|
|
49
|
+
source_var: "vo"
|
|
50
|
+
tfv_var: "V_y"
|
|
51
|
+
bc_scale: 1
|
|
52
|
+
bc_offset: 0
|
|
53
|
+
|
|
54
|
+
salinity:
|
|
55
|
+
source_var: "so"
|
|
56
|
+
tfv_var: "SAL"
|
|
57
|
+
bc_scale: 1
|
|
58
|
+
bc_offset: 0
|
|
59
|
+
|
|
60
|
+
water_temp:
|
|
61
|
+
source_var: "thetao"
|
|
62
|
+
tfv_var: "TEMP"
|
|
63
|
+
bc_scale: 1
|
|
64
|
+
bc_offset: 0
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
_BASE_URL: "N/A"
|
|
2
|
+
|
|
3
|
+
_SOURCE_XLIMS: [-180, 360]
|
|
4
|
+
_SOURCE_YLIMS: [-90, 90]
|
|
5
|
+
_SOURCE_ZLIMS: [null, null]
|
|
6
|
+
_SOURCE_TIMELIMS: ["1993-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default copernicus ocean variables for global
|
|
11
|
+
_VARIABLES: ["zos", "uo", "vo", "so", "thetao"]
|
|
12
|
+
|
|
13
|
+
# Variable mapping for individual datasets
|
|
14
|
+
_DATASETS:
|
|
15
|
+
{
|
|
16
|
+
1993-01-01:
|
|
17
|
+
{
|
|
18
|
+
cmems_mod_glo_phy_my_0.083deg_P1D-m:
|
|
19
|
+
["zos", "uo", "vo", "so", "thetao"],
|
|
20
|
+
},
|
|
21
|
+
2021-07-01:
|
|
22
|
+
{
|
|
23
|
+
cmems_mod_glo_phy_myint_0.083deg_P1D-m:
|
|
24
|
+
["zos", "uo", "vo", "so", "thetao"],
|
|
25
|
+
},
|
|
26
|
+
forecast:
|
|
27
|
+
{
|
|
28
|
+
cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i: ["uo", "vo"],
|
|
29
|
+
cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i: ["so"],
|
|
30
|
+
cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i: ["thetao"],
|
|
31
|
+
cmems_mod_glo_phy_anfc_0.083deg_PT1H-m: ["zos"],
|
|
32
|
+
},
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
# KEY: The standard NC Variable for merged data (and per _standard_attrs.py)
|
|
36
|
+
# source var == var as in the original dataset
|
|
37
|
+
# tfv_var == standard tuflow fv var name (for FVC writer template)
|
|
38
|
+
|
|
39
|
+
surf_el:
|
|
40
|
+
source_var: "zos"
|
|
41
|
+
tfv_var: "H"
|
|
42
|
+
bc_scale: 1
|
|
43
|
+
bc_offset: 0
|
|
44
|
+
|
|
45
|
+
water_u:
|
|
46
|
+
source_var: "uo"
|
|
47
|
+
tfv_var: "V_x"
|
|
48
|
+
bc_scale: 1
|
|
49
|
+
bc_offset: 0
|
|
50
|
+
|
|
51
|
+
water_v:
|
|
52
|
+
source_var: "vo"
|
|
53
|
+
tfv_var: "V_y"
|
|
54
|
+
bc_scale: 1
|
|
55
|
+
bc_offset: 0
|
|
56
|
+
|
|
57
|
+
salinity:
|
|
58
|
+
source_var: "so"
|
|
59
|
+
tfv_var: "SAL"
|
|
60
|
+
bc_scale: 1
|
|
61
|
+
bc_offset: 0
|
|
62
|
+
|
|
63
|
+
water_temp:
|
|
64
|
+
source_var: "thetao"
|
|
65
|
+
tfv_var: "TEMP"
|
|
66
|
+
bc_scale: 1
|
|
67
|
+
bc_offset: 0
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
_BASE_URL: "N/A"
|
|
2
|
+
|
|
3
|
+
_SOURCE_XLIMS: [-19.89, 13.0]
|
|
4
|
+
_SOURCE_YLIMS: [40.07, 65.0]
|
|
5
|
+
_SOURCE_ZLIMS: [0, 5000]
|
|
6
|
+
_SOURCE_TIMELIMS: ["1993-01-01 00:00:00", null]
|
|
7
|
+
|
|
8
|
+
_DOWNLOAD_INTERVAL: monthly
|
|
9
|
+
|
|
10
|
+
# Default Copernicus ocean variables for the NWS datasets
|
|
11
|
+
_VARIABLES: ["zos", "uo", "vo", "so", "thetao"]
|
|
12
|
+
|
|
13
|
+
# Variable mapping for individual datasets
|
|
14
|
+
_DATASETS:
|
|
15
|
+
{
|
|
16
|
+
1993-01-01:
|
|
17
|
+
{
|
|
18
|
+
cmems_mod_nws_phy-uv_my_7km-3D_P1D-m: ["uo", "vo"],
|
|
19
|
+
cmems_mod_nws_phy-s_my_7km-3D_P1D-m: ["so"],
|
|
20
|
+
cmems_mod_nws_phy-t_my_7km-3D_P1D-m: ["thetao"],
|
|
21
|
+
cmems_mod_nws_phy-ssh_my_7km-2D_PT1H-i: ["zos"],
|
|
22
|
+
},
|
|
23
|
+
forecast:
|
|
24
|
+
{
|
|
25
|
+
cmems_mod_nws_phy_anfc_0.027deg-2D_PT1H-m: ["zos"],
|
|
26
|
+
cmems_mod_nws_phy_anfc_0.027deg-3D_PT1H-m: ["uo", "vo", "so", "thetao"],
|
|
27
|
+
},
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
# KEY: The standard NC Variable for merged data (and per _standard_attrs.py)
|
|
31
|
+
# source var == var as in the original dataset
|
|
32
|
+
# tfv_var == standard tuflow fv var name (for FVC writer template)
|
|
33
|
+
|
|
34
|
+
surf_el:
|
|
35
|
+
source_var: "zos"
|
|
36
|
+
tfv_var: "H"
|
|
37
|
+
bc_scale: 1
|
|
38
|
+
bc_offset: 0
|
|
39
|
+
|
|
40
|
+
water_u:
|
|
41
|
+
source_var: "uo"
|
|
42
|
+
tfv_var: "V_x"
|
|
43
|
+
bc_scale: 1
|
|
44
|
+
bc_offset: 0
|
|
45
|
+
|
|
46
|
+
water_v:
|
|
47
|
+
source_var: "vo"
|
|
48
|
+
tfv_var: "V_y"
|
|
49
|
+
bc_scale: 1
|
|
50
|
+
bc_offset: 0
|
|
51
|
+
|
|
52
|
+
salinity:
|
|
53
|
+
source_var: "so"
|
|
54
|
+
tfv_var: "SAL"
|
|
55
|
+
bc_scale: 1
|
|
56
|
+
bc_offset: 0
|
|
57
|
+
|
|
58
|
+
water_temp:
|
|
59
|
+
source_var: "thetao"
|
|
60
|
+
tfv_var: "TEMP"
|
|
61
|
+
bc_scale: 1
|
|
62
|
+
bc_offset: 0
|