tfv-get-tools 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tfv_get_tools/__init__.py +4 -0
- tfv_get_tools/_standard_attrs.py +107 -0
- tfv_get_tools/atmos.py +167 -0
- tfv_get_tools/cli/_cli_base.py +173 -0
- tfv_get_tools/cli/atmos_cli.py +192 -0
- tfv_get_tools/cli/ocean_cli.py +204 -0
- tfv_get_tools/cli/tide_cli.py +118 -0
- tfv_get_tools/cli/wave_cli.py +183 -0
- tfv_get_tools/fvc/__init__.py +3 -0
- tfv_get_tools/fvc/_atmos.py +230 -0
- tfv_get_tools/fvc/_fvc.py +218 -0
- tfv_get_tools/fvc/_ocean.py +171 -0
- tfv_get_tools/fvc/_tide.py +195 -0
- tfv_get_tools/ocean.py +170 -0
- tfv_get_tools/providers/__init__.py +0 -0
- tfv_get_tools/providers/_custom_conversions.py +34 -0
- tfv_get_tools/providers/_downloader.py +566 -0
- tfv_get_tools/providers/_merger.py +520 -0
- tfv_get_tools/providers/_utilities.py +255 -0
- tfv_get_tools/providers/atmos/barra2.py +209 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_c2.yaml +52 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_r2.yaml +85 -0
- tfv_get_tools/providers/atmos/cfgs/barra2_re2.yaml +70 -0
- tfv_get_tools/providers/atmos/cfgs/cfsr.yaml +68 -0
- tfv_get_tools/providers/atmos/cfgs/era5.yaml +77 -0
- tfv_get_tools/providers/atmos/cfgs/era5_gcp.yaml +77 -0
- tfv_get_tools/providers/atmos/cfsr.py +207 -0
- tfv_get_tools/providers/atmos/era5.py +20 -0
- tfv_get_tools/providers/atmos/era5_gcp.py +20 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_blk.yaml +64 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_glo.yaml +67 -0
- tfv_get_tools/providers/ocean/cfgs/copernicus_nws.yaml +62 -0
- tfv_get_tools/providers/ocean/cfgs/hycom.yaml +73 -0
- tfv_get_tools/providers/ocean/copernicus_ocean.py +457 -0
- tfv_get_tools/providers/ocean/hycom.py +611 -0
- tfv_get_tools/providers/wave/cawcr.py +166 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_aus_10m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_aus_4m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_glob_24m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_pac_10m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/cawcr_pac_4m.yaml +39 -0
- tfv_get_tools/providers/wave/cfgs/copernicus_glo.yaml +56 -0
- tfv_get_tools/providers/wave/cfgs/copernicus_nws.yaml +51 -0
- tfv_get_tools/providers/wave/cfgs/era5.yaml +48 -0
- tfv_get_tools/providers/wave/cfgs/era5_gcp.yaml +48 -0
- tfv_get_tools/providers/wave/copernicus_wave.py +38 -0
- tfv_get_tools/providers/wave/era5.py +232 -0
- tfv_get_tools/providers/wave/era5_gcp.py +169 -0
- tfv_get_tools/tide/__init__.py +2 -0
- tfv_get_tools/tide/_nodestring.py +214 -0
- tfv_get_tools/tide/_tidal_base.py +568 -0
- tfv_get_tools/utilities/_tfv_bc.py +78 -0
- tfv_get_tools/utilities/horizontal_padding.py +89 -0
- tfv_get_tools/utilities/land_masking.py +93 -0
- tfv_get_tools/utilities/parsers.py +44 -0
- tfv_get_tools/utilities/warnings.py +38 -0
- tfv_get_tools/wave.py +179 -0
- tfv_get_tools-0.2.0.dist-info/METADATA +286 -0
- tfv_get_tools-0.2.0.dist-info/RECORD +62 -0
- tfv_get_tools-0.2.0.dist-info/WHEEL +5 -0
- tfv_get_tools-0.2.0.dist-info/entry_points.txt +5 -0
- tfv_get_tools-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,255 @@ tfv_get_tools/providers/_utilities.py
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd
import xarray as xr
import yaml

root = Path(__file__).parent

def _get_config(mode: str, source: str, model=None):
    """Load a data source config file

    Args:
        mode (str): Data type {'ocean', 'wave', 'atmos'}
        source (str): Source, e.g., {'hycom', 'copernicus'}
        model (str, optional): Model variant; None or 'default' selects the base config.

    Returns:
        dict: cfg dictionary
        str: Base data source URL (e.g., for THREDDS)
    """
    # Replace the 'default' tag with None for this check
    model = None if model == 'default' else model

    # If a model is specified, append it to the filename
    if model is None:
        cfgname = f'{source}.yaml'
    else:
        cfgname = f'{source}_{model}.yaml'

    path = root / f"{mode}/cfgs/{cfgname}".lower()

    if not path.exists():
        raise ValueError(
            f'Config file for mode {mode} and source/model {cfgname} does not exist! '
            'Please review your source / model request'
        )

    with open(path) as f:
        cfg = yaml.safe_load(f)
    BASE_URL = cfg.pop("_BASE_URL", None)
    assert BASE_URL, "Please check config file, missing BASE_URL"

    return cfg, BASE_URL
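
# Illustrative usage (hypothetical call, not part of the module): loading the
# BARRA2 'r2' atmospheric config resolves to atmos/cfgs/barra2_r2.yaml and
# returns the variable-mapping dict plus the THREDDS base URL:
#
#   cfg, base_url = _get_config('atmos', 'barra2', model='r2')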


def todstr(datetime):
    """Return a YYYYmmdd formatted datestr

    For use in all downloaders and mergers

    Args:
        datetime (pd.Timestamp): Timestamp for conversion

    Returns:
        str: YYYYmmdd format string
    """
    return datetime.strftime("%Y%m%d")


def _check_time_interval(x):
    assert int(x) == float(x), "Timestep unit should be an integer representing hours"
    x = int(x)
    assert (
        x >= 3
    ), "The highest resolution timestep available is 3-hourly output - please check"
    assert (
        x % 3 == 0
    ), "Timestep should be a multiple of 3 (highest available timestep resolution is 3hrly) - please check"
    return x


def _conv_date(date):
    # Convert date-types, if necessary
    if isinstance(date, str):
        if len(date) == 10:
            fmt = "%Y-%m-%d"
        elif len(date) == 13:
            fmt = "%Y-%m-%d %H"
        elif len(date) == 16:
            fmt = "%Y-%m-%d %H:%M"
        elif len(date) == 19:
            fmt = "%Y-%m-%d %H:%M:%S"
        elif len(date) == 8:
            fmt = "%Y%m%d"
        elif len(date) == 15:
            fmt = "%Y%m%d.%H%M%S"
        else:
            raise ValueError(f"Unrecognised date string format: {date}")
        date = datetime.strptime(date, fmt)
    elif isinstance(date, pd.Timestamp):
        date = date.to_pydatetime()
    return date
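
# Illustrative conversions (not part of the module): the accepted string forms
# are distinguished purely by length, e.g.
#
#   _conv_date("2020-01-01")       # -> datetime(2020, 1, 1)
#   _conv_date("2020-01-01 06")    # -> datetime(2020, 1, 1, 6)
#   _conv_date("20200101.060000")  # -> datetime(2020, 1, 1, 6)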


def validate_request(
    timelims, xlims, ylims, src_timelims, src_xlims, src_ylims, source_name
):
    """Validate the time range and spatial extents of a data request against the source dataset.

    Args:
        timelims (tuple): start and end time of the data request.
        xlims (tuple): x limits of the data request
        ylims (tuple): y limits of the data request
        src_timelims (tuple): start and end timeframes of the dataset.
        src_xlims (tuple): start and end longitudes of the dataset.
        src_ylims (tuple): start and end latitudes of the dataset.
        source_name (str): data source name
    """
    # Capitalise src name for consistency
    source_name = source_name.upper()

    # Validate time limits
    src_start, src_end = src_timelims
    time_start, time_end = timelims
    if src_start is not None:
        assert time_start >= pd.Timestamp(
            src_start
        ), f"Start time is outside of {source_name} data temporal extents ({src_start} to {src_end})"

    if src_end is not None:
        assert time_end <= pd.Timestamp(
            src_end
        ), f"End time is outside of {source_name} data temporal extents ({src_start} to {src_end})"
    else:
        assert time_end >= pd.Timestamp(
            src_start
        ), f"End time is outside of {source_name} data temporal extents ({src_start} to {src_end})"

    # Validate x limits
    x_start, x_end = xlims
    src_x_start, src_x_end = src_xlims

    if src_x_start is not None:
        assert (
            x_start >= src_x_start
        ), f"x start is outside of {source_name} data spatial extents ({src_x_start} to {src_x_end})"

    if src_x_end is not None:
        assert (
            x_end <= src_x_end
        ), f"x end is outside of {source_name} data spatial extents ({src_x_start} to {src_x_end})"

    # Validate y limits
    y_start, y_end = ylims
    src_y_start, src_y_end = src_ylims

    if src_y_start is not None:
        assert (
            y_start >= src_y_start
        ), f"y start is outside of {source_name} data spatial extents ({src_y_start} to {src_y_end})"

    if src_y_end is not None:
        assert (
            y_end <= src_y_end
        ), f"y end is outside of {source_name} data spatial extents ({src_y_start} to {src_y_end})"


def _open_netcdf_file(file: Path) -> xr.Dataset:
    """Open a subset netcdf file and assert validity

    Args:
        file (Path): path to the netcdf file

    Returns:
        xr.Dataset: Subset netcdf dataset, or None if the file could not be opened
    """
    try:
        # Attempt to open the file
        ds = xr.open_dataset(file)

        # Check if 'time' is properly formatted
        if pd.api.types.is_datetime64_any_dtype(ds["time"]):
            return ds
        else:
            print(f"Skipping file {file.name} - time error")
            return None
    except Exception as e:
        print(f"Skipping file {file.name}: {str(e)}")
        return None


def wrap_longitude(ds, wrapto360=False, xname="longitude"):
    """Function to wrap a dataset longitude around 360 or 180.

    Defaults to -180 to 180.

    :param ds: dataset to be wrapped
    :param wrapto360: boolean to wrap to 0-360 (defaults to False).
    :param xname: X-var name (defaults to 'longitude')
    :return: ds (xr.Dataset): The wrapped dataset
    """
    attrs = ds[xname].attrs
    if wrapto360 is True:
        x = ds[xname].values
        x[x < 0] = x[x < 0] + 360
    else:
        x = ds[xname].values
        x[x > 180] = x[x > 180] - 360

    ds = ds.assign_coords({xname: x})
    ds = ds.sortby(xname)

    # Ensure attributes get copied across correctly
    ds[xname].attrs = attrs

    return ds
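
# Illustrative example (not part of the module): wrapping a 0-360 longitude
# axis to -180/180 re-centres the grid on the prime meridian, e.g.
#
#   ds = xr.Dataset(coords={"longitude": [0.0, 90.0, 180.0, 270.0]})
#   wrap_longitude(ds)["longitude"].values  # -> array([-90., 0., 90., 180.])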


def check_path(path):
    # Convert a str path to pathlib.Path; assert existence.
    if isinstance(path, str):
        path = Path(path)

    if path.is_dir():
        return path
    else:
        assert False, f"{path.as_posix()} is not a valid path - check that it exists"


def query_yes_no(question, default="yes"):
    """Ask a yes/no question via input() and return the answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
    It must be "yes" (the default), "no" or None (meaning
    an answer is required of the user).

    The "answer" return value is True for "yes" or False for "no".
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if default is not None and choice == "":
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")


def is_notebook() -> bool:
    # Detect whether we are running inside a Jupyter notebook. `get_ipython`
    # is injected into builtins by IPython; a NameError means we are in a
    # plain Python interpreter.
    try:
        shell = get_ipython().__class__.__name__
        if shell == "ZMQInteractiveShell":
            return True  # Jupyter notebook or qtconsole
        elif shell == "TerminalInteractiveShell":
            return False  # Terminal running IPython
        else:
            return False  # Other type (?)
    except NameError:
        return False  # Probably standard Python interpreter
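
As a usage sketch: validate_request simply asserts that a request sits inside the source extents. The request values below are illustrative; the source extents are those published in barra2_r2.yaml (further down in this diff):

    import pandas as pd

    validate_request(
        timelims=(pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")),
        xlims=(110.0, 155.0),
        ylims=(-45.0, -10.0),
        src_timelims=("1979-01-01 00:00:00", None),
        src_xlims=(88.48, 207.39),
        src_ylims=(-57.97, 12.98),
        source_name="barra2",
    )  # passes silently; an out-of-range request raises an AssertionError
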
@@ -0,0 +1,209 @@ tfv_get_tools/providers/atmos/barra2.py
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr
from pandas.tseries.offsets import MonthEnd
from tqdm import tqdm

from tfv_get_tools.providers._downloader import BaseDownloader
from tfv_get_tools.providers._merger import BaseMerger
from tfv_get_tools.providers._utilities import todstr


class DownloadBARRA2(BaseDownloader):
    """BARRA2 Downloader

    This is a THREDDS dataset with each variable saved under a sub-url.
    Compared to BARRA1, the new BARRA2 appears to be more self-consistent,
    so we use a standard THREDDS server downloader (like CAWCR).
    """

    def _init_specific(self):
        if self.model == 'default':
            self.log("Default model has been selected == 'R2'")
            self.model = 'r2'

        self.source = "BARRA2"
        self.mode = "ATMOS"

        MODELS = {"c2", "r2", "re2"}
        if self.model.lower() not in MODELS:
            raise ValueError(f"Model must be one of {MODELS}")

        self._load_config()

    def _get_output_filename(self, ts: pd.Timestamp, te: pd.Timestamp, var: str) -> Path:
        """BARRA2 filename pattern"""
        fname = f"{self.prefix}_{self.model}_{var}_{todstr(ts)}_{todstr(te)}.nc"
        return self.outdir / fname

    def download(self):
        """Begin download of files.

        Approach:
            - Loop through each time in the times vector
            - Retry on failure.
        """
        for ts in self.times:
            te = ts + MonthEnd() + pd.Timedelta("23.9h")

            for var in self.variables:
                out_file = self._get_output_filename(ts, te, var)
                url = self._construct_opendap_url(ts, var)

                # Let the base class handle the file existence check, timing, etc.
                yield {
                    'file_path': out_file,
                    'url': url,
                    'timestamp': ts,
                    'variable': var,
                    'download_func': lambda f=out_file, u=url: self._download_single_file(f, u)
                }

    def _construct_opendap_url(self, date: pd.Timestamp, var: str) -> str:
        """
        Construct the OPeNDAP URL for the given date for BARRA2

        Args:
            date (pd.Timestamp): The date for which to construct the URL.
            var (str): Variable name to download (BARRA2 var)

        Returns:
            str: The constructed URL.
        """
        date_str = date.strftime("%Y%m")

        # Crude way of getting the dataset filename template from the nested
        # _DATASETS dict (first key of the first entry).
        name_tmp = list(self.dsmap[list(self.dsmap.keys())[0]].keys())[0]

        # Apply replacements
        name = name_tmp.replace('<VAR>', var).replace('<DATE>', date_str)

        url = f"{self.base_url}/{var}/latest/{name}"

        return url
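
    # Illustrative expansion (not part of the module): for the R2 model, the
    # variable 'uasmean' and the month 1979-01, the template in barra2_r2.yaml
    # gives uasmean_AUS-11_ERA5_historical_hres_BOM_BARRA-R2_v1_1hr_197901-197901.nc,
    # appended under "{base_url}/uasmean/latest/".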

    def _download_single_file(self, fname: Path, url: str) -> bool:
        """
        Download a single file from the specified URL and save it to the specified filename.

        Args:
            fname (Path): The output filename.
            url (str): The URL to download the data from.

        Returns:
            bool: True if the download was successful, False otherwise.
        """
        try:
            ds = xr.open_dataset(url)

            # Handle longitude selection, including requests that wrap across
            # the dateline (same approach as the CFSR downloader)
            if self.xlims[0] < self.xlims[1]:
                lon_idx = np.hstack(
                    np.where(
                        (self.xlims[0] <= ds["lon"])
                        & (ds["lon"] <= self.xlims[1])
                    )
                )
            else:
                lon_idx = np.hstack(
                    (
                        np.where(ds["lon"] <= self.xlims[1])[0],
                        np.where(ds["lon"] >= self.xlims[0])[0],
                    )
                )

            assert lon_idx.size > 1, "No longitude points selected!"

            # Select latitude indices
            lat_idx = np.hstack(
                np.where(
                    (self.ylims[0] <= ds["lat"])
                    & (ds["lat"] <= self.ylims[1])
                )
            )
            assert lat_idx.size > 1, "No latitude points selected!"

            # Subset and save
            ds = ds.isel(lon=lon_idx, lat=lat_idx)
            ds.to_netcdf(fname)
            return True

        except Exception as e:
            if self.verbose:
                print(f"Failed to download {url}: {e}")
            return False


class MergeBARRA2(BaseMerger):
    def _init_specific(self):
        self.source = "BARRA2"
        self.mode = "ATMOS"

        if self.model == 'default':
            self.model = 'R2'

        self._load_config()

    def merge_files(self, file_list):
        """Specific merging logic.

        BARRA2 requires us to first merge on variables before concatenating over time.

        Args:
            file_list (list): list of Path objects to open and concat.

        Returns:
            xr.Dataset: merged xarray dataset
            list: files unable to be merged
        """
        skipped_list = []

        startdates = [x.stem.split('_')[-2] for x in file_list]
        unq_startdates = np.unique(startdates)

        dsset = {k: [] for k in unq_startdates}

        for i, f in enumerate(tqdm(file_list)):
            dsx = self._open_subset_netcdf(f, chunks=dict(time=24))
            if dsx is not None:
                dsx['time'] = pd.to_datetime(dsx['time'].values).floor('1h')

                # Drop all the extra variables
                dsx = dsx.drop_vars(['height', 'level_height', 'model_level_number', 'sigma', 'crs'], errors='ignore')

                dsset[startdates[i]].append(dsx)
            else:
                skipped_list.append(f)

        print("Concatenating xarray dataset")

        # Merge the common start dates first, then concatenate by time afterwards
        dssetm = []
        for v in dsset.values():
            dssetm.append(xr.merge(v))

        ds = xr.concat(
            dssetm,
            dim="time",
            combine_attrs="override",
            data_vars="minimal",
            coords="minimal",
            compat="override",
        )

        # Drop the redundant time_bnds var
        ds = ds.drop_vars(['time_bnds', 'bnds'], errors='ignore')

        # Sort by time and drop duplicate timesteps
        ds = ds.sortby('time')
        _, idx = np.unique(ds['time'], return_index=True)
        ds = ds.isel(time=idx)

        # Rename the original coords
        ds = ds.rename({'lon': 'longitude', 'lat': 'latitude'})

        return ds, skipped_list
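
The merge-then-concat pattern above generalises: group per-variable files by their start date, xr.merge each group into a single multi-variable dataset, then xr.concat the groups along time. A minimal self-contained sketch of that pattern (synthetic data and hypothetical names, not the package's code path):

    import numpy as np
    import pandas as pd
    import xarray as xr

    def make_file(var, start):
        # One day of hourly data for a single variable (stand-in for a download)
        time = pd.date_range(start, periods=24, freq="h")
        return xr.Dataset({var: ("time", np.random.rand(time.size))}, coords={"time": time})

    groups = {}  # start date -> list of single-variable datasets
    for start in ["1979-01-01", "1979-02-01"]:
        groups[start] = [make_file(v, start) for v in ("uasmean", "vasmean")]

    merged = [xr.merge(v) for v in groups.values()]    # merge variables per month
    ds = xr.concat(merged, dim="time").sortby("time")  # then stitch months together
    assert set(ds.data_vars) == {"uasmean", "vasmean"}
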
@@ -0,0 +1,52 @@ tfv_get_tools/providers/atmos/cfgs/barra2_c2.yaml
_BASE_URL: "https://thredds.nci.org.au/thredds/dodsC/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr"
_INFO_URL: "http://www.bom.gov.au/research/publications/researchreports/BRR-067.pdf"

# BARRA2 is a THREDDS server dataset. Each variable is stored in sub-urls of the main dataset url above.
# The data covers 1979 to near present (~5 month lag?)
# No short or long wave radiation components are provided - leaving this as a wind, pressure and precipitation dataset for now.

# The dataset FNAME is read from the _DATASETS param below. <VAR> and <DATE> are replaced:
# - VAR = variable name, e.g., uasmean, vasmean, psl
# - DATE = date string in %Y%m format. Both occurrences should be the same month (e.g., 197901-197901)

_SOURCE_XLIMS: [108.02, 159.9]
_SOURCE_YLIMS: [-45.69, -5.01]
_SOURCE_TIMELIMS: ["1979-01-01 00:00:00", null]

_DOWNLOAD_INTERVAL: monthly
_VARIABLES: ["uasmean", "vasmean", "psl", "pr"] # u10, v10, pressure, precip. {temp: 'tasmean', relhum: 'hurs'}
_DATASETS:
  {
    1979-01-01:
      {
        <VAR>_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_<DATE>-<DATE>.nc: default,
      },
  }

# KEY: The standard NC variable for merged data (as per _standard_attrs.py)
# source_var == var as in the original dataset
# tfv_var == standard TUFLOW FV var name (for the FVC writer template)

u10:
  source_var: "uasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0.

v10:
  source_var: "vasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0.

mslp:
  source_var: "psl"
  tfv_var: "MSLP_GRID"
  bc_scale: 0.01
  bc_offset: 0.

prate:
  source_var: "pr"
  tfv_var: "PRECIP_GRID"
  bc_scale: 86.4 # Convert from kg/m2/s to mm/day
  bc_offset: 0.
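
Each mapping block above (and in the R2/RE2 configs that follow) drives a linear unit conversion when the merged data is written out for TUFLOW FV, presumably value * bc_scale + bc_offset. The arithmetic is simple enough to check by hand (values hypothetical):

    # psl arrives in Pa; bc_scale 0.01 yields hPa for MSLP_GRID
    print(101325.0 * 0.01 + 0.0)  # -> 1013.25

    # tasmean (R2/RE2 configs) arrives in K; bc_offset -273.15 yields deg C
    print(288.15 * 1 + -273.15)   # -> 15.0
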
@@ -0,0 +1,85 @@ tfv_get_tools/providers/atmos/cfgs/barra2_r2.yaml
_BASE_URL: "https://thredds.nci.org.au/thredds/dodsC/ob53/output/reanalysis/AUS-11/BOM/ERA5/historical/hres/BARRA-R2/v1/1hr"
_INFO_URL: "http://www.bom.gov.au/research/publications/researchreports/BRR-067.pdf"

# BARRA2 is a THREDDS server dataset. Each variable is stored in sub-urls of the main dataset url above.
# The data covers 1979 to near present (~5 month lag?)
# Short and long wave radiation components (rsds, rlds) are available for this model.

# The dataset FNAME is read from the _DATASETS param below. <VAR> and <DATE> are replaced:
# - VAR = variable name, e.g., uasmean, vasmean, psl
# - DATE = date string in %Y%m format. Both occurrences should be the same month (e.g., 197901-197901)

_SOURCE_XLIMS: [88.48, 207.39]
_SOURCE_YLIMS: [-57.97, 12.98]
_SOURCE_TIMELIMS: ["1979-01-01 00:00:00", null]

_DOWNLOAD_INTERVAL: monthly
_VARIABLES: [
    "uasmean",
    "vasmean",
    "psl",
    "tasmean",
    "hurs",
    "rsds",
    "rlds",
    "pr",
  ] # u10, v10, mslp, temp, relhum, swr, lwr, precip
_DATASETS:
  {
    1979-01-01:
      {
        <VAR>_AUS-11_ERA5_historical_hres_BOM_BARRA-R2_v1_1hr_<DATE>-<DATE>.nc: default,
      },
  }

# KEY: The standard NC variable for merged data (as per _standard_attrs.py)
# source_var == var as in the original dataset
# tfv_var == standard TUFLOW FV var name (for the FVC writer template)

u10:
  source_var: "uasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0

v10:
  source_var: "vasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0

mslp:
  source_var: "psl"
  tfv_var: "MSLP_GRID"
  bc_scale: 0.01
  bc_offset: 0

dlwrf:
  source_var: "rlds"
  tfv_var: "LW_RAD_GRID"
  bc_scale: 1
  bc_offset: 0

dswrf:
  source_var: "rsds"
  tfv_var: "SW_RAD_GRID"
  bc_scale: 1
  bc_offset: 0

t2m:
  source_var: "tasmean"
  tfv_var: "AIR_TEMP_GRID"
  bc_scale: 1
  bc_offset: -273.15

relhum:
  source_var: "hurs"
  tfv_var: "REL_HUM_GRID"
  bc_scale: 1
  bc_offset: 0

prate:
  source_var: "pr"
  tfv_var: "PRECIP_GRID"
  bc_scale: 86.4 # Convert from kg/m2/s to mm/day
  bc_offset: 0.

@@ -0,0 +1,70 @@ tfv_get_tools/providers/atmos/cfgs/barra2_re2.yaml
_BASE_URL: "https://thredds.nci.org.au/thredds/dodsC/ob53/output/reanalysis/AUS-22/BOM/ERA5/historical/eda/BARRA-RE2/v1/1hr"
_INFO_URL: "http://www.bom.gov.au/research/publications/researchreports/BRR-067.pdf"

# BARRA2 is a THREDDS server dataset. Each variable is stored in sub-urls of the main dataset url above.
# The data covers 1979 to near present (~5 month lag?)
# Short and long wave radiation components (rsds, rlds) are available for this model.

# The dataset FNAME is read from the _DATASETS param below. <VAR> and <DATE> are replaced:
# - VAR = variable name, e.g., uasmean, vasmean, psl
# - DATE = date string in %Y%m format. Both occurrences should be the same month (e.g., 197901-197901)

_SOURCE_XLIMS: [89.53, 206.13]
_SOURCE_YLIMS: [-56.49, 11.71]
_SOURCE_TIMELIMS: ["1979-01-01 00:00:00", null]

_DOWNLOAD_INTERVAL: monthly
_VARIABLES: ["uasmean", "vasmean", "psl", "tasmean", "hurs", "rsds", "rlds"] # u10, v10, mslp, temp, relhum, swr, lwr
_DATASETS:
  {
    1979-01-01:
      {
        <VAR>_AUS-22_ERA5_historical_eda_BOM_BARRA-RE2_v1_1hr_<DATE>-<DATE>.nc: default,
      },
  }

# KEY: The standard NC variable for merged data (as per _standard_attrs.py)
# source_var == var as in the original dataset
# tfv_var == standard TUFLOW FV var name (for the FVC writer template)

u10:
  source_var: "uasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0

v10:
  source_var: "vasmean"
  tfv_var: "W10_GRID"
  bc_scale: 1
  bc_offset: 0

mslp:
  source_var: "psl"
  tfv_var: "MSLP_GRID"
  bc_scale: 0.01
  bc_offset: 0

dlwrf:
  source_var: "rlds"
  tfv_var: "LW_RAD_GRID"
  bc_scale: 1
  bc_offset: 0

dswrf:
  source_var: "rsds"
  tfv_var: "SW_RAD_GRID"
  bc_scale: 1
  bc_offset: 0

t2m:
  source_var: "tasmean"
  tfv_var: "AIR_TEMP_GRID"
  bc_scale: 1
  bc_offset: -273.15

relhum:
  source_var: "hurs"
  tfv_var: "REL_HUM_GRID"
  bc_scale: 1
  bc_offset: 0
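
To tie the configs back to the code: _get_config pops _BASE_URL from the parsed YAML, and DownloadBARRA2._construct_opendap_url expands the single filename template under _DATASETS. A minimal stand-alone sketch that mirrors (rather than calls) those helpers, with an illustrative local path:

    import yaml

    with open("tfv_get_tools/providers/atmos/cfgs/barra2_re2.yaml") as f:
        cfg = yaml.safe_load(f)

    base_url = cfg.pop("_BASE_URL")
    template = list(list(cfg["_DATASETS"].values())[0].keys())[0]

    var, date_str = "psl", "197901"
    name = template.replace("<VAR>", var).replace("<DATE>", date_str)
    print(f"{base_url}/{var}/latest/{name}")
    # -> .../BARRA-RE2/v1/1hr/psl/latest/psl_AUS-22_ERA5_historical_eda_BOM_BARRA-RE2_v1_1hr_197901-197901.nc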