tfv_get_tools-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. tfv_get_tools/__init__.py +4 -0
  2. tfv_get_tools/_standard_attrs.py +107 -0
  3. tfv_get_tools/atmos.py +167 -0
  4. tfv_get_tools/cli/_cli_base.py +173 -0
  5. tfv_get_tools/cli/atmos_cli.py +192 -0
  6. tfv_get_tools/cli/ocean_cli.py +204 -0
  7. tfv_get_tools/cli/tide_cli.py +118 -0
  8. tfv_get_tools/cli/wave_cli.py +183 -0
  9. tfv_get_tools/fvc/__init__.py +3 -0
  10. tfv_get_tools/fvc/_atmos.py +230 -0
  11. tfv_get_tools/fvc/_fvc.py +218 -0
  12. tfv_get_tools/fvc/_ocean.py +171 -0
  13. tfv_get_tools/fvc/_tide.py +195 -0
  14. tfv_get_tools/ocean.py +170 -0
  15. tfv_get_tools/providers/__init__.py +0 -0
  16. tfv_get_tools/providers/_custom_conversions.py +34 -0
  17. tfv_get_tools/providers/_downloader.py +566 -0
  18. tfv_get_tools/providers/_merger.py +520 -0
  19. tfv_get_tools/providers/_utilities.py +255 -0
  20. tfv_get_tools/providers/atmos/barra2.py +209 -0
  21. tfv_get_tools/providers/atmos/cfgs/barra2_c2.yaml +52 -0
  22. tfv_get_tools/providers/atmos/cfgs/barra2_r2.yaml +85 -0
  23. tfv_get_tools/providers/atmos/cfgs/barra2_re2.yaml +70 -0
  24. tfv_get_tools/providers/atmos/cfgs/cfsr.yaml +68 -0
  25. tfv_get_tools/providers/atmos/cfgs/era5.yaml +77 -0
  26. tfv_get_tools/providers/atmos/cfgs/era5_gcp.yaml +77 -0
  27. tfv_get_tools/providers/atmos/cfsr.py +207 -0
  28. tfv_get_tools/providers/atmos/era5.py +20 -0
  29. tfv_get_tools/providers/atmos/era5_gcp.py +20 -0
  30. tfv_get_tools/providers/ocean/cfgs/copernicus_blk.yaml +64 -0
  31. tfv_get_tools/providers/ocean/cfgs/copernicus_glo.yaml +67 -0
  32. tfv_get_tools/providers/ocean/cfgs/copernicus_nws.yaml +62 -0
  33. tfv_get_tools/providers/ocean/cfgs/hycom.yaml +73 -0
  34. tfv_get_tools/providers/ocean/copernicus_ocean.py +457 -0
  35. tfv_get_tools/providers/ocean/hycom.py +611 -0
  36. tfv_get_tools/providers/wave/cawcr.py +166 -0
  37. tfv_get_tools/providers/wave/cfgs/cawcr_aus_10m.yaml +39 -0
  38. tfv_get_tools/providers/wave/cfgs/cawcr_aus_4m.yaml +39 -0
  39. tfv_get_tools/providers/wave/cfgs/cawcr_glob_24m.yaml +39 -0
  40. tfv_get_tools/providers/wave/cfgs/cawcr_pac_10m.yaml +39 -0
  41. tfv_get_tools/providers/wave/cfgs/cawcr_pac_4m.yaml +39 -0
  42. tfv_get_tools/providers/wave/cfgs/copernicus_glo.yaml +56 -0
  43. tfv_get_tools/providers/wave/cfgs/copernicus_nws.yaml +51 -0
  44. tfv_get_tools/providers/wave/cfgs/era5.yaml +48 -0
  45. tfv_get_tools/providers/wave/cfgs/era5_gcp.yaml +48 -0
  46. tfv_get_tools/providers/wave/copernicus_wave.py +38 -0
  47. tfv_get_tools/providers/wave/era5.py +232 -0
  48. tfv_get_tools/providers/wave/era5_gcp.py +169 -0
  49. tfv_get_tools/tide/__init__.py +2 -0
  50. tfv_get_tools/tide/_nodestring.py +214 -0
  51. tfv_get_tools/tide/_tidal_base.py +568 -0
  52. tfv_get_tools/utilities/_tfv_bc.py +78 -0
  53. tfv_get_tools/utilities/horizontal_padding.py +89 -0
  54. tfv_get_tools/utilities/land_masking.py +93 -0
  55. tfv_get_tools/utilities/parsers.py +44 -0
  56. tfv_get_tools/utilities/warnings.py +38 -0
  57. tfv_get_tools/wave.py +179 -0
  58. tfv_get_tools-0.2.0.dist-info/METADATA +286 -0
  59. tfv_get_tools-0.2.0.dist-info/RECORD +62 -0
  60. tfv_get_tools-0.2.0.dist-info/WHEEL +5 -0
  61. tfv_get_tools-0.2.0.dist-info/entry_points.txt +5 -0
  62. tfv_get_tools-0.2.0.dist-info/top_level.txt +1 -0
tfv_get_tools/providers/ocean/cfgs/hycom.yaml
@@ -0,0 +1,73 @@
+ # HYCOM GOFS 3.1 Configuration File
+ _BASE_URL: "https://tds.hycom.org/thredds/dodsC"
+ _INFO_URL: https://www.hycom.org/
+
+ _SOURCE_XLIMS: [-180, 360]
+ _SOURCE_YLIMS: [-90, 90]
+ _SOURCE_ZLIMS: [0, 5000]
+ _SOURCE_TIMELIMS: ["1994-01-01 00:00:00", null]
+
+ _DOWNLOAD_INTERVAL: daily
+
+ # Default variables to download
+ _VARIABLES: ["surf_el", "water_u", "water_v", "water_temp", "salinity"]
+
+ _DATASETS:
+   "1994-01-01":
+     "GLBv0.08/expt_53.X/data/{year}": default
+   "2016-01-01":
+     "GLBv0.08/expt_56.3": default
+   "2016-05-01":
+     "GLBv0.08/expt_57.2": default
+   "2017-02-01":
+     "GLBv0.08/expt_92.8": default
+   "2017-06-01":
+     "GLBv0.08/expt_57.7": default
+   "2017-10-01":
+     "GLBv0.08/expt_92.9": default
+   "2018-01-01":
+     "GLBv0.08/expt_93.0": default
+   "2018-12-04":
+     "GLBy0.08/expt_93.0": default
+   "2024-08-10":
+     {
+       ESPC-D-V02/u3z: ["water_u"],
+       ESPC-D-V02/v3z: ["water_v"],
+       ESPC-D-V02/t3z: ["water_temp"],
+       ESPC-D-V02/s3z: ["salinity"],
+       ESPC-D-V02/ssh: ["surf_el"],
+     }
+
+ # KEY: the standard NetCDF variable name for merged data (per `_standard_attrs.py`)
+ #   source_var == variable name as it appears in the original dataset
+ #   tfv_var == standard TUFLOW FV variable name (for the FVC writer template)
+
+ surf_el:
+   source_var: surf_el
+   tfv_var: "H"
+   bc_scale: 1
+   bc_offset: 0
+
+ water_u:
+   source_var: water_u
+   tfv_var: "V_x"
+   bc_scale: 1
+   bc_offset: 0
+
+ water_v:
+   source_var: water_v
+   tfv_var: "V_y"
+   bc_scale: 1
+   bc_offset: 0
+
+ salinity:
+   source_var: salinity
+   tfv_var: "SAL"
+   bc_scale: 1
+   bc_offset: 0
+
+ water_temp:
+   source_var: water_temp
+   tfv_var: "TEMP"
+   bc_scale: 1
+   bc_offset: 0
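
Note on the config above: the _DATASETS block maps a start date to the dataset group that becomes active on that date, and each variable block carries a bc_scale/bc_offset pair that downstream tooling presumably applies as a linear scale and offset when writing boundary conditions. As a rough, hypothetical sketch (not part of tfv_get_tools), a date-keyed map like this could be resolved by taking the latest start date not after the requested date; resolve_dataset_group is an illustrative name only, and PyYAML and pandas are assumed:

# Hypothetical helper, not part of the package.
import yaml
import pandas as pd

def resolve_dataset_group(cfg_path: str, when: str) -> dict:
    """Return the dataset group active on the requested date."""
    with open(cfg_path) as f:
        cfg = yaml.safe_load(f)
    starts = sorted(pd.Timestamp(k) for k in cfg["_DATASETS"])
    valid = [s for s in starts if s <= pd.Timestamp(when)]
    if not valid:
        raise ValueError("Requested date precedes the earliest dataset")
    return cfg["_DATASETS"][valid[-1].strftime("%Y-%m-%d")]

# e.g. resolve_dataset_group("hycom.yaml", "2019-06-01")
# -> {"GLBy0.08/expt_93.0": "default"}
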
tfv_get_tools/providers/ocean/copernicus_ocean.py
@@ -0,0 +1,457 @@
+ """
+ Copernicus Ocean
+ """
+
+ from datetime import datetime
+ from functools import partial
+ import logging
+ from pathlib import Path
+ from typing import List, Tuple, Dict, Optional, Any
+
+ import copernicusmarine as cm
+ import numpy as np
+ import pandas as pd
+ from pandas.tseries.offsets import MonthEnd
+ from tqdm import tqdm
+ import xarray as xr
+
+ from tfv_get_tools.providers._downloader import BaseDownloader
+ from tfv_get_tools.providers._merger import BaseMerger
+ from tfv_get_tools.providers._utilities import todstr
+
+
+ class DownloadCopernicusOcean(BaseDownloader):
+     """Copernicus Ocean downloader"""
+
+     def _init_specific(self, **kwargs):
+         """Set source and mode - matches original interface"""
+         if self.model == "default":
+             self.log("Default model has been selected == 'GLO'")
+             self.model = "GLO"
+
+         self.source = "COPERNICUS"
+         self.mode = "OCEAN"
+         self._load_config()
+
+         # Login status tracking
+         self._logged_in = False
+
+         # Cache for temporal extents
+         self._temporal_extents_cache = {}
+
+         # Quieten the copernicusmarine logger unless verbose output is requested
+         if not self.verbose:
+             logging.getLogger("copernicusmarine").setLevel(logging.WARNING)
+
+     def _ensure_logged_in(self):
+         """Ensure user is logged into Copernicus Marine Service"""
+         if self._logged_in:
+             return True
+
+         # Fast credential check first
+         if cm.login(check_credentials_valid=True):
+             print("Found existing credentials file, skipping login check.")
+             self._logged_in = True
+             return True
+
+         # Fallback to full login attempt
+         print(
+             "Login to Copernicus Marine Service required. Please enter your credentials."
+         )
+         print("Your credentials will be stored for next time.")
+         print(
+             'If you do not have an account, please register at "https://marine.copernicus.eu/".'
+         )
+         if cm.login():
+             self._logged_in = True
+             return True
+
+         # Both methods failed
+         raise AttributeError(
+             "Login to Copernicus Marine Service has failed, please check credentials"
+         )
+
+     def _get_dataset_temporal_extent(self, dataset_id: str) -> Optional[Dict]:
+         """Extract temporal extent for a dataset using Copernicus Marine API"""
+         if dataset_id in self._temporal_extents_cache:
+             return self._temporal_extents_cache[dataset_id]
+
+         try:
+             dataset_info = cm.describe(dataset_id=dataset_id)
+             dataset_info_dict = dataset_info.model_dump()  # Use model_dump instead of deprecated dict()
+
+             # Extract temporal extents
+             for product in dataset_info_dict["products"]:
+                 for dataset in product["datasets"]:
+                     if dataset["dataset_id"] == dataset_id:
+                         for version in dataset["versions"]:
+                             for part in version["parts"]:
+                                 for service in part["services"]:
+                                     if service.get("variables"):
+                                         for variable in service["variables"]:
+                                             if variable.get("coordinates"):
+                                                 for coord in variable["coordinates"]:
+                                                     if coord["coordinate_id"] == "time":
+                                                         min_time_ms = coord["minimum_value"]
+                                                         max_time_ms = coord["maximum_value"]
+
+                                                         # Convert from milliseconds to datetime
+                                                         start_date = datetime.fromtimestamp(min_time_ms / 1000.0)
+                                                         end_date = datetime.fromtimestamp(max_time_ms / 1000.0)
+
+                                                         extent = {
+                                                             "start_date": start_date,
+                                                             "end_date": end_date,
+                                                             "start_timestamp_ms": min_time_ms,
+                                                             "end_timestamp_ms": max_time_ms,
+                                                         }
+
+                                                         self._temporal_extents_cache[dataset_id] = extent
+                                                         return extent
+         except Exception as e:
+             if self.verbose:
+                 print(f"Failed to get temporal extent for {dataset_id}: {e}")
+             return None
+
+         return None
+
+     def _get_dataset_group_temporal_intersection(
+         self, dataset_group: Dict
+     ) -> Optional[Tuple[datetime, datetime]]:
+         """Get the temporal intersection of all datasets in a group"""
+         if not dataset_group:
+             return None
+
+         extents = []
+         for ds_id in dataset_group.keys():
+             extent = self._get_dataset_temporal_extent(ds_id)
+             if extent is None:
+                 return None
+             extents.append(extent)
+
+         # Find intersection (latest start, earliest end)
+         latest_start = max(extent["start_date"] for extent in extents)
+         earliest_end = min(extent["end_date"] for extent in extents)
+
+         if latest_start <= earliest_end:
+             return (latest_start, earliest_end)
+         else:
+             return None  # No overlap
+
+     def _get_output_filename(
+         self, ds_id: str, ts: pd.Timestamp, te: pd.Timestamp
+     ) -> Path:
+         """Copernicus filename pattern includes dataset ID"""
+         outname = f"{self.prefix}_{ds_id}_{todstr(ts)}_{todstr(te)}.nc"
+         return self.outdir / outname
+
+     def _get_variables_for_dataset(self, dataset_config, ds_id: str):
+         """Determine which variables to download for this dataset"""
+         if self._custom_variables:
+             # Use custom variables requested by user
+             return self.variables
+         else:
+             # Use defaults (dataset-specific or global)
+             if dataset_config[ds_id] == "default":
+                 return self.variables
+             else:
+                 return dataset_config[ds_id]
+
+     def get_variable_temporal_extents(self) -> Dict[str, Dict[str, Any]]:
+         """Get temporal extents for each requested variable"""
+         variable_extents = {}
+
+         for variable in self.variables:
+             # Find all datasets that contain this variable
+             datasets_with_variable = []
+
+             # Search through all dataset groups
+             for dsmap_key, dataset_group in self.dsmap.items():
+                 for ds_id, variables in dataset_group.items():
+                     if variable in variables:
+                         extent = self._get_dataset_temporal_extent(ds_id)
+                         if extent:
+                             datasets_with_variable.append(
+                                 {
+                                     "dataset_id": ds_id,
+                                     "dsmap_key": dsmap_key,
+                                     "start_date": extent["start_date"],
+                                     "end_date": extent["end_date"],
+                                 }
+                             )
+
+             if datasets_with_variable:
+                 # Find the union of all temporal extents for this variable
+                 earliest_start = min(ds["start_date"] for ds in datasets_with_variable)
+                 latest_end = max(ds["end_date"] for ds in datasets_with_variable)
+
+                 variable_extents[variable] = {
+                     "start_date": earliest_start,
+                     "end_date": latest_end,
+                     "datasets": datasets_with_variable,
+                 }
+
+         return variable_extents
+
+     def get_best_dsmap_key_for_date(
+         self, target_date: pd.Timestamp, variable_extents: Dict
+     ) -> Optional[str]:
+         """Find the dsmap key that provides ALL variables for the given date"""
+         target_dt = target_date.to_pydatetime()
+
+         # Get all possible dsmap keys that could work for this date
+         candidate_keys = set()
+         for variable in self.variables:
+             if variable in variable_extents:
+                 for dataset in variable_extents[variable]["datasets"]:
+                     if dataset["start_date"] <= target_dt <= dataset["end_date"]:
+                         candidate_keys.add(dataset["dsmap_key"])
+
+         # Check each candidate key to see if it provides ALL variables for this date
+         for dsmap_key in candidate_keys:
+             can_provide_all = True
+
+             for variable in self.variables:
+                 variable_available = False
+
+                 # Check if this dsmap_key has a dataset with this variable for this date
+                 if variable in variable_extents:
+                     for dataset in variable_extents[variable]["datasets"]:
+                         if (
+                             dataset["dsmap_key"] == dsmap_key
+                             and dataset["start_date"] <= target_dt <= dataset["end_date"]
+                         ):
+                             variable_available = True
+                             break
+
+                 if not variable_available:
+                     can_provide_all = False
+                     break
+
+             if can_provide_all:
+                 return dsmap_key
+
+         return None
+
+     def _download_single_file(
+         self,
+         ds_id: str,
+         variables: list,
+         ts: pd.Timestamp,
+         te: pd.Timestamp,
+         output_file: Path,
+     ) -> bool:
+         """Download single file via Copernicus Marine API"""
+         try:
+             xmin, xmax = self.xlims
+             ymin, ymax = self.ylims
+             zmin, zmax = self.zlims if self.zlims else (None, None)
+
+             # Enforce UTC timezone for the Copernicus marine API
+             ts = ts.tz_localize("UTC") if ts.tz is None else ts.tz_convert("UTC")
+             te = te.tz_localize("UTC") if te.tz is None else te.tz_convert("UTC")
+
+             cm.subset(
+                 dataset_id=ds_id,
+                 start_datetime=ts,
+                 end_datetime=te,
+                 minimum_longitude=xmin,
+                 maximum_longitude=xmax,
+                 minimum_latitude=ymin,
+                 maximum_latitude=ymax,
+                 minimum_depth=zmin,
+                 maximum_depth=zmax,
+                 variables=variables,
+                 output_filename=output_file.name,
+                 output_directory=output_file.parent,
+                 dataset_part="default",
+             )
+             return True
+
+         except Exception as e:
+             if self.verbose:
+                 error_msg = str(e)
+                 if "time dimension exceed the dataset coordinates" in error_msg:
+                     print("Time dimension exceeds coordinates - dataset incompatible")
+                 else:
+                     print(f"Error downloading {output_file.name}: {error_msg}")
+             return False
+
+     def download(self):
+         """Let her rip, potato chip - generate a download task for each valid time period and dataset."""
+
+         assert self._ensure_logged_in(), "Login failed, cannot download data"
+
+         # Get temporal extents for all variables upfront
+         variable_extents = self.get_variable_temporal_extents()
+
+         if not variable_extents:
+             print("No temporal extents found for any requested variables!")
+             return
+
+         # Filter times to only those where all variables have coverage
+         valid_times = []
+         for ts in self.times:
+             dsmap_key = self.get_best_dsmap_key_for_date(ts, variable_extents)
+             if dsmap_key:
+                 valid_times.append(ts)
+             elif self.verbose:
+                 print(f"Skipping {ts}: no dataset group provides all variables")
+
+         if not valid_times:
+             print(
+                 "No time periods found where all requested variables have dataset coverage!"
+             )
+             return
+
+         if len(valid_times) < len(self.times):
+             print(
+                 f"Warning: Only {len(valid_times)}/{len(self.times)} time periods have complete variable coverage"
+             )
+
+         # Process each valid time period
+         for ts in valid_times:
+             te = (ts + MonthEnd()).replace(hour=23, minute=59, second=59)
+
+             # Get the best dsmap key for this time
+             dsmap_key = self.get_best_dsmap_key_for_date(ts, variable_extents)
+             # dataset_group = self.dsmap[dsmap_key]
+
+             # Group variables by their datasets within this group
+             dataset_variables = {}
+             target_dt = ts.to_pydatetime()
+
+             for variable in self.variables:
+                 for dataset_info in variable_extents[variable]["datasets"]:
+                     if (
+                         dataset_info["dsmap_key"] == dsmap_key
+                         and dataset_info["start_date"] <= target_dt <= dataset_info["end_date"]
+                     ):
+                         ds_id = dataset_info["dataset_id"]
+
+                         if ds_id not in dataset_variables:
+                             dataset_variables[ds_id] = []
+                         dataset_variables[ds_id].append(variable)
+                         break
+
+             # Download from each dataset
+             for ds_id, variables in dataset_variables.items():
+                 output_file = self._get_output_filename(ds_id, ts, te)
+
+                 if self.verbose:
+                     print(
+                         f"Downloading {len(variables)} variables from {ds_id} for {ts}"
+                     )
+                     print(f" Variables: {', '.join(variables)}")
+
+                 yield {
+                     "file_path": output_file,
+                     "url": f"copernicus://{ds_id}",
+                     "timestamp": ts,
+                     "variable": f"{len(variables)}_vars",
+                     "download_func": partial(
+                         self._download_single_file,
+                         ds_id,
+                         variables,
+                         ts,
+                         te,
+                         output_file,
+                     ),
+                 }
+
+
+ class MergeCopernicusOcean(BaseMerger):
+     def _init_specific(self):
+         self.source = "COPERNICUS"
+         self.mode = "OCEAN"
+         if self.model == "default":
+             self.model = "GLO"
+         self._load_config()
+
+     def merge_files(self, file_list):
+         """Copernicus sometimes requires us to first merge on variables,
+         then concatenate on time.
+
+         Args:
+             file_list (list): list of path objects to open and concat.
+
+         Returns:
+             xr.Dataset: merged xarray dataset
+             list: files unable to be merged
+         """
+         startdates = [x.stem.split("_")[-2] for x in file_list]
+         unq_startdates = np.unique(startdates)
+
+         dsset = {k: [] for k in unq_startdates}
+
+         skipped_list = []
+         for i, f in enumerate(tqdm(file_list)):
+             dsx = self._open_subset_netcdf(f)
+
+             # Water level is tricky - it can be stored at high temporal resolution.
+             # We'll create a daily mean water level, plus keep the original high-res water level.
+             # (Guard against files that failed to open and returned None.)
+             if dsx is not None and "zos" in dsx.data_vars:
+                 if dsx.sizes["time"] > 32:
+                     dsx = dsx.rename(time="wl_time", zos="zosh")
+                     dsx["zos"] = (
+                         ("time", "latitude", "longitude"),
+                         dsx["zosh"].resample(wl_time="24h").mean().data,
+                     )
+             if dsx is not None:
+                 dsset[startdates[i]].append(dsx)
+             else:
+                 skipped_list.append(f)
+
+         # Merge the common start dates first, then concatenate by time afterwards
+         dssetm = []
+         for v in dsset.values():
+             merge_list = []
+             for dsx in v:
+                 # Any other modifications can be added here.
+                 # None required for Copernicus right now.
+                 merge_list.append(dsx)
+             dssetm.append(xr.merge(merge_list))
+
+         print("Concatenating xarray dataset")
+         ds = xr.concat(
+             dssetm,
+             dim="time",
+             combine_attrs="override",
+             data_vars="minimal",
+             coords="minimal",
+             compat="override",
+         )
+
+         # Sort by time and drop duplicates (from overlaps)
+         ds = ds.sortby("time")
+         _, idx = np.unique(ds["time"], return_index=True)
+         ds = ds.isel(time=idx)
+
+         # Interpolate NaN gaps (mainly in forecast data)
+         # Turned off; it creates issues with chunked data.
+         # It is mainly a problem with HYCOM, not Copernicus.
+         # ds = ds.interpolate_na(max_gap="24h", dim="time")
+
+         return ds, skipped_list
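
Note on usage: DownloadCopernicusOcean.download() is a generator. Rather than downloading directly, it yields one task dict per dataset and month, each carrying the target path and a pre-bound download_func (a partial around _download_single_file). The sketch below is a hypothetical way to consume those tasks; the constructor signature lives in BaseDownloader, which is not shown in this diff, so its arguments are deliberately elided rather than guessed:

# Hypothetical consumption sketch, not part of the package.
from tfv_get_tools.providers.ocean.copernicus_ocean import DownloadCopernicusOcean

downloader = DownloadCopernicusOcean(...)  # args defined by BaseDownloader (not shown here)

for task in downloader.download():
    if task["file_path"].exists():
        continue  # already on disk
    ok = task["download_func"]()  # runs cm.subset() for this dataset/month, returns bool
    if not ok:
        print(f"Failed: {task['url']} at {task['timestamp']}")
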