weatherdb 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,710 @@
|
|
1
|
+
# libraries
|
2
|
+
import logging
|
3
|
+
from datetime import datetime
|
4
|
+
from pathlib import Path
|
5
|
+
import warnings
|
6
|
+
import zipfile
|
7
|
+
from packaging import version
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from ..utils.TimestampPeriod import TimestampPeriod
|
11
|
+
from .StationBases import StationBase, AGG_TO
|
12
|
+
from . import StationP, StationT, StationET
|
13
|
+
|
14
|
+
# set settings
|
15
|
+
# ############
|
16
|
+
__all__ = ["GroupStation"]
|
17
|
+
log = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
# class definition
|
20
|
+
##################
|
21
|
+
class GroupStation(object):
    """A class to group all possible parameters of one station.

    So if you want to create the input files for a simulation, where you need T, ET and N, use this class to download the data for one station.
    """

    def __init__(self, id, error_if_missing=True, **kwargs):
        """Create a GroupStation object for one station id.

        Parameters
        ----------
        id : int
            The station id to group the parameters for.
        error_if_missing : bool, optional
            Should an error be raised if one of the parameters is not
            available for this station?
            If False, the unavailable parameter is silently skipped.
            The default is True.
        **kwargs : dict, optional
            Additional keyword arguments handed to the single Station
            class constructors (StationP, StationT, StationET).

        Raises
        ------
        Exception
            Whatever the single Station constructor raised, if
            error_if_missing is True.
        """
        self.id = id
        self.station_parts = []
        self._error_if_missing = error_if_missing
        for StatClass in [StationP, StationT, StationET]:
            try:
                self.station_parts.append(
                    StatClass(id=id, **kwargs)
                )
            except Exception:
                if error_if_missing:
                    # bare raise keeps the original traceback intact
                    raise
                # best-effort mode: note the skipped parameter for debugging
                log.debug(
                    "Station %s: could not create %s part, skipping.",
                    id, StatClass.__name__)
        self.paras_available = [stat._para for stat in self.station_parts]
|
40
|
+
|
41
|
+
def _check_paras(self, paras):
|
42
|
+
if isinstance(paras, str) and paras != "all":
|
43
|
+
paras = [paras,]
|
44
|
+
|
45
|
+
if isinstance(paras, str) and (paras == "all"):
|
46
|
+
return self.paras_available
|
47
|
+
else:
|
48
|
+
paras_new = []
|
49
|
+
for para in paras:
|
50
|
+
if para in self.paras_available:
|
51
|
+
paras_new.append(para)
|
52
|
+
elif self._error_if_missing:
|
53
|
+
raise ValueError(
|
54
|
+
f"The parameter {para} you asked for is not available for station {self.id}")
|
55
|
+
return paras_new
|
56
|
+
|
57
|
+
@staticmethod
|
58
|
+
def _check_kinds(kinds):
|
59
|
+
# type cast kinds
|
60
|
+
if isinstance(kinds, str):
|
61
|
+
kinds = [kinds]
|
62
|
+
else:
|
63
|
+
kinds = kinds.copy()
|
64
|
+
return kinds
|
65
|
+
|
66
|
+
def get_available_paras(self, short=False):
|
67
|
+
"""Get the possible parameters for this station.
|
68
|
+
|
69
|
+
Parameters
|
70
|
+
----------
|
71
|
+
short : bool, optional
|
72
|
+
Should the short name of the parameters be returned.
|
73
|
+
The default is "long".
|
74
|
+
|
75
|
+
Returns
|
76
|
+
-------
|
77
|
+
list of str
|
78
|
+
A list of the long parameter names that are possible for this station to get.
|
79
|
+
"""
|
80
|
+
paras = []
|
81
|
+
attr_name = "_para" if short else "_para_long"
|
82
|
+
for stat in self.station_parts:
|
83
|
+
paras.append(getattr(stat, attr_name))
|
84
|
+
|
85
|
+
return paras
|
86
|
+
|
87
|
+
def get_filled_period(self, kinds="best", from_meta=True, join_how="inner"):
|
88
|
+
"""Get the combined filled period for all 3 stations.
|
89
|
+
|
90
|
+
This is the maximum possible timerange for these stations.
|
91
|
+
|
92
|
+
Parameters
|
93
|
+
----------
|
94
|
+
kind : str
|
95
|
+
The data kind to look for filled period.
|
96
|
+
Must be a column in the timeseries DB.
|
97
|
+
Must be one of "raw", "qc", "filled", "adj".
|
98
|
+
If "best" is given, then depending on the parameter of the station the best kind is selected.
|
99
|
+
For Precipitation this is "corr" and for the other this is "filled".
|
100
|
+
For the precipitation also "qn" and "corr" are valid.
|
101
|
+
from_meta : bool, optional
|
102
|
+
Should the period be from the meta table?
|
103
|
+
If False: the period is returned from the timeserie. In this case this function is only a wrapper for .get_period_meta.
|
104
|
+
The default is True.
|
105
|
+
join_how : str, optional
|
106
|
+
How should the different periods get joined.
|
107
|
+
If "inner" then the minimal period that is inside of all the filled_periods is returned.
|
108
|
+
If "outer" then the maximal possible period is returned.
|
109
|
+
The default is "inner".
|
110
|
+
|
111
|
+
Returns
|
112
|
+
-------
|
113
|
+
TimestampPeriod
|
114
|
+
The maximum filled period for the 3 parameters for this station.
|
115
|
+
"""
|
116
|
+
kinds = self._check_kinds(kinds)
|
117
|
+
for kind in ["filled_by", "adj"]:
|
118
|
+
if kind in kinds:
|
119
|
+
kinds.remove(kind)
|
120
|
+
|
121
|
+
# get filled_period
|
122
|
+
for kind in kinds:
|
123
|
+
for stat in self.station_parts:
|
124
|
+
new_filled_period = stat.get_filled_period(
|
125
|
+
kind=kind, from_meta=from_meta)
|
126
|
+
|
127
|
+
if "filled_period" not in locals():
|
128
|
+
filled_period = new_filled_period.copy()
|
129
|
+
else:
|
130
|
+
filled_period = filled_period.union(
|
131
|
+
new_filled_period, how=join_how)
|
132
|
+
|
133
|
+
return filled_period
|
134
|
+
|
135
|
+
def get_df(self, period=(None, None), kinds="best", paras="all",
|
136
|
+
agg_to="day", nas_allowed=True, add_na_share=False,
|
137
|
+
add_t_min=False, add_t_max=False, **kwargs):
|
138
|
+
"""Get a DataFrame with the corresponding data.
|
139
|
+
|
140
|
+
Parameters
|
141
|
+
----------
|
142
|
+
period : TimestampPeriod or (tuple or list of datetime.datetime or None), optional
|
143
|
+
The minimum and maximum Timestamp for which to get the timeseries.
|
144
|
+
If None is given, the maximum or minimal possible Timestamp is taken.
|
145
|
+
The default is (None, None).
|
146
|
+
kinds : str or list of str
|
147
|
+
The data kind to look for filled period.
|
148
|
+
Must be a column in the timeseries DB.
|
149
|
+
Must be one of "raw", "qc", "filled", "adj", "filled_by", "best"("corr" for N and "filled" for T and ET).
|
150
|
+
If "best" is given, then depending on the parameter of the station the best kind is selected.
|
151
|
+
For Precipitation this is "corr" and for the other this is "filled".
|
152
|
+
For the precipitation also "qn" and "corr" are valid.
|
153
|
+
agg_to : str, optional
|
154
|
+
To what aggregation level should the timeseries get aggregated to.
|
155
|
+
The minimum aggregation for Temperatur and ET is daily and for the precipitation it is 10 minutes.
|
156
|
+
If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
|
157
|
+
So if 10 minutes is selected, than precipitation is returned in 10 minuets and T and ET as daily.
|
158
|
+
The default is "10 min".
|
159
|
+
nas_allowed : bool, optional
|
160
|
+
Should NAs be allowed?
|
161
|
+
If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
|
162
|
+
If False, then the minimal filled period is returned.
|
163
|
+
The default is True.
|
164
|
+
paras : list of str or str, optional
|
165
|
+
Give the parameters for which to get the meta information.
|
166
|
+
Can be "n", "t", "et" or "all".
|
167
|
+
If "all", then every available station parameter is returned.
|
168
|
+
The default is "all"
|
169
|
+
add_na_share : bool, optional
|
170
|
+
Should one or several columns be added to the Dataframe with the share of NAs in the data.
|
171
|
+
This is especially important, when the stations data get aggregated, because the aggregation doesn't make sense if there are a lot of NAs in the original data.
|
172
|
+
If True, one column per asked kind is added with the respective share of NAs, if the aggregation step is not the smallest.
|
173
|
+
The "kind"_na_share column is in percentage.
|
174
|
+
The default is False.
|
175
|
+
add_t_min : bool, optional
|
176
|
+
Should the minimal temperature value get added?
|
177
|
+
The default is False.
|
178
|
+
add_t_max : bool, optional
|
179
|
+
Should the maximal temperature value get added?
|
180
|
+
The default is False.
|
181
|
+
|
182
|
+
Returns
|
183
|
+
-------
|
184
|
+
pd.Dataframe
|
185
|
+
A DataFrame with the timeseries for this station and the given period.
|
186
|
+
"""
|
187
|
+
paras = self._check_paras(paras)
|
188
|
+
|
189
|
+
# download dataframes
|
190
|
+
dfs = []
|
191
|
+
for stat in self.station_parts:
|
192
|
+
if stat._para in paras:
|
193
|
+
# check if min and max for temperature should get added
|
194
|
+
use_kinds = kinds.copy()
|
195
|
+
if stat._para == "t":
|
196
|
+
if isinstance(use_kinds, str):
|
197
|
+
use_kinds=[use_kinds]
|
198
|
+
if "best" in use_kinds:
|
199
|
+
use_kinds.insert(use_kinds.index("best"), "filled")
|
200
|
+
use_kinds.remove("best")
|
201
|
+
for k in ["raw", "filled"]:
|
202
|
+
if k in use_kinds:
|
203
|
+
if add_t_max:
|
204
|
+
use_kinds.insert(
|
205
|
+
use_kinds.index(k)+1,
|
206
|
+
f"{k}_max")
|
207
|
+
if add_t_min:
|
208
|
+
use_kinds.insert(
|
209
|
+
use_kinds.index(k)+1,
|
210
|
+
f"{k}_min")
|
211
|
+
|
212
|
+
# get the data from station object
|
213
|
+
df = stat.get_df(
|
214
|
+
period=period,
|
215
|
+
kinds=use_kinds,
|
216
|
+
agg_to=agg_to,
|
217
|
+
nas_allowed=nas_allowed,
|
218
|
+
add_na_share=add_na_share,
|
219
|
+
**kwargs)
|
220
|
+
df = df.rename(dict(zip(
|
221
|
+
df.columns,
|
222
|
+
[stat._para.upper() + "_" + col for col in df.columns])),
|
223
|
+
axis=1)
|
224
|
+
dfs.append(df)
|
225
|
+
|
226
|
+
# concat the dfs
|
227
|
+
if len(dfs) > 1:
|
228
|
+
df_all = pd.concat(dfs, axis=1)
|
229
|
+
elif len(dfs) == 1 :
|
230
|
+
df_all = dfs[0]
|
231
|
+
else:
|
232
|
+
raise ValueError("No timeserie was found for {paras} and Station {stid}".format(
|
233
|
+
paras=", ".join(paras),
|
234
|
+
stid=self.id))
|
235
|
+
|
236
|
+
return df_all
|
237
|
+
|
238
|
+
    @classmethod
    def get_meta_explanation(cls, infos="all"):
        """Get the explanations of the available meta fields.

        Pure delegation: the meta schema is owned by StationBase, so the
        explanation is the same for every parameter of the station.

        Parameters
        ----------
        infos : list or string, optional
            The infos you wish to get an explanation for.
            If "all" then all the available information get returned.
            The default is "all"

        Returns
        -------
        pd.Series
            a pandas Series with the information names as index and the explanation as values.
        """
        return StationBase.get_meta_explanation(infos=infos)
|
255
|
+
|
256
|
+
def get_max_period(self, kinds, nas_allowed=False):
|
257
|
+
"""Get the maximum available period for this stations timeseries.
|
258
|
+
|
259
|
+
If nas_allowed is True, then the maximum range of the timeserie is returned.
|
260
|
+
Else the minimal filled period is returned
|
261
|
+
|
262
|
+
Parameters
|
263
|
+
----------
|
264
|
+
kinds : str or list of str
|
265
|
+
The data kinds to update.
|
266
|
+
Must be a column in the timeseries DB.
|
267
|
+
Must be one of "raw", "qc", "filled", "adj".
|
268
|
+
For the precipitation also "qn" and "corr" are valid.
|
269
|
+
nas_allowed : bool, optional
|
270
|
+
Should NAs be allowed?
|
271
|
+
If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
|
272
|
+
If False, then the minimal filled period is returned.
|
273
|
+
The default is False.
|
274
|
+
|
275
|
+
Returns
|
276
|
+
-------
|
277
|
+
utils.TimestampPeriod
|
278
|
+
The maximum Timestamp Period
|
279
|
+
"""
|
280
|
+
kinds = self._check_kinds(kinds)
|
281
|
+
max_period = None
|
282
|
+
for stat in self.station_parts:
|
283
|
+
max_period_i = stat.get_max_period(
|
284
|
+
kinds=kinds, nas_allowed=nas_allowed)
|
285
|
+
if max_period is None:
|
286
|
+
max_period = max_period_i
|
287
|
+
else:
|
288
|
+
max_period = max_period.union(
|
289
|
+
max_period_i,
|
290
|
+
how="outer" if nas_allowed else "inner")
|
291
|
+
|
292
|
+
return max_period
|
293
|
+
|
294
|
+
def get_meta(self, paras="all", **kwargs):
|
295
|
+
"""Get the meta information for every parameter of this station.
|
296
|
+
|
297
|
+
Parameters
|
298
|
+
----------
|
299
|
+
paras : list of str or str, optional
|
300
|
+
Give the parameters for which to get the meta information.
|
301
|
+
Can be "n", "t", "et" or "all".
|
302
|
+
If "all", then every available station parameter is returned.
|
303
|
+
The default is "all"
|
304
|
+
**kwargs : dict, optional
|
305
|
+
The optional keyword arguments are handed to the single Station get_meta methods. Can be e.g. "info".
|
306
|
+
|
307
|
+
Returns
|
308
|
+
-------
|
309
|
+
dict
|
310
|
+
dict with the information.
|
311
|
+
there is one subdict per parameter.
|
312
|
+
If only one parameter is asked for, then there is no subdict, but only a single value.
|
313
|
+
"""
|
314
|
+
paras = self._check_paras(paras)
|
315
|
+
|
316
|
+
for stat in self.station_parts:
|
317
|
+
if stat._para in paras:
|
318
|
+
meta_para = stat.get_meta(**kwargs)
|
319
|
+
if "meta_all" not in locals():
|
320
|
+
meta_all = {stat._para:meta_para}
|
321
|
+
else:
|
322
|
+
meta_all.update({stat._para:meta_para})
|
323
|
+
return meta_all
|
324
|
+
|
325
|
+
def get_geom(self, crs=None):
|
326
|
+
"""Get the point geometry of the station.
|
327
|
+
|
328
|
+
Parameters
|
329
|
+
----------
|
330
|
+
crs: str, int or None, optional
|
331
|
+
The coordinate reference system of the geometry.
|
332
|
+
If None, then the geometry is returned in WGS84 (EPSG:4326).
|
333
|
+
If string, then it should be in a pyproj readable format.
|
334
|
+
If int, then it should be the EPSG code.
|
335
|
+
The default is None.
|
336
|
+
|
337
|
+
Returns
|
338
|
+
-------
|
339
|
+
shapely.geometries.Point
|
340
|
+
The location of the station as shapely Point in the given coordinate reference system.
|
341
|
+
"""
|
342
|
+
return self.station_parts[0].get_geom(crs=crs)
|
343
|
+
|
344
|
+
def get_name(self):
|
345
|
+
return self.station_parts[0].get_name()
|
346
|
+
|
347
|
+
def create_roger_ts(self, dir, period=(None, None),
|
348
|
+
kind="best", r_r0=1, add_t_min=False, add_t_max=False,
|
349
|
+
do_toolbox_format=False,
|
350
|
+
**kwargs):
|
351
|
+
"""Create the timeserie files for roger as csv.
|
352
|
+
|
353
|
+
This is only a wrapper function for create_ts with some standard settings.
|
354
|
+
|
355
|
+
Parameters
|
356
|
+
----------
|
357
|
+
dir : pathlib like object or zipfile.ZipFile
|
358
|
+
The directory or Zipfile to store the timeseries in.
|
359
|
+
If a zipfile is given a folder with the statiopns ID is added to the filepath.
|
360
|
+
period : TimestampPeriod like object, optional
|
361
|
+
The period for which to get the timeseries.
|
362
|
+
If (None, None) is entered, then the maximal possible period is computed.
|
363
|
+
The default is (None, None)
|
364
|
+
kind : str
|
365
|
+
The data kind to look for filled period.
|
366
|
+
Must be a column in the timeseries DB.
|
367
|
+
Must be one of "raw", "qc", "filled", "adj".
|
368
|
+
If "best" is given, then depending on the parameter of the station the best kind is selected.
|
369
|
+
For Precipitation this is "corr" and for the other this is "filled".
|
370
|
+
For the precipitation also "qn" and "corr" are valid.
|
371
|
+
r_r0 : int or float, list of int or float or None, optional
|
372
|
+
Should the ET timeserie contain a column with R/R0.
|
373
|
+
If None, then no column is added.
|
374
|
+
If int or float, then a R/R0 column is appended with this number as standard value.
|
375
|
+
If list of int or floats, then the list should have the same length as the ET-timeserie and is appanded to the Timeserie.
|
376
|
+
If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
|
377
|
+
The default is 1.
|
378
|
+
add_t_min=False : bool, optional
|
379
|
+
Schould the minimal temperature value get added?
|
380
|
+
The default is False.
|
381
|
+
add_t_max=False : bool, optional
|
382
|
+
Schould the maximal temperature value get added?
|
383
|
+
The default is False.
|
384
|
+
do_toolbox_format : bool, optional
|
385
|
+
Should the timeseries be saved in the RoGeR toolbox format? (have a look at the RoGeR examples in https://github.com/Hydrology-IFH/roger)
|
386
|
+
The default is False.
|
387
|
+
**kwargs:
|
388
|
+
additional parameters for Station.get_df
|
389
|
+
|
390
|
+
Raises
|
391
|
+
------
|
392
|
+
Warning
|
393
|
+
If there are NAs in the timeseries or the period got changed.
|
394
|
+
"""
|
395
|
+
if do_toolbox_format:
|
396
|
+
return self.create_ts(
|
397
|
+
dir=dir, period=period, kinds=kind,
|
398
|
+
agg_to="10 min", r_r0=r_r0, split_date=True,
|
399
|
+
nas_allowed=False,
|
400
|
+
add_t_min=add_t_min, add_t_max=add_t_max,
|
401
|
+
file_names={"N":"PREC.txt", "T":"TA.txt", "ET":"PET.txt"},
|
402
|
+
col_names={"N":"PREC", "ET":"PET",
|
403
|
+
"T":"TA", "T_min":"TA_min", "T_max":"TA_max",
|
404
|
+
"Jahr":"YYYY", "Monat":"MM", "Tag":"DD",
|
405
|
+
"Stunde":"hh", "Minute":"mm"},
|
406
|
+
add_meta=False,
|
407
|
+
keep_date_parts=True,
|
408
|
+
**kwargs)
|
409
|
+
else:
|
410
|
+
return self.create_ts(
|
411
|
+
dir=dir, period=period, kinds=kind,
|
412
|
+
agg_to="10 min", r_r0=r_r0, split_date=True,
|
413
|
+
nas_allowed=False,
|
414
|
+
add_t_min=add_t_min, add_t_max=add_t_max,
|
415
|
+
**kwargs)
|
416
|
+
|
417
|
+
def create_ts(self, dir, period=(None, None),
|
418
|
+
kinds="best", paras="all",
|
419
|
+
agg_to="10 min", r_r0=None, split_date=False,
|
420
|
+
nas_allowed=True, add_na_share=False,
|
421
|
+
add_t_min=False, add_t_max=False,
|
422
|
+
add_meta=True, file_names={}, col_names={},
|
423
|
+
keep_date_parts=False,
|
424
|
+
**kwargs):
|
425
|
+
"""Create the timeserie files as csv.
|
426
|
+
|
427
|
+
Parameters
|
428
|
+
----------
|
429
|
+
dir : pathlib like object or zipfile.ZipFile
|
430
|
+
The directory or Zipfile to store the timeseries in.
|
431
|
+
If a zipfile is given a folder with the statiopns ID is added to the filepath.
|
432
|
+
period : TimestampPeriod like object, optional
|
433
|
+
The period for which to get the timeseries.
|
434
|
+
If (None, None) is entered, then the maximal possible period is computed.
|
435
|
+
The default is (None, None)
|
436
|
+
kinds : str or list of str
|
437
|
+
The data kinds to look for filled period.
|
438
|
+
Must be a column in the timeseries DB.
|
439
|
+
Must be one of "raw", "qc", "filled", "adj", "filled_by", "filled_share", "best".
|
440
|
+
If "best" is given, then depending on the parameter of the station the best kind is selected.
|
441
|
+
For precipitation this is "corr" and for the other this is "filled".
|
442
|
+
For the precipitation also "qn" and "corr" are valid.
|
443
|
+
If only one kind is asked for, then the columns get renamed to only have the parameter name as column name.
|
444
|
+
paras : list of str or str, optional
|
445
|
+
Give the parameters for which to get the meta information.
|
446
|
+
Can be "n", "t", "et" or "all".
|
447
|
+
If "all", then every available station parameter is returned.
|
448
|
+
The default is "all"
|
449
|
+
agg_to : str, optional
|
450
|
+
To what aggregation level should the timeseries get aggregated to.
|
451
|
+
The minimum aggregation for Temperatur and ET is daily and for the precipitation it is 10 minutes.
|
452
|
+
If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
|
453
|
+
So if 10 minutes is selected, than precipitation is returned in 10 minuets and T and ET as daily.
|
454
|
+
The default is "10 min".
|
455
|
+
r_r0 : int or float or None or pd.Series or list, optional
|
456
|
+
Should the ET timeserie contain a column with R/R0.
|
457
|
+
If None, then no column is added.
|
458
|
+
If int, then a R/R0 column is appended with this number as standard value.
|
459
|
+
If list of int or floats, then the list should have the same length as the ET-timeserie and is appanded to the Timeserie.
|
460
|
+
If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
|
461
|
+
The default is None.
|
462
|
+
split_date : bool, optional
|
463
|
+
Should the timestamp get splitted into parts, so one column for year, one for month etc.?
|
464
|
+
If False the timestamp is saved in one column as string.
|
465
|
+
nas_allowed : bool, optional
|
466
|
+
Should NAs be allowed?
|
467
|
+
If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
|
468
|
+
If False, then the minimal filled period is returned.
|
469
|
+
The default is True.
|
470
|
+
add_na_share : bool, optional
|
471
|
+
Should one or several columns be added to the Dataframe with the share of NAs in the data.
|
472
|
+
This is especially important, when the stations data get aggregated, because the aggregation doesn't make sense if there are a lot of NAs in the original data.
|
473
|
+
If True, one column per asked kind is added with the respective share of NAs, if the aggregation step is not the smallest.
|
474
|
+
The "kind"_na_share column is in percentage.
|
475
|
+
The default is False.
|
476
|
+
add_t_min=False : bool, optional
|
477
|
+
Should the minimal temperature value get added?
|
478
|
+
The default is False.
|
479
|
+
add_t_max=False : bool, optional
|
480
|
+
Should the maximal temperature value get added?
|
481
|
+
The default is False.
|
482
|
+
add_meta : bool, optional
|
483
|
+
Should station Meta information like name and Location (lat, long) be added to the file?
|
484
|
+
The default is True.
|
485
|
+
file_names : dict, optional
|
486
|
+
A dictionary with the file names for the different parameters.
|
487
|
+
e.g.{"N":"PREC.txt", "T":"TA.txt", "ET":"ET.txt"}
|
488
|
+
If an empty dictionary is given, then the standard names are used.
|
489
|
+
The default is {}.
|
490
|
+
col_names : dict, optional
|
491
|
+
A dictionary with the column names for the different parameters.
|
492
|
+
e.g.{"N":"PREC", "T":"TA", "ET":"ET", "Jahr":"YYYY", "Monat":"MM", "Tag":"DD", "Stunde":"HH", "Minute":"MN"}
|
493
|
+
If an empty dictionary is given, then the standard names are used.
|
494
|
+
The default is {}.
|
495
|
+
keep_date_parts : bool, optional
|
496
|
+
only used if split_date is True.
|
497
|
+
Should the date parts that are not needed, e.g. hour value for daily timeseries, be kept?
|
498
|
+
If False, then the columns that are not needed are dropped.
|
499
|
+
The default is False.
|
500
|
+
**kwargs:
|
501
|
+
additional parameters for Station.get_df
|
502
|
+
|
503
|
+
Raises
|
504
|
+
------
|
505
|
+
Warning
|
506
|
+
If there are NAs in the timeseries and nas_allowed is False
|
507
|
+
or the period got changed.
|
508
|
+
"""
|
509
|
+
# check directory
|
510
|
+
dir = self._check_dir(dir)
|
511
|
+
|
512
|
+
# type cast kinds
|
513
|
+
kinds = self._check_kinds(kinds)
|
514
|
+
paras = self._check_paras(paras)
|
515
|
+
|
516
|
+
# get the period
|
517
|
+
if not ("_skip_period_check" in kwargs and kwargs["_skip_period_check"]):
|
518
|
+
period = TimestampPeriod._check_period(period).expand_to_timestamp()
|
519
|
+
period_filled = self.get_filled_period(
|
520
|
+
kinds=kinds,
|
521
|
+
join_how="outer" if nas_allowed else "inner")
|
522
|
+
|
523
|
+
if period.is_empty():
|
524
|
+
period = period_filled
|
525
|
+
else:
|
526
|
+
period_new = period_filled.union(
|
527
|
+
period,
|
528
|
+
how="inner")
|
529
|
+
if period_new != period:
|
530
|
+
warnings.warn(
|
531
|
+
f"The Period for Station {self.id} got changed from {str(period)} to {str(period_new)}.")
|
532
|
+
period = period_new
|
533
|
+
if "_skip_period_check" in kwargs:
|
534
|
+
del kwargs["_skip_period_check"]
|
535
|
+
|
536
|
+
# prepare loop
|
537
|
+
name_suffix = "_{stid:0>5}.txt".format(stid=self.id)
|
538
|
+
x, y = self.get_geom().coords.xy
|
539
|
+
name = self.get_name() + " (ID: {stid})".format(stid=self.id)
|
540
|
+
do_zip = isinstance(dir, zipfile.ZipFile)
|
541
|
+
|
542
|
+
for para in paras:
|
543
|
+
# get the timeserie
|
544
|
+
df = self.get_df(
|
545
|
+
period=period, kinds=kinds,
|
546
|
+
paras=[para], agg_to=agg_to,
|
547
|
+
nas_allowed=nas_allowed,
|
548
|
+
add_na_share=add_na_share,
|
549
|
+
add_t_min=add_t_min, add_t_max=add_t_max,
|
550
|
+
_skip_period_check=True,
|
551
|
+
**kwargs)
|
552
|
+
|
553
|
+
# rename columns
|
554
|
+
if len(kinds)==1 or ("filled_by" in kinds and len(kinds)==2):
|
555
|
+
if len(kinds)==1:
|
556
|
+
colname_base = [col for col in df.columns if len(col.split("_"))==2][0]
|
557
|
+
else:
|
558
|
+
colname_base = f"{para.upper()}_" + kinds[1-(kinds.index("filled_by"))]
|
559
|
+
df.rename(
|
560
|
+
{colname_base: para.upper(),
|
561
|
+
f"{colname_base}_min": f"{para.upper()}_min",
|
562
|
+
f"{colname_base}_max": f"{para.upper()}_max",},
|
563
|
+
axis=1, inplace=True)
|
564
|
+
else:
|
565
|
+
df.rename(
|
566
|
+
dict(zip(df.columns,
|
567
|
+
[col.replace(f"{para}_", f"{para.upper()}_")
|
568
|
+
for col in df.columns])),
|
569
|
+
axis=1, inplace=True)
|
570
|
+
|
571
|
+
# check for NAs
|
572
|
+
filled_cols = [col for col in df.columns if "filled_by" in col]
|
573
|
+
if not nas_allowed and df.drop(filled_cols, axis=1).isna().sum().sum() > 0:
|
574
|
+
warnings.warn("There were NAs in the timeserie for Station {stid}.".format(
|
575
|
+
stid=self.id))
|
576
|
+
|
577
|
+
# special operations for et
|
578
|
+
if para == "et" and r_r0 is not None:
|
579
|
+
if isinstance(r_r0, int) or isinstance(r_r0, float):
|
580
|
+
df = df.join(
|
581
|
+
pd.Series([r_r0]*len(df), name="R/R0", index=df.index))
|
582
|
+
elif isinstance(r_r0, pd.Series):
|
583
|
+
df = df.join(r_r0.rename("R_R0"))
|
584
|
+
elif isinstance(r_r0, list):
|
585
|
+
df = df.join(
|
586
|
+
pd.Series(r_r0, name="R/R0", index=df.index))
|
587
|
+
|
588
|
+
# create tables
|
589
|
+
if split_date:
|
590
|
+
n_parts = 5 if keep_date_parts else AGG_TO[agg_to]["split"][para]
|
591
|
+
df = self._split_date(df.index)\
|
592
|
+
.iloc[:, 0:n_parts]\
|
593
|
+
.join(df)
|
594
|
+
else:
|
595
|
+
df.reset_index(inplace=True)
|
596
|
+
|
597
|
+
# rename columns if user asked for
|
598
|
+
df.rename(col_names, axis=1, inplace=True)
|
599
|
+
|
600
|
+
# create header
|
601
|
+
if add_meta:
|
602
|
+
header = f"Name: {name}{"\t" * (len(df.columns)-1)}\n" +\
|
603
|
+
f"Lat: {y[0]} ,Lon: {x[0]}{"\t" * (len(df.columns)-1)}\n"
|
604
|
+
else:
|
605
|
+
header = ""
|
606
|
+
|
607
|
+
# get file name
|
608
|
+
if para.upper() in file_names:
|
609
|
+
file_name = file_names[para.upper()]
|
610
|
+
elif para in file_names:
|
611
|
+
file_name = file_names[para]
|
612
|
+
else:
|
613
|
+
file_name = para.upper() + name_suffix
|
614
|
+
|
615
|
+
# write table out
|
616
|
+
if version.parse(pd.__version__) > version.parse("1.5.0"):
|
617
|
+
to_csv_kwargs = dict(lineterminator="\n")
|
618
|
+
else:
|
619
|
+
to_csv_kwargs = dict(line_terminator="\n")
|
620
|
+
str_df = header + df.to_csv(
|
621
|
+
sep="\t", decimal=".", index=False, **to_csv_kwargs)
|
622
|
+
|
623
|
+
if do_zip:
|
624
|
+
dir.writestr(f"{self.id}/{file_name}", str_df)
|
625
|
+
else:
|
626
|
+
with open(dir.joinpath(file_name), "w") as f:
|
627
|
+
f.write(str_df)
|
628
|
+
|
629
|
+
@staticmethod
|
630
|
+
def _check_dir(dir):
|
631
|
+
"""Checks if a directors is valid and empty.
|
632
|
+
|
633
|
+
If not existing the directory is created.
|
634
|
+
|
635
|
+
Parameters
|
636
|
+
----------
|
637
|
+
dir : pathlib object or zipfile.ZipFile
|
638
|
+
The directory to check.
|
639
|
+
|
640
|
+
Raises
|
641
|
+
------
|
642
|
+
ValueError
|
643
|
+
If the directory is not empty.
|
644
|
+
ValueError
|
645
|
+
If the directory is not valid. E.G. it is a file path.
|
646
|
+
"""
|
647
|
+
# check types
|
648
|
+
if isinstance(dir, str):
|
649
|
+
dir = Path(dir)
|
650
|
+
|
651
|
+
# check directory
|
652
|
+
if isinstance(dir, Path):
|
653
|
+
if dir.is_dir():
|
654
|
+
if len(list(dir.iterdir())) > 0:
|
655
|
+
raise ValueError(
|
656
|
+
"The given directory '{dir}' is not empty.".format(
|
657
|
+
dir=str(dir)))
|
658
|
+
elif dir.suffix == "":
|
659
|
+
dir.mkdir()
|
660
|
+
else:
|
661
|
+
raise ValueError(
|
662
|
+
"The given directory '{dir}' is not a directory.".format(
|
663
|
+
dir=dir))
|
664
|
+
elif not isinstance(dir, zipfile.ZipFile):
|
665
|
+
raise ValueError(
|
666
|
+
"The given directory '{dir}' is not a directory or zipfile.".format(
|
667
|
+
dir=dir))
|
668
|
+
|
669
|
+
return dir
|
670
|
+
|
671
|
+
@staticmethod
|
672
|
+
def _split_date(dates):
|
673
|
+
"""
|
674
|
+
Split datetime into parts.
|
675
|
+
|
676
|
+
Parameters
|
677
|
+
----------
|
678
|
+
dates : pandas.DatetimeIndex or list of (datetime.dateime or pandas.Timestamp) or
|
679
|
+
pandas.DataFrame of (datetime.datetime or pandas.Timestamp)
|
680
|
+
The datetime's to split.
|
681
|
+
|
682
|
+
Returns
|
683
|
+
-------
|
684
|
+
pandas.DataFrame
|
685
|
+
A DataFrame with 5 columns (Jahr, Monat, Tag, Stunde, Minute).
|
686
|
+
"""
|
687
|
+
# if dates is not a list make it a list
|
688
|
+
if isinstance(dates, datetime) or isinstance(dates, pd.Timestamp):
|
689
|
+
dates = pd.DatetimeIndex([dates])
|
690
|
+
index = range(0, len(dates))
|
691
|
+
|
692
|
+
elif isinstance(dates, pd.DatetimeIndex):
|
693
|
+
index = dates
|
694
|
+
else:
|
695
|
+
index = range(0, len(dates))
|
696
|
+
|
697
|
+
# check if date is datetime or Timestamp:
|
698
|
+
if not (isinstance(dates[0], pd.Timestamp) or
|
699
|
+
isinstance(dates[0], datetime)):
|
700
|
+
raise ValueError("Error: The given date is not in a datetime or " +
|
701
|
+
"Timestamp format.")
|
702
|
+
|
703
|
+
return pd.DataFrame(
|
704
|
+
{"Jahr": dates.year,
|
705
|
+
"Monat": dates.month,
|
706
|
+
"Tag": dates.day,
|
707
|
+
"Stunde": dates.hour,
|
708
|
+
"Minute": dates.minute},
|
709
|
+
dtype=int,
|
710
|
+
index=index)
|