weatherdb 1.1.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,710 @@
|
|
1
|
+
# libraries
|
2
|
+
import logging
|
3
|
+
from datetime import datetime
|
4
|
+
from pathlib import Path
|
5
|
+
import warnings
|
6
|
+
import zipfile
|
7
|
+
from packaging import version
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from ..utils.TimestampPeriod import TimestampPeriod
|
11
|
+
from .StationBases import StationBase, AGG_TO
|
12
|
+
from . import StationP, StationT, StationET
|
13
|
+
|
14
|
+
# set settings
# ############
# public API of this module
__all__ = ["GroupStation"]
# module-level logger, named after this module
log = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
# class definition
|
20
|
+
##################
|
21
|
+
class GroupStation(object):
    """A class to group all possible parameters of one station.

    So if you want to create the input files for a simulation, where you need T, ET and N, use this class to download the data for one station.
    """
|
26
|
+
|
27
|
+
def __init__(self, id, error_if_missing=True, **kwargs):
    """Initialize the grouped station.

    Tries to build one station object per parameter class (P, T, ET)
    and keeps those that could be created.

    Parameters
    ----------
    id : int
        The station id.
    error_if_missing : bool, optional
        Re-raise the error when a parameter class can not be created
        for this station; if False, that parameter is skipped silently.
        The default is True.
    **kwargs : dict, optional
        Additional keyword arguments handed to the single station classes.
    """
    self.id = id
    self.station_parts = []
    self._error_if_missing = error_if_missing
    for constructor in (StationP, StationT, StationET):
        try:
            part = constructor(id=id, **kwargs)
        except Exception:
            if error_if_missing:
                raise
        else:
            self.station_parts.append(part)
    self.paras_available = [part._para for part in self.station_parts]
|
40
|
+
|
41
|
+
def _check_paras(self, paras):
|
42
|
+
if isinstance(paras, str) and paras != "all":
|
43
|
+
paras = [paras,]
|
44
|
+
|
45
|
+
if isinstance(paras, str) and (paras == "all"):
|
46
|
+
return self.paras_available
|
47
|
+
else:
|
48
|
+
paras_new = []
|
49
|
+
for para in paras:
|
50
|
+
if para in self.paras_available:
|
51
|
+
paras_new.append(para)
|
52
|
+
elif self._error_if_missing:
|
53
|
+
raise ValueError(
|
54
|
+
f"The parameter {para} you asked for is not available for station {self.id}")
|
55
|
+
return paras_new
|
56
|
+
|
57
|
+
@staticmethod
|
58
|
+
def _check_kinds(kinds):
|
59
|
+
# type cast kinds
|
60
|
+
if isinstance(kinds, str):
|
61
|
+
kinds = [kinds]
|
62
|
+
else:
|
63
|
+
kinds = kinds.copy()
|
64
|
+
return kinds
|
65
|
+
|
66
|
+
def get_available_paras(self, short=False):
    """List the parameters that are available for this station.

    Parameters
    ----------
    short : bool, optional
        If True return the short parameter names, otherwise the long names.
        The default is False.

    Returns
    -------
    list of str
        The parameter names that are possible for this station to get.
    """
    attr = "_para" if short else "_para_long"
    return [getattr(part, attr) for part in self.station_parts]
|
86
|
+
|
87
|
+
def get_filled_period(self, kinds="best", from_meta=True, join_how="inner"):
    """Get the combined filled period for all station parts.

    This is the maximum possible timerange for these stations.

    Parameters
    ----------
    kinds : str or list of str
        The data kind(s) to look for the filled period.
        Must be a column in the timeseries DB.
        Must be one of "raw", "qc", "filled", "adj".
        If "best" is given, then depending on the parameter of the station the best kind is selected.
        For Precipitation this is "corr" and for the others this is "filled".
        For the precipitation also "qn" and "corr" are valid.
    from_meta : bool, optional
        Should the period be taken from the meta table?
        If False the period is determined from the timeserie itself.
        The default is True.
    join_how : str, optional
        How should the different periods get joined.
        If "inner" then the minimal period that is inside of all the filled_periods is returned.
        If "outer" then the maximal possible period is returned.
        The default is "inner".

    Returns
    -------
    TimestampPeriod or None
        The combined filled period for this station's parameters.
        None if there was no kind or station part to evaluate.
    """
    kinds = self._check_kinds(kinds)
    # "filled_by" and "adj" have no own filled period -> drop them
    for kind in ["filled_by", "adj"]:
        if kind in kinds:
            kinds.remove(kind)

    # join the filled periods of every kind of every station part
    # (explicit None sentinel instead of the previous `locals()` check,
    #  which also raised NameError when there was nothing to iterate)
    filled_period = None
    for kind in kinds:
        for stat in self.station_parts:
            new_filled_period = stat.get_filled_period(
                kind=kind, from_meta=from_meta)
            if filled_period is None:
                filled_period = new_filled_period.copy()
            else:
                filled_period = filled_period.union(
                    new_filled_period, how=join_how)

    return filled_period
|
134
|
+
|
135
|
+
def get_df(self, period=(None, None), kinds="best", paras="all",
           agg_to="day", nas_allowed=True, add_na_share=False,
           add_t_min=False, add_t_max=False, **kwargs):
    """Get a DataFrame with the corresponding data.

    Parameters
    ----------
    period : TimestampPeriod or (tuple or list of datetime.datetime or None), optional
        The minimum and maximum Timestamp for which to get the timeseries.
        If None is given, the maximum or minimal possible Timestamp is taken.
        The default is (None, None).
    kinds : str or list of str
        The data kinds to get.
        Must be a column in the timeseries DB.
        Must be one of "raw", "qc", "filled", "adj", "filled_by", "best".
        If "best" is given, then depending on the parameter of the station the best kind is selected.
        For Precipitation this is "corr" and for the others this is "filled".
        For the precipitation also "qn" and "corr" are valid.
    paras : list of str or str, optional
        Give the parameters for which to get the data.
        Can be "n", "t", "et" or "all".
        If "all", then every available station parameter is returned.
        The default is "all".
    agg_to : str, optional
        To what aggregation level should the timeseries get aggregated to.
        The minimum aggregation for Temperature and ET is daily and for the precipitation it is 10 minutes.
        If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
        The default is "day".
    nas_allowed : bool, optional
        Should NAs be allowed?
        If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
        If False, then the minimal filled period is returned.
        The default is True.
    add_na_share : bool, optional
        Should one column per asked kind be added with the share of NAs (in percent), if the aggregation step is not the smallest.
        The default is False.
    add_t_min : bool, optional
        Should the minimal temperature value get added?
        The default is False.
    add_t_max : bool, optional
        Should the maximal temperature value get added?
        The default is False.
    **kwargs : dict, optional
        Additional keyword arguments handed to the single station get_df methods.

    Returns
    -------
    pd.DataFrame
        A DataFrame with the timeseries for this station and the given period.

    Raises
    ------
    ValueError
        If no timeserie was found for the asked parameters.
    """
    paras = self._check_paras(paras)

    # download one dataframe per asked parameter
    dfs = []
    for stat in self.station_parts:
        if stat._para not in paras:
            continue

        # normalize kinds to a fresh list
        # (fix: a plain string - like the default "best" - has no .copy())
        use_kinds = [kinds] if isinstance(kinds, str) else kinds.copy()

        # for temperature optionally ask for the min/max columns as well
        if stat._para == "t":
            if "best" in use_kinds:
                use_kinds.insert(use_kinds.index("best"), "filled")
                use_kinds.remove("best")
            for k in ["raw", "filled"]:
                if k in use_kinds:
                    if add_t_max:
                        use_kinds.insert(
                            use_kinds.index(k)+1,
                            f"{k}_max")
                    if add_t_min:
                        use_kinds.insert(
                            use_kinds.index(k)+1,
                            f"{k}_min")

        # get the data from the station object
        df = stat.get_df(
            period=period,
            kinds=use_kinds,
            agg_to=agg_to,
            nas_allowed=nas_allowed,
            add_na_share=add_na_share,
            **kwargs)

        # prefix the columns with the parameter short name
        df = df.rename(
            dict(zip(
                df.columns,
                [stat._para.upper() + "_" + col for col in df.columns])),
            axis=1)
        dfs.append(df)

    # concat the dfs
    if len(dfs) > 1:
        df_all = pd.concat(dfs, axis=1)
    elif len(dfs) == 1:
        df_all = dfs[0]
    else:
        raise ValueError("No timeserie was found for {paras} and Station {stid}".format(
            paras=", ".join(paras),
            stid=self.id))

    return df_all
|
237
|
+
|
238
|
+
@classmethod
def get_meta_explanation(cls, infos="all"):
    """Explain the available meta information fields.

    Simply delegates to StationBase, as the explanations are identical
    for every parameter.

    Parameters
    ----------
    infos : list or string, optional
        The infos you wish to get an explanation for.
        If "all" then all the available information get returned.
        The default is "all".

    Returns
    -------
    pd.Series
        A pandas Series with the information names as index and the
        explanation as values.
    """
    return StationBase.get_meta_explanation(infos=infos)
|
255
|
+
|
256
|
+
def get_max_period(self, kinds, nas_allowed=False):
    """Get the maximum available period for this station's timeseries.

    If nas_allowed is True, the maximum range of the timeseries is
    returned; otherwise the minimal filled period.

    Parameters
    ----------
    kinds : str or list of str
        The data kinds to check.
        Must be a column in the timeseries DB.
        Must be one of "raw", "qc", "filled", "adj".
        For the precipitation also "qn" and "corr" are valid.
    nas_allowed : bool, optional
        Should NAs be allowed?
        If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
        If False, then the minimal filled period is returned.
        The default is False.

    Returns
    -------
    utils.TimestampPeriod
        The maximum Timestamp Period.
    """
    kinds = self._check_kinds(kinds)
    join_how = "outer" if nas_allowed else "inner"
    combined = None
    for part in self.station_parts:
        part_period = part.get_max_period(
            kinds=kinds, nas_allowed=nas_allowed)
        if combined is None:
            combined = part_period
        else:
            combined = combined.union(part_period, how=join_how)

    return combined
|
293
|
+
|
294
|
+
def get_meta(self, paras="all", **kwargs):
    """Get the meta information for every parameter of this station.

    Parameters
    ----------
    paras : list of str or str, optional
        Give the parameters for which to get the meta information.
        Can be "n", "t", "et" or "all".
        If "all", then every available station parameter is returned.
        The default is "all".
    **kwargs : dict, optional
        The optional keyword arguments are handed to the single Station
        get_meta methods. Can be e.g. "infos".

    Returns
    -------
    dict
        One entry per asked parameter, keyed by the parameter short name,
        with that station part's meta information as value.
        Empty if no asked parameter is available.
    """
    paras = self._check_paras(paras)

    # collect the meta information per parameter
    # (explicit dict instead of the previous `locals()` check, which
    #  raised NameError when no station part matched)
    meta_all = {}
    for stat in self.station_parts:
        if stat._para in paras:
            meta_all[stat._para] = stat.get_meta(**kwargs)
    return meta_all
|
324
|
+
|
325
|
+
def get_geom(self, crs=None):
    """Get the point geometry of the station.

    Parameters
    ----------
    crs : str, int or None, optional
        The coordinate reference system of the geometry.
        If None, then the geometry is returned in WGS84 (EPSG:4326).
        If string, then it should be in a pyproj readable format.
        If int, then it should be the EPSG code.
        The default is None.

    Returns
    -------
    shapely.geometries.Point
        The location of the station as shapely Point in the given
        coordinate reference system.
    """
    first_part = self.station_parts[0]
    return first_part.get_geom(crs=crs)
|
343
|
+
|
344
|
+
def get_name(self):
    """Return the station name, taken from the first station part."""
    first_part = self.station_parts[0]
    return first_part.get_name()
|
346
|
+
|
347
|
+
def create_roger_ts(self, dir, period=(None, None),
        kind="best", r_r0=1, add_t_min=False, add_t_max=False,
        do_toolbox_format=False,
        **kwargs):
    """Create the timeserie files for RoGeR as csv.

    This is only a wrapper function for create_ts with some standard settings.

    Parameters
    ----------
    dir : pathlib like object or zipfile.ZipFile
        The directory or Zipfile to store the timeseries in.
        If a zipfile is given a folder with the stations ID is added to the filepath.
    period : TimestampPeriod like object, optional
        The period for which to get the timeseries.
        If (None, None) is entered, then the maximal possible period is computed.
        The default is (None, None).
    kind : str
        The data kind to look for filled period.
        Must be a column in the timeseries DB.
        Must be one of "raw", "qc", "filled", "adj".
        If "best" is given, then depending on the parameter of the station the best kind is selected.
        For Precipitation this is "corr" and for the others this is "filled".
        For the precipitation also "qn" and "corr" are valid.
    r_r0 : int or float, list of int or float or None, optional
        Should the ET timeserie contain a column with R/R0.
        If None, then no column is added.
        If int or float, then a R/R0 column is appended with this number as standard value.
        If list of int or floats, then the list should have the same length as the ET-timeserie and is appended to the timeserie.
        If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
        The default is 1.
    add_t_min : bool, optional
        Should the minimal temperature value get added?
        The default is False.
    add_t_max : bool, optional
        Should the maximal temperature value get added?
        The default is False.
    do_toolbox_format : bool, optional
        Should the timeseries be saved in the RoGeR toolbox format? (have a look at the RoGeR examples in https://github.com/Hydrology-IFH/roger)
        The default is False.
    **kwargs:
        additional parameters for Station.get_df

    Raises
    ------
    Warning
        If there are NAs in the timeseries or the period got changed.
    """
    # settings shared by both output formats
    common = dict(
        dir=dir, period=period, kinds=kind,
        agg_to="10 min", r_r0=r_r0, split_date=True,
        nas_allowed=False,
        add_t_min=add_t_min, add_t_max=add_t_max)

    # the toolbox format additionally fixes file and column names
    if do_toolbox_format:
        common.update(
            file_names={"N":"PREC.txt", "T":"TA.txt", "ET":"PET.txt"},
            col_names={"N":"PREC", "ET":"PET",
                       "T":"TA", "T_min":"TA_min", "T_max":"TA_max",
                       "Jahr":"YYYY", "Monat":"MM", "Tag":"DD",
                       "Stunde":"hh", "Minute":"mm"},
            add_meta=False,
            keep_date_parts=True)

    return self.create_ts(**common, **kwargs)
|
416
|
+
|
417
|
+
def create_ts(self, dir, period=(None, None),
        kinds="best", paras="all",
        agg_to="10 min", r_r0=None, split_date=False,
        nas_allowed=True, add_na_share=False,
        add_t_min=False, add_t_max=False,
        add_meta=True, file_names=None, col_names=None,
        keep_date_parts=False,
        **kwargs):
    """Create the timeserie files as csv.

    Parameters
    ----------
    dir : pathlib like object or zipfile.ZipFile
        The directory or Zipfile to store the timeseries in.
        If a zipfile is given a folder with the stations ID is added to the filepath.
    period : TimestampPeriod like object, optional
        The period for which to get the timeseries.
        If (None, None) is entered, then the maximal possible period is computed.
        The default is (None, None).
    kinds : str or list of str
        The data kinds to look for filled period.
        Must be a column in the timeseries DB.
        Must be one of "raw", "qc", "filled", "adj", "filled_by", "filled_share", "best".
        If "best" is given, then depending on the parameter of the station the best kind is selected.
        For precipitation this is "corr" and for the others this is "filled".
        For the precipitation also "qn" and "corr" are valid.
        If only one kind is asked for, then the columns get renamed to only have the parameter name as column name.
    paras : list of str or str, optional
        Give the parameters for which to create the files.
        Can be "n", "t", "et" or "all".
        If "all", then every available station parameter is used.
        The default is "all".
    agg_to : str, optional
        To what aggregation level should the timeseries get aggregated to.
        The minimum aggregation for Temperature and ET is daily and for the precipitation it is 10 minutes.
        If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
        The default is "10 min".
    r_r0 : int or float or None or pd.Series or list, optional
        Should the ET timeserie contain a column with R/R0.
        If None, then no column is added.
        If int, then a R/R0 column is appended with this number as standard value.
        If list of int or floats, then the list should have the same length as the ET-timeserie and is appended to the timeserie.
        If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
        The default is None.
    split_date : bool, optional
        Should the timestamp get split into parts, so one column for year, one for month etc.?
        If False the timestamp is saved in one column as string.
    nas_allowed : bool, optional
        Should NAs be allowed?
        If True, then the maximum possible period is used, even if there are NAs in the timeserie.
        If False, then the minimal filled period is used.
        The default is True.
    add_na_share : bool, optional
        Should one column per asked kind be added with the share of NAs (in percent), if the aggregation step is not the smallest.
        The default is False.
    add_t_min : bool, optional
        Should the minimal temperature value get added?
        The default is False.
    add_t_max : bool, optional
        Should the maximal temperature value get added?
        The default is False.
    add_meta : bool, optional
        Should station meta information like name and location (lat, long) be added to the file?
        The default is True.
    file_names : dict or None, optional
        A dictionary with the file names for the different parameters.
        e.g. {"N":"PREC.txt", "T":"TA.txt", "ET":"ET.txt"}
        If None or an empty dictionary is given, then the standard names are used.
        The default is None.
    col_names : dict or None, optional
        A dictionary with the column names for the different parameters.
        e.g. {"N":"PREC", "T":"TA", "ET":"ET", "Jahr":"YYYY", "Monat":"MM", "Tag":"DD", "Stunde":"HH", "Minute":"MN"}
        If None or an empty dictionary is given, then the standard names are used.
        The default is None.
    keep_date_parts : bool, optional
        Only used if split_date is True.
        Should the date parts that are not needed, e.g. hour value for daily timeseries, be kept?
        If False, then the columns that are not needed are dropped.
        The default is False.
    **kwargs:
        additional parameters for Station.get_df

    Raises
    ------
    Warning
        If there are NAs in the timeseries and nas_allowed is False
        or the period got changed.
    """
    # avoid mutable default arguments (previously `={}` in the signature)
    file_names = {} if file_names is None else file_names
    col_names = {} if col_names is None else col_names

    # check directory
    dir = self._check_dir(dir)

    # type cast kinds and paras
    kinds = self._check_kinds(kinds)
    paras = self._check_paras(paras)

    # get the period
    if not kwargs.get("_skip_period_check", False):
        period = TimestampPeriod._check_period(period).expand_to_timestamp()
        period_filled = self.get_filled_period(
            kinds=kinds,
            join_how="outer" if nas_allowed else "inner")

        if period.is_empty():
            period = period_filled
        else:
            period_new = period_filled.union(
                period,
                how="inner")
            if period_new != period:
                warnings.warn(
                    f"The Period for Station {self.id} got changed from {str(period)} to {str(period_new)}.")
            period = period_new
    kwargs.pop("_skip_period_check", None)

    # prepare the loop over the parameters
    name_suffix = "_{stid:0>5}.txt".format(stid=self.id)
    x, y = self.get_geom().coords.xy
    name = self.get_name() + " (ID: {stid})".format(stid=self.id)
    do_zip = isinstance(dir, zipfile.ZipFile)

    for para in paras:
        # get the timeserie
        df = self.get_df(
            period=period, kinds=kinds,
            paras=[para], agg_to=agg_to,
            nas_allowed=nas_allowed,
            add_na_share=add_na_share,
            add_t_min=add_t_min, add_t_max=add_t_max,
            _skip_period_check=True,
            **kwargs)

        # rename columns: a single asked kind collapses to the bare parameter name
        if len(kinds) == 1 or ("filled_by" in kinds and len(kinds) == 2):
            if len(kinds) == 1:
                colname_base = [
                    col for col in df.columns if len(col.split("_")) == 2][0]
            else:
                colname_base = f"{para.upper()}_" + kinds[1-(kinds.index("filled_by"))]
            df.rename(
                {colname_base: para.upper(),
                 f"{colname_base}_min": f"{para.upper()}_min",
                 f"{colname_base}_max": f"{para.upper()}_max"},
                axis=1, inplace=True)
        else:
            df.rename(
                dict(zip(df.columns,
                         [col.replace(f"{para}_", f"{para.upper()}_")
                          for col in df.columns])),
                axis=1, inplace=True)

        # check for NAs (the filled_by columns are informational only)
        filled_cols = [col for col in df.columns if "filled_by" in col]
        if not nas_allowed and df.drop(filled_cols, axis=1).isna().sum().sum() > 0:
            warnings.warn("There were NAs in the timeserie for Station {stid}.".format(
                stid=self.id))

        # special operations for et: optionally add an R/R0 column
        if para == "et" and r_r0 is not None:
            if isinstance(r_r0, (int, float)):
                df = df.join(
                    pd.Series([r_r0]*len(df), name="R/R0", index=df.index))
            elif isinstance(r_r0, pd.Series):
                df = df.join(r_r0.rename("R_R0"))
            elif isinstance(r_r0, list):
                df = df.join(
                    pd.Series(r_r0, name="R/R0", index=df.index))

        # create tables
        if split_date:
            n_parts = 5 if keep_date_parts else AGG_TO[agg_to]["split"][para]
            df = self._split_date(df.index)\
                .iloc[:, 0:n_parts]\
                .join(df)
        else:
            df.reset_index(inplace=True)

        # rename columns if the user asked for it
        df.rename(col_names, axis=1, inplace=True)

        # create header
        if add_meta:
            # pad the meta lines with tabs so every row has the same column count.
            # Hoisted out of the f-string: re-using double quotes inside a
            # double-quoted f-string is only valid from Python 3.12 on (PEP 701).
            pad = "\t" * (len(df.columns)-1)
            header = f"Name: {name}{pad}\n" + \
                f"Lat: {y[0]} ,Lon: {x[0]}{pad}\n"
        else:
            header = ""

        # get file name
        if para.upper() in file_names:
            file_name = file_names[para.upper()]
        elif para in file_names:
            file_name = file_names[para]
        else:
            file_name = para.upper() + name_suffix

        # write table out; pandas renamed the line terminator kwarg in 1.5
        if version.parse(pd.__version__) > version.parse("1.5.0"):
            to_csv_kwargs = dict(lineterminator="\n")
        else:
            to_csv_kwargs = dict(line_terminator="\n")
        str_df = header + df.to_csv(
            sep="\t", decimal=".", index=False, **to_csv_kwargs)

        if do_zip:
            dir.writestr(f"{self.id}/{file_name}", str_df)
        else:
            with open(dir.joinpath(file_name), "w") as f:
                f.write(str_df)
|
628
|
+
|
629
|
+
@staticmethod
|
630
|
+
def _check_dir(dir):
|
631
|
+
"""Checks if a directors is valid and empty.
|
632
|
+
|
633
|
+
If not existing the directory is created.
|
634
|
+
|
635
|
+
Parameters
|
636
|
+
----------
|
637
|
+
dir : pathlib object or zipfile.ZipFile
|
638
|
+
The directory to check.
|
639
|
+
|
640
|
+
Raises
|
641
|
+
------
|
642
|
+
ValueError
|
643
|
+
If the directory is not empty.
|
644
|
+
ValueError
|
645
|
+
If the directory is not valid. E.G. it is a file path.
|
646
|
+
"""
|
647
|
+
# check types
|
648
|
+
if isinstance(dir, str):
|
649
|
+
dir = Path(dir)
|
650
|
+
|
651
|
+
# check directory
|
652
|
+
if isinstance(dir, Path):
|
653
|
+
if dir.is_dir():
|
654
|
+
if len(list(dir.iterdir())) > 0:
|
655
|
+
raise ValueError(
|
656
|
+
"The given directory '{dir}' is not empty.".format(
|
657
|
+
dir=str(dir)))
|
658
|
+
elif dir.suffix == "":
|
659
|
+
dir.mkdir()
|
660
|
+
else:
|
661
|
+
raise ValueError(
|
662
|
+
"The given directory '{dir}' is not a directory.".format(
|
663
|
+
dir=dir))
|
664
|
+
elif not isinstance(dir, zipfile.ZipFile):
|
665
|
+
raise ValueError(
|
666
|
+
"The given directory '{dir}' is not a directory or zipfile.".format(
|
667
|
+
dir=dir))
|
668
|
+
|
669
|
+
return dir
|
670
|
+
|
671
|
+
@staticmethod
|
672
|
+
def _split_date(dates):
|
673
|
+
"""
|
674
|
+
Split datetime into parts.
|
675
|
+
|
676
|
+
Parameters
|
677
|
+
----------
|
678
|
+
dates : pandas.DatetimeIndex or list of (datetime.dateime or pandas.Timestamp) or
|
679
|
+
pandas.DataFrame of (datetime.datetime or pandas.Timestamp)
|
680
|
+
The datetime's to split.
|
681
|
+
|
682
|
+
Returns
|
683
|
+
-------
|
684
|
+
pandas.DataFrame
|
685
|
+
A DataFrame with 5 columns (Jahr, Monat, Tag, Stunde, Minute).
|
686
|
+
"""
|
687
|
+
# if dates is not a list make it a list
|
688
|
+
if isinstance(dates, datetime) or isinstance(dates, pd.Timestamp):
|
689
|
+
dates = pd.DatetimeIndex([dates])
|
690
|
+
index = range(0, len(dates))
|
691
|
+
|
692
|
+
elif isinstance(dates, pd.DatetimeIndex):
|
693
|
+
index = dates
|
694
|
+
else:
|
695
|
+
index = range(0, len(dates))
|
696
|
+
|
697
|
+
# check if date is datetime or Timestamp:
|
698
|
+
if not (isinstance(dates[0], pd.Timestamp) or
|
699
|
+
isinstance(dates[0], datetime)):
|
700
|
+
raise ValueError("Error: The given date is not in a datetime or " +
|
701
|
+
"Timestamp format.")
|
702
|
+
|
703
|
+
return pd.DataFrame(
|
704
|
+
{"Jahr": dates.year,
|
705
|
+
"Monat": dates.month,
|
706
|
+
"Tag": dates.day,
|
707
|
+
"Stunde": dates.hour,
|
708
|
+
"Minute": dates.minute},
|
709
|
+
dtype=int,
|
710
|
+
index=index)
|