weatherdb 1.1.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,1021 @@
|
|
1
|
+
# libraries
|
2
|
+
import warnings
|
3
|
+
import traceback
|
4
|
+
import pandas as pd
|
5
|
+
import geopandas as gpd
|
6
|
+
from shapely import wkt
|
7
|
+
import multiprocessing as mp
|
8
|
+
from multiprocessing.pool import ThreadPool
|
9
|
+
import time
|
10
|
+
import progressbar as pb
|
11
|
+
import logging
|
12
|
+
import itertools
|
13
|
+
import datetime
|
14
|
+
from sqlalchemy import text as sqltxt
|
15
|
+
import sqlalchemy as sa
|
16
|
+
import textwrap
|
17
|
+
|
18
|
+
from ..db.connections import db_engine
|
19
|
+
from ..utils.dwd import get_dwd_meta, get_cdc_file_list
|
20
|
+
from ..station.StationBases import StationBase
|
21
|
+
from ..db import models
|
22
|
+
from ..db.queries.get_quotient import _get_quotient
|
23
|
+
|
24
|
+
# set settings
# ############
# Force the 'spawn' start method for multiprocessing workers.
# NOTE(original author): 'fork' caused strange errors on linux.
try:# else I get strange errors with linux
    mp.set_start_method('spawn')
except RuntimeError:
    # the start method was already set elsewhere -> keep it
    pass

# public API of this module
__all__ = ["StationsBase"]

# module-level logger
log = logging.getLogger(__name__)
|
33
|
+
|
34
|
+
# Base class definitions
|
35
|
+
########################
|
36
|
+
|
37
|
+
class StationsBase:
|
38
|
+
_StationClass = StationBase
|
39
|
+
_timeout_raw_imp = 240
|
40
|
+
|
41
|
+
def __init__(self):
    """Initialize the stations object from the single-station class settings.

    The FTP folders, the parameter short name and the parameter long name
    are all derived from the ``_StationClass`` attribute of the subclass.

    Raises
    ------
    NotImplementedError
        If instantiated directly on the abstract StationsBase class.
    """
    # this class is abstract; only the parameter specific subclasses work
    if type(self) is StationsBase:
        raise NotImplementedError("""
The StationsBase is only a wrapper class and is not working on its own.
Please use StationP, StationPD, StationT or StationET instead""")
    self._ftp_folder_base = self._StationClass._ftp_folder_base
    if isinstance(self._ftp_folder_base, str):
        self._ftp_folder_base = [self._ftp_folder_base]

    # create ftp_folders in order of importance (historical before recent)
    self._ftp_folders = list(itertools.chain(*[
        [base + "historical/", base + "recent/"]
        for base in self._ftp_folder_base]))

    self._para = self._StationClass._para
    self._para_long = self._StationClass._para_long
|
58
|
+
def download_meta(self):
    """Download the meta file(s) from the CDC server.

    The first FTP folder (the "historical/" one, per __init__'s ordering)
    defines the base set of stations; every further folder only extends it.

    Returns
    -------
    geopandas.GeoDataFrame
        The meta file from the CDC server.
        If there are several meta files on the server, they are joined together.
    """
    # download historic meta file (first folder = most important one)
    meta = get_dwd_meta(self._ftp_folders[0])

    for ftp_folder in self._ftp_folders[1:]:
        meta_new = get_dwd_meta(ftp_folder=ftp_folder)

        # add stations that are only present in this folder
        meta = pd.concat(
            [meta, meta_new[~meta_new.index.isin(meta.index)]])
        # pd.concat returns a plain DataFrame -> restore GeoDataFrame type
        if isinstance(meta_new, gpd.GeoDataFrame):
            meta = gpd.GeoDataFrame(meta, crs=meta_new.crs)

        # widen the known timespan per station: the earliest "von_datum"
        # (from) and the latest "bis_datum" (until) win
        if "bis_datum" in meta.columns:
            meta = meta.join(
                meta_new[["bis_datum", "von_datum"]],
                how="left", rsuffix="_new")

            mask = meta["von_datum"] > meta["von_datum_new"]
            meta.loc[mask, "von_datum"] = meta_new.loc[mask, "von_datum"]

            mask = meta["bis_datum"] < meta["bis_datum_new"]
            meta.loc[mask, "bis_datum"] = meta_new.loc[mask, "bis_datum"]

            # drop the helper columns created by the join again
            meta.drop(["von_datum_new", "bis_datum_new"], axis=1, inplace=True)

    return meta
|
95
|
+
@db_engine.deco_update_privilege
def update_meta(self, stids="all", **kwargs):
    """Update the meta table by comparing to the CDC server.

    The "von_datum" and "bis_datum" are ignored because it is better to
    set them from the filled period of the stations in the database.
    Often the CDC period is not correct.

    Parameters
    ----------
    stids: string or list of int, optional
        The Stations for which to compute.
        Can either be "all", for all possible stations
        or a list with the Station IDs.
        The default is "all".
    """
    log.info(f"The {self._para_long} meta table gets updated.")
    meta = self.download_meta()

    # the "Abgabe" column is not stored in the database
    if "Abgabe" in meta.columns:
        meta.drop("Abgabe", axis=1, inplace=True)

    # remove stations that were explicitly dropped from the database
    sql_dropped = sa\
        .select(models.DroppedStations.station_id)\
        .where(models.DroppedStations.parameter == self._para)
    with db_engine.connect() as con:
        dropped_rows = con.execute(sql_dropped).all()
    stids_to_drop = [row[0] for row in dropped_rows if row[0] in meta.index]
    meta.drop(stids_to_drop, inplace=True)

    # optionally restrict the update to a subset of stations
    if stids != "all":
        if not isinstance(stids, list):
            stids = [stids,]
        unwanted = [stid for stid in meta.index if stid not in stids]
        meta.drop(unwanted, inplace=True)

    # write to DB, but without the period columns (see docstring)
    period_cols = ["von_datum", "bis_datum"]
    if all(col in meta.columns for col in period_cols):
        self._update_db_meta(meta=meta.drop(period_cols, axis=1))
    else:
        self._update_db_meta(meta=meta)

    log.info(f"The {self._para_long} meta table got successfully updated.")
|
146
|
+
@db_engine.deco_update_privilege
def _update_db_meta(self, meta):
    """Update a meta table on the database with new DataFrame.

    Rows are upserted: existing station_ids are updated column by column,
    new station_ids are inserted.

    Parameters
    ----------
    meta : pandas.DataFrame
        A DataFrame with station_id as index.
    """
    # get the columns of meta (station_id becomes a regular column)
    meta = meta.rename_axis("station_id").reset_index()
    columns = [col.lower() for col in meta.columns]
    # geometry also implies a derived geometry_utm column (created below);
    # NOTE: this makes `columns` one element longer than meta.columns, the
    # zip below silently ignores the extra name when renaming
    columns = columns + ['geometry_utm'] if 'geometry' in columns else columns
    meta.rename(dict(zip(meta.columns, columns)), axis=1, inplace=True)

    # check if columns are initiated in DB
    with db_engine.connect() as con:
        columns_db = con.execute(sqltxt(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name='meta_{para}';
            """.format(para=self._para)
        )).all()
    columns_db = [col[0] for col in columns_db]

    # warn about and skip columns that do not exist in the DB table
    problem_cols = [col for col in columns if col not in columns_db]
    if len(problem_cols) > 0:
        warnings.warn("""
            The meta_{para} column '{cols}' is not initiated in the database.
            This column is therefor skiped.
            Please review the DB or the code.
            """.format(
                para=self._para,
                cols=", ".join(problem_cols))
            )
        columns = [col for col in columns if col in columns_db]

    # change date columns to a plain string representation for the SQL text
    for colname, col in \
            meta.select_dtypes(include="datetime64").items():
        meta.loc[:,colname] = col.dt.strftime("%Y%m%d %H:%M")

    # change geometry to WKT: EPSG:25832 (UTM) and EPSG:4326 (lon/lat)
    if "geometry" in meta.columns:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            meta["geometry_utm"] = meta.geometry.to_crs(25832).to_wkt()
            meta["geometry"] = meta.geometry.to_crs(4326).to_wkt()

    # change all to strings, so every cell can be quoted uniformly
    meta = meta.astype(str)

    # build the VALUES list; pandas missing markers become SQL NULL
    # NOTE(review): values are interpolated into the SQL string, not bound
    # as parameters -- safe only as long as meta comes from the trusted
    # DWD download; confirm before feeding other sources through here.
    values_all = ["', '".join(pair) for pair in meta.loc[:,columns].values]
    values = "('" + "'), ('".join(values_all) + "')"
    values = values.replace("'nan'", "NULL").replace("'<NA>'", "NULL")

    # create the upsert statement
    sql = '''
        INSERT INTO meta_{para}({columns})
        Values {values}
        ON CONFLICT (station_id) DO UPDATE SET
        '''.format(
        columns=", ".join(columns),
        values=values,
        para=self._para)
    for col in columns:
        sql += ' "{col}" = EXCLUDED."{col}", '.format(col=col)

    # drop the trailing ", " and terminate the statement
    sql = sql[:-2] + ";"

    # run sql command
    with db_engine.connect() as con:
        con.execute(sqltxt(sql))
        con.commit()
|
223
|
+
@db_engine.deco_update_privilege
def update_period_meta(self, stids="all", **kwargs):
    """Update the period in the meta table of the raw data.

    Parameters
    ----------
    stids: string or list of int, optional
        The Stations for which to compute.
        Can either be "all", for all possible stations
        or a list with the Station IDs.
        The default is "all".
    **kwargs : dict, optional
        The additional keyword arguments are passed to the get_stations
        method and to each station's update_period_meta method.

    Raises
    ------
    ValueError
        If the given stids (Station_IDs) are not all valid.
    """
    # BUGFIX: _run_simple_loop takes its method arguments as "kwds";
    # passing "kwargs=kwargs" raised a TypeError (unexpected keyword).
    self._run_simple_loop(
        stations=self.get_stations(only_real=True, stids=stids, **kwargs),
        method="update_period_meta",
        name="update period in meta",
        kwds=kwargs
    )
|
250
|
+
@classmethod
def get_meta_explanation(cls, infos="all"):
    """Get the explanations of the available meta fields.

    Parameters
    ----------
    infos : list or string, optional
        The infos you wish to get an explanation for.
        If "all" then all the available information get returned.
        The default is "all"

    Returns
    -------
    pd.Series
        a pandas Series with the information names as index and the
        explanation as values.
    """
    # the single-station class is the single source of truth here
    explanation = cls._StationClass.get_meta_explanation(infos=infos)
    return explanation
|
268
|
+
def get_meta(self,
        infos=["station_id", "filled_from", "filled_until", "geometry"],
        stids="all",
        only_real=True):
    """Get the meta Dataframe from the Database.

    Parameters
    ----------
    infos : list or str, optional
        A list of information from the meta file to return
        If "all" than all possible columns are returned, but only one geometry column.
        The default is: ["station_id", "filled_from", "filled_until", "geometry"]
    stids: string or list of int, optional
        The Stations for which to return the meta information.
        Can either be "all", for all possible stations
        or a list with the Station IDs.
        The default is "all".
    only_real: bool, optional
        Whether only real stations are returned or also virtual ones.
        True: only stations with own data are returned.
        The default is True.

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        The meta DataFrame.
    """
    # make sure infos is of type list
    if isinstance(infos, str):
        if infos == "all":
            infos = self.get_meta_explanation(infos="all").index.to_list()
            # "all" returns only one geometry column -> drop the UTM one
            if "geometry_utm" in infos:
                infos.remove("geometry_utm")
        else:
            infos = [infos]

    # check infos; station_id is always needed as the index column
    infos = [col.lower() for col in infos]
    if "station_id" not in infos:
        infos.insert(0, "station_id")
    if "geometry" in infos and "geometry_utm" in infos:
        warnings.warn(textwrap.dedent("""\
            You selected 2 geometry columns.
            Only the geometry column with EPSG 4326 is returned"""))
        infos.remove("geometry_utm")

    # create select expressions; geometries are returned as WKT text
    infos_select = []
    for info in infos:
        if info in ["geometry", "geometry_utm"]:
            infos_select.append(
                f"ST_AsText({info}) as {info}")
        else:
            infos_select.append(info)

    # build the WHERE conditions explicitly
    # (replaces the fragile '"where_clause" in locals()' pattern)
    conditions = []
    if only_real:
        conditions.append("is_real=true")
    if stids != "all":
        if not isinstance(stids, list):
            stids = [stids,]
        conditions.append("station_id in ({stids})".format(
            stids=", ".join([str(stid) for stid in stids])))

    sql = "SELECT {cols} FROM meta_{para}"\
        .format(cols=", ".join(infos_select), para=self._para)
    if conditions:
        sql += " WHERE " + " AND ".join(conditions)

    # execute queries to db
    with db_engine.connect() as con:
        meta = pd.read_sql(
            sqltxt(sql),
            con,
            index_col="station_id")

    # make datetime columns timezone aware (UTC)
    meta = meta.apply(
        lambda col: col.dt.tz_localize(datetime.timezone.utc) \
            if hasattr(col, "dt") and not col.dt.tz else col)

    # change to GeoDataFrame if geometry column was selected
    for geom_col, srid in zip(["geometry", "geometry_utm"],
                              ["4326", "25832"]):
        if geom_col in infos:
            meta[geom_col] = meta[geom_col].apply(wkt.loads)
            meta = gpd.GeoDataFrame(
                meta, crs="EPSG:" + srid, geometry=geom_col)

    # strip whitespaces in string columns
    for col in meta.columns[meta.dtypes == "object"]:
        try:
            meta[col] = meta[col].str.strip()
        except (AttributeError, TypeError):
            # "object" column that is not string-like -> leave it as is
            pass

    return meta
|
364
|
+
def get_stations(self, only_real=True, stids="all", skip_missing_stids=False, **kwargs):
    """Get a list with all the stations as Station-objects.

    Parameters
    ----------
    only_real: bool, optional
        Whether only real stations are returned or also virtual ones.
        True: only stations with own data are returned.
        The default is True.
    stids: string or list of int, optional
        The Stations to return.
        Can either be "all", for all possible stations,
        a single Station ID or a list with the Station IDs.
        The default is "all".
    skip_missing_stids: bool, optional
        Should the method skip the missing stations from input stids?
        If False, then a ValueError is raised if a station is not found.
        The default is False.
    **kwargs : dict, optional
        The additional keyword arguments aren't used in this method.

    Returns
    -------
    Station-object
        returns a list with the corresponding station objects.

    Raises
    ------
    ValueError
        If the given stids (Station_IDs) are not all valid.
    """
    meta = self.get_meta(
        infos=["station_id"], only_real=only_real, stids=stids)

    if isinstance(stids, str) and (stids == "all"):
        stations = [
            self._StationClass(stid, _skip_meta_check=True)
            for stid in meta.index]
    else:
        # BUGFIX: also accept a single station id, consistent with
        # get_meta; list(stids) raised TypeError for a plain int
        try:
            stids = list(stids)
        except TypeError:
            stids = [stids]
        stations = [
            self._StationClass(stid, _skip_meta_check=True)
            for stid in meta.index
            if stid in stids]
        if (not skip_missing_stids) and (len(stations) != len(stids)):
            stations_ids = [stat.id for stat in stations]
            raise ValueError(
                "It was not possible to create a {para_long} Station with the following IDs: {stids}".format(
                    para_long=self._para_long,
                    stids = ", ".join([str(stid) for stid in stids if stid not in stations_ids])
                ))

    return stations
|
418
|
+
def get_quotient(self, kinds_num, kinds_denom, stids="all", return_as="df", **kwargs):
    """Get the quotient of multi-annual means of two different kinds or the timeserie and the multi annual raster value.

    $quotient = \\overline{ts}_{kind_num} / \\overline{ts}_{denom}$

    Parameters
    ----------
    kinds_num : list of str or str
        The timeseries kinds of the numerators.
        Should be one of ['raw', 'qc', 'filled'].
        For precipitation also "corr" is possible.
    kinds_denom : list of str or str
        The timeseries kinds of the denominator or the multi annual raster key.
        If a raster key is given, the result is the quotient of the
        timeserie and the raster value. Possible values are:
        - timeserie kinds: 'raw', 'qc', 'filled' (precipitation: also "corr")
        - raster keys: 'hyras', 'dwd' or 'regnie', depending on your defined raster files.
    stids : list of Integer
        The stations IDs for which to compute the quotient.
    return_as : str, optional
        The format of the return value:
        "df" for a pandas DataFrame, "json" for a list of dictionaries.
    **kwargs : dict, optional
        The additional keyword arguments are passed to the get_stations method.

    Returns
    -------
    pandas.DataFrame or list of dict
        The quotient, as DataFrame or list of dictionaries (JSON)
        depending on the return_as parameter. The default is pd.DataFrame.

    Raises
    ------
    ValueError
        If the input parameters were not correct.
    """
    # the query helper encodes "all stations" as None
    if stids == "all":
        stids = None

    # validate the kinds; denominators may additionally be raster keys
    raster_keys = {"hyras", "regnie", "dwd"}
    kinds_num = self._StationClass._check_kinds(kinds_num)
    valid_denoms = self._StationClass._valid_kinds | raster_keys
    kinds_denom = self._StationClass._check_kinds(
        kinds_denom, valids=valid_denoms)

    # delegate the actual computation to the shared query module
    with db_engine.connect() as con:
        return _get_quotient(
            con=con,
            stids=stids,
            paras=self._para,
            kinds_num=kinds_num,
            kinds_denom=kinds_denom,
            return_as=return_as)
|
476
|
+
def count_holes(self, stids="all", **kwargs):
    """Count holes in timeseries depending on their length.

    Parameters
    ----------
    stids: string or list of int, optional
        The Stations to return.
        Can either be "all", for all possible stations
        or a list with the Station IDs.
        The default is "all".
    **kwargs : dict, optional
        Parameters supported by the StationBase.count_holes method.
        Furthermore the kwargs are passed to the get_stations method.

        possible values are:

        - weeks : list, optional
            A list of hole length to count.
            Every hole longer than the duration of weeks specified is counted.
            The default is [2, 4, 8, 12, 16, 20, 24]
        - kind : str
            The kind of the timeserie to analyze.
            Should be one of ['raw', 'qc', 'filled'].
            For N also "corr" is possible.
            Normally only "raw" and "qc" make sense, because the other timeseries should not have holes.
        - period : TimestampPeriod or (tuple or list of datetime.datetime or None), optional
            The minimum and maximum Timestamp for which to analyze the timeseries.
            If None is given, the maximum and minimal possible Timestamp is taken.
            The default is (None, None).
        - between_meta_period : bool, optional
            Only check between the respective period that is defined in the meta table.
            If "qc" is chosen as kind, then the "raw" meta period is taken.
            The default is True.
        - crop_period : bool, optional
            should the period get cropped to the maximum filled period.
            This will result in holes being ignored when they are at the end or at the beginning of the timeserie.
            If period = (None, None) is given, then this parameter is set to True.
            The default is False.

    Returns
    -------
    pandas.DataFrame
        A Pandas Dataframe, with station_id as index and one column per week.
        The numbers in the table are the amount of NA-periods longer than the respective amount of weeks.
        Empty if no station matched the selection.

    Raises
    ------
    ValueError
        If the input parameters were not correct.
    """
    # check input parameters
    stations = self.get_stations(stids=stids, only_real=True, **kwargs)

    # collect per-station counts and concatenate once
    # (avoids the quadratic cost of pd.concat inside the loop)
    counts = [
        station.count_holes(**kwargs)
        for station in pb.progressbar(stations, line_breaks=False)]

    # BUGFIX: an empty selection previously raised NameError
    if not counts:
        return pd.DataFrame()
    return pd.concat(counts, axis=0)
|
543
|
+
@staticmethod
def _get_progressbar(max_value, name):
    """Create a preconfigured progressbar for station loops.

    Parameters
    ----------
    max_value : int
        The maximum value of the progressbar.
    name : str
        The task name shown inside the progressbar.

    Returns
    -------
    progressbar.ProgressBar
        An initialized progressbar, already updated to 0.
    """
    # layout of the bar: marker, name, percent, counter, bar, timers, last id
    widget_list = [
        pb.widgets.RotatingMarker(),
        " " + name,
        pb.widgets.Percentage(), ' ',
        pb.widgets.SimpleProgress(
            format=("('%(value_s)s/%(max_value_s)s')")), ' ',
        pb.widgets.Bar(min_width=80), ' ',
        pb.widgets.Timer(format='%(elapsed)s'), ' | ',
        pb.widgets.ETA(),
        pb.widgets.DynamicMessage(
            "last_station",
            format=", last id: {formatted_value}",
            precision=4),
    ]
    pbar = pb.ProgressBar(
        widgets=widget_list,
        max_value=max_value,
        variables={"last_station": "None"},
        term_width=100,
        is_terminal=True,
    )
    pbar.update(0)
    return pbar
|
569
|
+
def _run_method(self, stations, method, name, kwds=None,
        do_mp=True, processes=mp.cpu_count()-1, **kwargs):
    """Run methods of the given stations objects in multiprocessing/threading mode.

    Parameters
    ----------
    stations : list of station objects
        A list of station objects. Those must be children of the StationBase class.
    method : str
        The name of the method to call.
    name : str
        A descriptive name of the method to show in the progressbar.
    kwds : dict, optional
        The keyword arguments to give to the methods.
        The default is None, meaning no keyword arguments.
    do_mp : bool, optional
        Should the method be done in multiprocessing mode?
        If False the methods will be called in threading mode.
        Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
        If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
        The default is True.
    processes : int, optional
        The number of processes that should get started simultaneously.
        If 1 or less, then the process is computed as a simple loop, so there is no multiprocessing or threading done.
        The default is the cpu count -1.
    """
    # BUGFIX: mutable default argument (kwds=dict()) replaced by None
    if kwds is None:
        kwds = dict()

    log.info(
        f"{self._para_long} Stations async loop over method '{method}' started." +
        "\n" + "-"*80
    )

    if processes <= 1:
        log.info(f"As the number of processes is 1 or lower, the method '{method}' is started as a simple loop.")
        self._run_simple_loop(
            stations=stations, method=method, name=name, kwds=kwds)
        return

    # progressbar
    num_stations = len(stations)
    pbar = self._get_progressbar(max_value=num_stations, name=name)

    # create pool; fall back to threads if child processes are not allowed
    if do_mp:
        try:
            pool = mp.Pool(processes=processes)
            log.debug("the multiprocessing Pool is started")
        except AssertionError:
            log.debug('daemonic processes are not allowed to have children, therefor threads are used')
            pool = ThreadPool(processes=processes)
    else:
        log.debug("the threading Pool is started")
        pool = ThreadPool(processes=processes)

    # start one async task per station
    results = [
        pool.apply_async(getattr(stat, method), kwds=kwds)
        for stat in stations]
    pool.close()

    # poll the results until all are finished
    finished = [False] * num_stations
    while not all(finished):
        for i, result in enumerate(results):
            # only handle results that just became ready
            if finished[i] or not result.ready():
                continue
            finished[i] = True
            pbar.variables["last_station"] = stations[i].id

            # get stdout and log (get() re-raises a worker exception)
            header = f"""The {name} of the {self._para_long} Station with ID {stations[i].id} finished with """
            try:
                stdout = result.get(10)
                if stdout is not None:
                    log.debug(f"{header}stdout:\n{stdout}")
            except Exception:
                log.error(f"{header}stderr:\n{traceback.format_exc()}")

        pbar.update(sum(finished))
        time.sleep(2)

    pbar.update(sum(finished))
    pool.join()
    pool.terminate()
|
656
|
+
def _run_simple_loop(self, stations, method, name, kwds=None):
    """Run a method of every given station object sequentially.

    Parameters
    ----------
    stations : list of station objects
        A list of station objects. Those must be children of the StationBase class.
    method : str
        The name of the method to call on every station.
    name : str
        A descriptive name of the method to show in the progressbar.
    kwds : dict, optional
        The keyword arguments to give to the method.
        The default is None, meaning no keyword arguments.
    """
    # BUGFIX: mutable default argument (kwds=dict()) replaced by None
    if kwds is None:
        kwds = dict()

    log.info("-"*79 +
        "\n{para_long} Stations simple loop over method '{method}' started.".format(
            para_long=self._para_long,
            method=method
        ))

    # progressbar
    num_stations = len(stations)
    pbar = self._get_progressbar(max_value=num_stations, name=name)

    # call the method station by station
    for stat in stations:
        getattr(stat, method)(**kwds)
        pbar.variables["last_station"] = stat.id
        pbar.update(pbar.value + 1)
|
673
|
+
@db_engine.deco_update_privilege
def update_raw(self, only_new=True, only_real=True, stids="all",
        remove_nas=True, do_mp=True, **kwargs):
    """Download all stations data from CDC and upload to database.

    Parameters
    ----------
    only_new : bool, optional
        Get only the files that are not yet in the database?
        If False all the available files are loaded again.
        The default is True
    only_real: bool, optional
        Whether only real stations are tried to download.
        True: only stations with a date in raw_from in meta are downloaded.
        The default is True.
    stids: string or list of int, optional
        The Stations to return.
        Can either be "all", for all possible stations
        or a list with the Station IDs.
        The default is "all".
    remove_nas : bool, optional
        Remove the NAs from the downloaded data before updating it to the database.
        This has computational advantages.
        The default is True.
    do_mp : bool, optional
        Should the method be done in multiprocessing mode?
        If False the methods will be called in threading mode.
        Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
        If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
        The default is True.
    **kwargs : dict, optional
        The additional keyword arguments for the _run_method and get_stations method

    Raises
    ------
    ValueError
        If the given stids (Station_IDs) are not all valid.
    """
    # remember when the import started, to store it in the DB afterwards
    start_tstp = datetime.datetime.now()

    # get FTP file list once, so every station worker can reuse it
    ftp_file_list = get_cdc_file_list(
        ftp_folders=self._ftp_folders)

    # run the tasks in multiprocessing mode
    self._run_method(
        stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
        method="update_raw",
        name="download raw {para} data".format(para=self._para.upper()),
        kwds=dict(
            only_new=only_new,
            ftp_file_list=ftp_file_list,
            remove_nas=remove_nas),
        do_mp=do_mp, **kwargs)

    # save start time as variable to db, but only if the import covered
    # every real station (either stids="all" or the list contains all ids)
    do_update_period = isinstance(stids, str) and (stids == "all")
    if not do_update_period and isinstance(stids, list):
        all_stids = self.get_meta(["station_id"], stids="all", only_real=True).index
        do_update_period = all([stid in stids for stid in all_stids])

    if do_update_period:
        # upsert the import timestamps into parameter_variables
        with db_engine.connect() as con:
            con.execute(sqltxt("""
                INSERT INTO parameter_variables (parameter, start_tstp_last_imp, max_tstp_last_imp)
                VALUES ('{para}',
                        '{start_tstp}'::timestamp,
                        (SELECT max(raw_until) FROM meta_{para}))
                ON CONFLICT (parameter) DO UPDATE SET
                    start_tstp_last_imp=EXCLUDED.start_tstp_last_imp,
                    max_tstp_last_imp=EXCLUDED.max_tstp_last_imp;
                """.format(
                    para=self._para,
                    start_tstp=start_tstp.strftime("%Y%m%d %H:%M"))))
            con.commit()
|
749
|
+
@db_engine.deco_update_privilege
|
750
|
+
def last_imp_quality_check(self, stids="all", do_mp=False, **kwargs):
|
751
|
+
"""Do the quality check of the last import.
|
752
|
+
|
753
|
+
Parameters
|
754
|
+
----------
|
755
|
+
do_mp : bool, optional
|
756
|
+
Should the method be done in multiprocessing mode?
|
757
|
+
If False the methods will be called in threading mode.
|
758
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
759
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
760
|
+
The default is False.
|
761
|
+
stids: string or list of int, optional
|
762
|
+
The Stations for which to compute.
|
763
|
+
Can either be "all", for all possible stations
|
764
|
+
or a list with the Station IDs.
|
765
|
+
The default is "all".
|
766
|
+
**kwargs : dict, optional
|
767
|
+
The additional keyword arguments for the _run_method and get_stations method
|
768
|
+
"""
|
769
|
+
self._run_method(
|
770
|
+
stations=self.get_stations(only_real=True, stids=stids, **kwargs),
|
771
|
+
method="last_imp_quality_check",
|
772
|
+
name="quality check {para} data".format(para=self._para.upper()),
|
773
|
+
do_mp=do_mp, **kwargs)
|
774
|
+
|
775
|
+
@db_engine.deco_update_privilege
|
776
|
+
def last_imp_fillup(self, stids="all", do_mp=False, **kwargs):
|
777
|
+
"""Do the gap filling of the last import.
|
778
|
+
|
779
|
+
Parameters
|
780
|
+
----------
|
781
|
+
do_mp : bool, optional
|
782
|
+
Should the method be done in multiprocessing mode?
|
783
|
+
If False the methods will be called in threading mode.
|
784
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
785
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
786
|
+
The default is False.
|
787
|
+
stids: string or list of int, optional
|
788
|
+
The Stations for which to compute.
|
789
|
+
Can either be "all", for all possible stations
|
790
|
+
or a list with the Station IDs.
|
791
|
+
The default is "all".
|
792
|
+
**kwargs : dict, optional
|
793
|
+
The additional keyword arguments for the _run_method and get_stations method
|
794
|
+
"""
|
795
|
+
stations = self.get_stations(only_real=False, stids=stids, **kwargs)
|
796
|
+
period = stations[0].get_last_imp_period(all=True)
|
797
|
+
period_log = period.strftime("%Y-%m-%d %H:%M")
|
798
|
+
log.info("The {para_long} Stations fillup of the last import is started for the period {min_tstp} - {max_tstp}".format(
|
799
|
+
para_long=self._para_long,
|
800
|
+
min_tstp=period_log[0],
|
801
|
+
max_tstp=period_log[1]))
|
802
|
+
self._run_method(
|
803
|
+
stations=stations,
|
804
|
+
method="last_imp_fillup",
|
805
|
+
name="fillup {para} data".format(para=self._para.upper()),
|
806
|
+
kwds=dict(_last_imp_period=period),
|
807
|
+
do_mp=do_mp,
|
808
|
+
**kwargs)
|
809
|
+
|
810
|
+
@db_engine.deco_update_privilege
|
811
|
+
def quality_check(self, period=(None, None), only_real=True, stids="all",
|
812
|
+
do_mp=False, **kwargs):
|
813
|
+
"""Quality check the raw data for a given period.
|
814
|
+
|
815
|
+
Parameters
|
816
|
+
----------
|
817
|
+
period : tuple or list of datetime.datetime or None, optional
|
818
|
+
The minimum and maximum Timestamp for which to get the timeseries.
|
819
|
+
If None is given, the maximum or minimal possible Timestamp is taken.
|
820
|
+
The default is (None, None).
|
821
|
+
stids: string or list of int, optional
|
822
|
+
The Stations for which to compute.
|
823
|
+
Can either be "all", for all possible stations
|
824
|
+
or a list with the Station IDs.
|
825
|
+
The default is "all".
|
826
|
+
do_mp : bool, optional
|
827
|
+
Should the method be done in multiprocessing mode?
|
828
|
+
If False the methods will be called in threading mode.
|
829
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
830
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
831
|
+
The default is False.
|
832
|
+
**kwargs : dict, optional
|
833
|
+
The additional keyword arguments for the _run_method and get_stations method
|
834
|
+
"""
|
835
|
+
self._run_method(
|
836
|
+
stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
|
837
|
+
method="quality_check",
|
838
|
+
name="quality check {para} data".format(para=self._para.upper()),
|
839
|
+
kwds=dict(period=period),
|
840
|
+
do_mp=do_mp,
|
841
|
+
**kwargs)
|
842
|
+
|
843
|
+
@db_engine.deco_update_privilege
|
844
|
+
def update_ma_raster(self, stids="all", do_mp=False, **kwargs):
|
845
|
+
"""Update the multi annual raster values for the stations.
|
846
|
+
|
847
|
+
Get a multi annual value from the corresponding raster and save to the multi annual table in the database.
|
848
|
+
|
849
|
+
Parameters
|
850
|
+
----------
|
851
|
+
stids: string or list of int, optional
|
852
|
+
The Stations for which to compute.
|
853
|
+
Can either be "all", for all possible stations
|
854
|
+
or a list with the Station IDs.
|
855
|
+
The default is "all".
|
856
|
+
do_mp : bool, optional
|
857
|
+
Should the method be done in multiprocessing mode?
|
858
|
+
If False the methods will be called in threading mode.
|
859
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
860
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
861
|
+
The default is False.
|
862
|
+
**kwargs : dict, optional
|
863
|
+
The additional keyword arguments for the _run_method and get_stations method
|
864
|
+
|
865
|
+
Raises
|
866
|
+
------
|
867
|
+
ValueError
|
868
|
+
If the given stids (Station_IDs) are not all valid.
|
869
|
+
"""
|
870
|
+
self._run_method(
|
871
|
+
stations=self.get_stations(only_real=False, stids=stids, **kwargs),
|
872
|
+
method="update_ma_raster",
|
873
|
+
name="update ma-raster-values for {para}".format(para=self._para.upper()),
|
874
|
+
do_mp=do_mp,
|
875
|
+
**kwargs)
|
876
|
+
|
877
|
+
@db_engine.deco_update_privilege
|
878
|
+
def update_ma_timeseries(self, kind, stids="all", do_mp=False, **kwargs):
|
879
|
+
"""Update the multi annual timeseries values for the stations.
|
880
|
+
|
881
|
+
Get a multi annual value from the corresponding timeseries and save to the database.
|
882
|
+
|
883
|
+
Parameters
|
884
|
+
----------
|
885
|
+
kind : str or list of str
|
886
|
+
The timeseries data kind to update theire multi annual value.
|
887
|
+
Must be a column in the timeseries DB.
|
888
|
+
Must be one of "raw", "qc", "filled".
|
889
|
+
For the precipitation also "corr" is valid.
|
890
|
+
stids: string or list of int, optional
|
891
|
+
The Stations for which to compute.
|
892
|
+
Can either be "all", for all possible stations
|
893
|
+
or a list with the Station IDs.
|
894
|
+
The default is "all".
|
895
|
+
do_mp : bool, optional
|
896
|
+
Should the method be done in multiprocessing mode?
|
897
|
+
If False the methods will be called in threading mode.
|
898
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
899
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
900
|
+
The default is False.
|
901
|
+
**kwargs : dict, optional
|
902
|
+
The additional keyword arguments for the _run_method and get_stations method
|
903
|
+
|
904
|
+
Raises
|
905
|
+
------
|
906
|
+
ValueError
|
907
|
+
If the given stids (Station_IDs) are not all valid.
|
908
|
+
"""
|
909
|
+
self._run_method(
|
910
|
+
stations=self.get_stations(only_real=False, stids=stids, **kwargs),
|
911
|
+
method="update_ma_timeseries",
|
912
|
+
name="update ma-ts-values for {para}".format(para=self._para.upper()),
|
913
|
+
do_mp=do_mp,
|
914
|
+
kwds=dict(kind=kind),
|
915
|
+
**kwargs)
|
916
|
+
|
917
|
+
@db_engine.deco_update_privilege
|
918
|
+
def fillup(self, only_real=False, stids="all", do_mp=False, **kwargs):
|
919
|
+
"""Fill up the quality checked data with data from nearby stations to get complete timeseries.
|
920
|
+
|
921
|
+
Parameters
|
922
|
+
----------
|
923
|
+
only_real: bool, optional
|
924
|
+
Whether only real stations are computed or also virtual ones.
|
925
|
+
True: only stations with own data are returned.
|
926
|
+
The default is True.
|
927
|
+
stids: string or list of int, optional
|
928
|
+
The Stations for which to compute.
|
929
|
+
Can either be "all", for all possible stations
|
930
|
+
or a list with the Station IDs.
|
931
|
+
The default is "all".
|
932
|
+
do_mp : bool, optional
|
933
|
+
Should the method be done in multiprocessing mode?
|
934
|
+
If False the methods will be called in threading mode.
|
935
|
+
Multiprocessing needs more memory and a bit more initiating time. Therefor it is only usefull for methods with a lot of computation effort in the python code.
|
936
|
+
If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
|
937
|
+
The default is False.
|
938
|
+
**kwargs : dict, optional
|
939
|
+
The additional keyword arguments for the _run_method and get_stations method
|
940
|
+
|
941
|
+
Raises
|
942
|
+
------
|
943
|
+
ValueError
|
944
|
+
If the given stids (Station_IDs) are not all valid.
|
945
|
+
"""
|
946
|
+
self._run_method(
|
947
|
+
stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
|
948
|
+
method="fillup",
|
949
|
+
name="fillup {para} data".format(para=self._para.upper()),
|
950
|
+
do_mp=do_mp,
|
951
|
+
**kwargs)
|
952
|
+
|
953
|
+
@db_engine.deco_update_privilege
|
954
|
+
def update(self, only_new=True, **kwargs):
|
955
|
+
"""Make a complete update of the stations.
|
956
|
+
|
957
|
+
Does the update_raw, quality check and fillup of the stations.
|
958
|
+
|
959
|
+
Parameters
|
960
|
+
----------
|
961
|
+
only_new : bool, optional
|
962
|
+
Should a only new values be computed?
|
963
|
+
If False: The stations are updated for the whole possible period.
|
964
|
+
If True, the stations are only updated for new values.
|
965
|
+
The default is True.
|
966
|
+
"""
|
967
|
+
self.update_raw(only_new=only_new, **kwargs)
|
968
|
+
if only_new:
|
969
|
+
self.last_imp_quality_check(**kwargs)
|
970
|
+
self.last_imp_fillup(**kwargs)
|
971
|
+
else:
|
972
|
+
self.quality_check(**kwargs)
|
973
|
+
self.fillup(**kwargs)
|
974
|
+
|
975
|
+
def get_df(self, stids, **kwargs):
|
976
|
+
"""Get a DataFrame with the corresponding data.
|
977
|
+
|
978
|
+
Parameters
|
979
|
+
----------
|
980
|
+
stids: string or list of int, optional
|
981
|
+
The Stations for which to compute.
|
982
|
+
Can either be "all", for all possible stations
|
983
|
+
or a list with the Station IDs.
|
984
|
+
The default is "all".
|
985
|
+
**kwargs: optional keyword arguments
|
986
|
+
Those keyword arguments are passed to the get_df function of the station class.
|
987
|
+
Possible parameters are period, agg_to, kinds.
|
988
|
+
Furthermore the kwargs are passed to the get_stations method.
|
989
|
+
|
990
|
+
Returns
|
991
|
+
-------
|
992
|
+
pd.Dataframe
|
993
|
+
A DataFrame with the timeseries for the selected stations, kind(s) and the given period.
|
994
|
+
If multiple columns are selected, the columns in this DataFrame is a MultiIndex with the station IDs as first level and the kind as second level.
|
995
|
+
"""
|
996
|
+
if "kinds" in kwargs and "kind" in kwargs:
|
997
|
+
raise ValueError("Either enter kind or kinds, not both.")
|
998
|
+
if "kind" in kwargs:
|
999
|
+
kinds=[kwargs.pop("kind")]
|
1000
|
+
else:
|
1001
|
+
kinds=kwargs.pop("kinds")
|
1002
|
+
kwargs.update(dict(only_real=kwargs.get("only_real", False)))
|
1003
|
+
stats = self.get_stations(stids=stids, **kwargs)
|
1004
|
+
df_all = None
|
1005
|
+
for stat in pb.progressbar(stats, line_breaks=False):
|
1006
|
+
df = stat.get_df(kinds=kinds, **kwargs)
|
1007
|
+
if df is None:
|
1008
|
+
warnings.warn(
|
1009
|
+
f"There was no data for {stat._para_long} station {stat.id}!")
|
1010
|
+
continue
|
1011
|
+
if len(df.columns) == 1:
|
1012
|
+
df.rename(
|
1013
|
+
dict(zip(df.columns, [stat.id])),
|
1014
|
+
axis=1, inplace=True)
|
1015
|
+
else:
|
1016
|
+
df.columns = pd.MultiIndex.from_product(
|
1017
|
+
[[stat.id], df.columns],
|
1018
|
+
names=["Station ID", "kind"])
|
1019
|
+
df_all = pd.concat([df_all, df], axis=1)
|
1020
|
+
|
1021
|
+
return df_all
|