weatherdb 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
weatherdb/stations/StationsBase.py
@@ -0,0 +1,1021 @@
# libraries
import warnings
import traceback
import pandas as pd
import geopandas as gpd
from shapely import wkt
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
import time
import progressbar as pb
import logging
import itertools
import datetime
from sqlalchemy import text as sqltxt
import sqlalchemy as sa
import textwrap

from ..db.connections import db_engine
from ..utils.dwd import get_dwd_meta, get_cdc_file_list
from ..station.StationBases import StationBase
from ..db import models
from ..db.queries.get_quotient import _get_quotient

# set settings
# ############
try:  # else I get strange errors with linux
    mp.set_start_method('spawn')
except RuntimeError:
    pass

__all__ = ["StationsBase"]
log = logging.getLogger(__name__)

# Base class definitions
########################

class StationsBase:
    _StationClass = StationBase
    _timeout_raw_imp = 240

    def __init__(self):
        if type(self) is StationsBase:
            raise NotImplementedError("""
The StationsBase is only a wrapper class and is not working on its own.
Please use StationP, StationPD, StationT or StationET instead""")
        self._ftp_folder_base = self._StationClass._ftp_folder_base
        if isinstance(self._ftp_folder_base, str):
            self._ftp_folder_base = [self._ftp_folder_base]

        # create ftp_folders in order of importance
        self._ftp_folders = list(itertools.chain(*[
            [base + "historical/", base + "recent/"]
            for base in self._ftp_folder_base]))

        self._para = self._StationClass._para
        self._para_long = self._StationClass._para_long
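    # Usage sketch (illustrative, assuming a configured database connection and
    # that StationsP is exported by weatherdb.stations): the wrapper is meant to
    # be used through its parameter-specific subclasses, not directly.
    #
    # >>> from weatherdb.stations import StationsP
    # >>> stations_p = StationsP()   # StationsBase() itself raises NotImplementedError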
    def download_meta(self):
        """Download the meta file(s) from the CDC server.

        Returns
        -------
        geopandas.GeoDataFrame
            The meta file from the CDC server.
            If there are several meta files on the server, they are joined together.
        """
        # download historic meta file
        meta = get_dwd_meta(self._ftp_folders[0])

        for ftp_folder in self._ftp_folders[1:]:
            meta_new = get_dwd_meta(ftp_folder=ftp_folder)

            # add new stations
            meta = pd.concat(
                [meta, meta_new[~meta_new.index.isin(meta.index)]])
            if isinstance(meta_new, gpd.GeoDataFrame):
                meta = gpd.GeoDataFrame(meta, crs=meta_new.crs)

            # check for wider timespan
            if "bis_datum" in meta.columns:
                meta = meta.join(
                    meta_new[["bis_datum", "von_datum"]],
                    how="left", rsuffix="_new")

                mask = meta["von_datum"] > meta["von_datum_new"]
                meta.loc[mask, "von_datum"] = meta_new.loc[mask, "von_datum"]

                mask = meta["bis_datum"] < meta["bis_datum_new"]
                meta.loc[mask, "bis_datum"] = meta_new.loc[mask, "bis_datum"]

                meta.drop(["von_datum_new", "bis_datum_new"], axis=1, inplace=True)

        return meta
    @db_engine.deco_update_privilege
    def update_meta(self, stids="all", **kwargs):
        """Update the meta table by comparing to the CDC server.

        The "von_datum" and "bis_datum" are ignored, because it is better to set them from the filled period of the stations in the database.
        Often the CDC period is not correct.

        Parameters
        ----------
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        """
        log.info(
            "The {para_long} meta table gets updated."\
                .format(para_long=self._para_long))
        meta = self.download_meta()

        # check if Abgabe is in meta
        if "Abgabe" in meta.columns:
            meta.drop("Abgabe", axis=1, inplace=True)

        # get dropped stations and delete from meta file
        sql_get_dropped = sa\
            .select(models.DroppedStations.station_id)\
            .where(models.DroppedStations.parameter == self._para)
        with db_engine.connect() as con:
            dropped_stids = con.execute(sql_get_dropped).all()
        dropped_stids = [row[0] for row in dropped_stids
                         if row[0] in meta.index]
        meta.drop(dropped_stids, inplace=True)

        # check if only some stids should be updated
        if stids != "all":
            if not isinstance(stids, list):
                stids = [stids,]
            meta.drop([stid for stid in meta.index if stid not in stids], inplace=True)

        # to have a meta entry for every station before looping over them
        if "von_datum" in meta.columns and "bis_datum" in meta.columns:
            self._update_db_meta(
                meta=meta.drop(["von_datum", "bis_datum"], axis=1))
        else:
            self._update_db_meta(meta=meta)

        log.info(
            "The {para_long} meta table got successfully updated."\
                .format(para_long=self._para_long))
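    # Example (sketch, assuming the stations_p instance from above and update
    # privileges on the database): refresh the meta table for a few station IDs
    # only, or compare all stations against the CDC server.
    #
    # >>> stations_p.update_meta(stids=[1048, 4931])
    # >>> stations_p.update_meta()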
    @db_engine.deco_update_privilege
    def _update_db_meta(self, meta):
        """Update a meta table on the database with a new DataFrame.

        Parameters
        ----------
        meta : pandas.DataFrame
            A DataFrame with station_id as index.
        """
        # get the columns of meta
        meta = meta.rename_axis("station_id").reset_index()
        columns = [col.lower() for col in meta.columns]
        columns = columns + ['geometry_utm'] if 'geometry' in columns else columns
        meta.rename(dict(zip(meta.columns, columns)), axis=1, inplace=True)

        # check if columns are initiated in DB
        with db_engine.connect() as con:
            columns_db = con.execute(sqltxt(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_name='meta_{para}';
                """.format(para=self._para)
                )).all()
        columns_db = [col[0] for col in columns_db]

        problem_cols = [col for col in columns if col not in columns_db]
        if len(problem_cols) > 0:
            warnings.warn("""
                The meta_{para} column(s) '{cols}' are not initiated in the database.
                These columns are therefore skipped.
                Please review the DB or the code.
                """.format(
                    para=self._para,
                    cols=", ".join(problem_cols))
                )
            columns = [col for col in columns if col in columns_db]

        # change date columns
        for colname, col in \
                meta.select_dtypes(include="datetime64").items():
            meta.loc[:,colname] = col.dt.strftime("%Y%m%d %H:%M")

        # change geometry
        if "geometry" in meta.columns:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                meta["geometry_utm"] = meta.geometry.to_crs(25832).to_wkt()
                meta["geometry"] = meta.geometry.to_crs(4326).to_wkt()

        # change all to strings
        meta = meta.astype(str)

        # get values
        values_all = ["', '".join(pair) for pair in meta.loc[:,columns].values]
        values = "('" + "'), ('".join(values_all) + "')"
        values = values.replace("'nan'", "NULL").replace("'<NA>'", "NULL")

        # create sql
        sql = '''
            INSERT INTO meta_{para}({columns})
            VALUES {values}
            ON CONFLICT (station_id) DO UPDATE SET
        '''.format(
            columns=", ".join(columns),
            values=values,
            para=self._para)
        for col in columns:
            sql += ' "{col}" = EXCLUDED."{col}", '.format(col=col)

        sql = sql[:-2] + ";"

        # run sql command
        with db_engine.connect() as con:
            con.execute(sqltxt(sql))
            con.commit()
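    # The SQL built above is a plain upsert keyed on station_id; for two columns
    # it has roughly this shape (illustrative, with placeholder values and a
    # hypothetical "stationshoehe" column, assuming the precipitation table):
    #
    #   INSERT INTO meta_p(station_id, stationshoehe)
    #   VALUES ('1048', '227'), ('4931', '314')
    #   ON CONFLICT (station_id) DO UPDATE SET
    #    "station_id" = EXCLUDED."station_id",  "stationshoehe" = EXCLUDED."stationshoehe";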
    @db_engine.deco_update_privilege
    def update_period_meta(self, stids="all", **kwargs):
        """Update the period in the meta table of the raw data.

        Parameters
        ----------
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        **kwargs : dict, optional
            The additional keyword arguments are passed to the get_stations method.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        self._run_simple_loop(
            stations=self.get_stations(only_real=True, stids=stids, **kwargs),
            method="update_period_meta",
            name="update period in meta"
        )
    @classmethod
    def get_meta_explanation(cls, infos="all"):
        """Get the explanations of the available meta fields.

        Parameters
        ----------
        infos : list or string, optional
            The infos you wish to get an explanation for.
            If "all" then all the available information get returned.
            The default is "all".

        Returns
        -------
        pd.Series
            A pandas Series with the information names as index and the explanation as values.
        """
        return cls._StationClass.get_meta_explanation(infos=infos)
    def get_meta(self,
                 infos=["station_id", "filled_from", "filled_until", "geometry"],
                 stids="all",
                 only_real=True):
        """Get the meta DataFrame from the database.

        Parameters
        ----------
        infos : list or str, optional
            A list of information from the meta file to return.
            If "all" then all possible columns are returned, but only one geometry column.
            The default is: ["station_id", "filled_from", "filled_until", "geometry"]
        stids: string or list of int, optional
            The Stations for which to get the meta information.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        only_real: bool, optional
            Whether only real stations are returned or also virtual ones.
            True: only stations with own data are returned.
            The default is True.

        Returns
        -------
        pandas.DataFrame or geopandas.GeoDataFrame
            The meta DataFrame.
        """
        # make sure columns is of type list
        if isinstance(infos, str):
            if infos=="all":
                infos = self.get_meta_explanation(infos="all").index.to_list()
                if "geometry_utm" in infos:
                    infos.remove("geometry_utm")
            else:
                infos = [infos]

        # check infos
        infos = [col.lower() for col in infos]
        if "station_id" not in infos:
            infos.insert(0, "station_id")
        if "geometry" in infos and "geometry_utm" in infos:
            warnings.warn(textwrap.dedent("""\
                You selected 2 geometry columns.
                Only the geometry column with EPSG 4326 is returned"""))
            infos.remove("geometry_utm")

        # create geometry select statement
        infos_select = []
        for info in infos:
            if info in ["geometry", "geometry_utm"]:
                infos_select.append(
                    f"ST_AsText({info}) as {info}")
            else:
                infos_select.append(info)

        # create sql statement
        sql = "SELECT {cols} FROM meta_{para}"\
            .format(cols=", ".join(infos_select), para=self._para)
        if only_real:
            where_clause = " WHERE is_real=true"
        if stids != "all":
            if not isinstance(stids, list):
                stids = [stids,]
            if "where_clause" not in locals():
                where_clause = " WHERE "
            else:
                where_clause += " AND "
            where_clause += "station_id in ({stids})".format(
                stids=", ".join([str(stid) for stid in stids]))
        if "where_clause" in locals():
            sql += where_clause

        # execute queries to db
        with db_engine.connect() as con:
            meta = pd.read_sql(
                sqltxt(sql),
                con,
                index_col="station_id")

        # make datetime columns timezone aware
        meta = meta.apply(
            lambda col: col.dt.tz_localize(datetime.timezone.utc) \
                if hasattr(col, "dt") and not col.dt.tz else col)

        # change to GeoDataFrame if geometry column was selected
        for geom_col, srid in zip(["geometry", "geometry_utm"],
                                  ["4326", "25832"]):
            if geom_col in infos:
                meta[geom_col] = meta[geom_col].apply(wkt.loads)
                meta = gpd.GeoDataFrame(
                    meta, crs="EPSG:" + srid, geometry=geom_col)

        # strip whitespaces in string columns
        for col in meta.columns[meta.dtypes == "object"]:
            try:
                meta[col] = meta[col].str.strip()
            except:
                pass

        return meta
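    # Example (sketch, assuming the stations_p instance from above): fetch the
    # meta data for two stations; selecting "geometry" makes the result a
    # geopandas GeoDataFrame in EPSG:4326.
    #
    # >>> meta = stations_p.get_meta(
    # ...     infos=["station_id", "filled_from", "filled_until", "geometry"],
    # ...     stids=[1048, 4931])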
    def get_stations(self, only_real=True, stids="all", skip_missing_stids=False, **kwargs):
        """Get a list with all the stations as Station-objects.

        Parameters
        ----------
        only_real: bool, optional
            Whether only real stations are returned or also virtual ones.
            True: only stations with own data are returned.
            The default is True.
        stids: string or list of int, optional
            The Stations to return.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        skip_missing_stids: bool, optional
            Should the method skip the missing stations from the input stids?
            If False, then a ValueError is raised if a station is not found.
            The default is False.
        **kwargs : dict, optional
            The additional keyword arguments aren't used in this method.

        Returns
        -------
        list of Station objects
            Returns a list with the corresponding station objects.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        meta = self.get_meta(
            infos=["station_id"], only_real=only_real, stids=stids)

        if isinstance(stids, str) and (stids == "all"):
            stations = [
                self._StationClass(stid, _skip_meta_check=True)
                for stid in meta.index]
        else:
            stids = list(stids)
            stations = [
                self._StationClass(stid, _skip_meta_check=True)
                for stid in meta.index
                if stid in stids]
            if (not skip_missing_stids) and (len(stations) != len(stids)):
                stations_ids = [stat.id for stat in stations]
                raise ValueError(
                    "It was not possible to create a {para_long} Station with the following IDs: {stids}".format(
                        para_long=self._para_long,
                        stids=", ".join([str(stid) for stid in stids if stid not in stations_ids])
                    ))

        return stations
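    # Example (sketch): resolve station IDs to single-station objects and work
    # with them directly; with skip_missing_stids=True unknown IDs are ignored
    # instead of raising a ValueError.
    #
    # >>> stats = stations_p.get_stations(stids=[1048, 4931], skip_missing_stids=True)
    # >>> [stat.id for stat in stats]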
    def get_quotient(self, kinds_num, kinds_denom, stids="all", return_as="df", **kwargs):
        """Get the quotient of the multi-annual means of two kinds, or of a timeseries kind and the multi-annual raster value.

        $quotient = \\overline{ts}_{num} / \\overline{ts}_{denom}$

        Parameters
        ----------
        kinds_num : list of str or str
            The timeseries kinds of the numerators.
            Should be one of ['raw', 'qc', 'filled'].
            For precipitation also "corr" is possible.
        kinds_denom : list of str or str
            The timeseries kinds of the denominator or the multi annual raster key.
            If the denominator is a multi annual raster key, then the result is the quotient of the timeseries and the raster value.
            Possible values are:

            - for timeseries kinds: 'raw', 'qc', 'filled' or for precipitation also "corr".
            - for raster keys: 'hyras', 'dwd' or 'regnie', depending on your defined raster files.
        stids : list of Integer
            The stations IDs for which to compute the quotient.
        return_as : str, optional
            The format of the return value.
            If "df" then a pandas DataFrame is returned.
            If "json" then a list with dictionaries is returned.
        **kwargs : dict, optional
            The additional keyword arguments are passed to the get_stations method.

        Returns
        -------
        pandas.DataFrame or list of dict
            The quotient of the two timeseries as DataFrame or list of dictionaries (JSON) depending on the return_as parameter.
            The default is pd.DataFrame.

        Raises
        ------
        ValueError
            If the input parameters were not correct.
        """
        # check stids
        if stids == "all":
            stids = None

        # check kinds
        rast_keys = {"hyras", "regnie", "dwd"}
        kinds_num = self._StationClass._check_kinds(kinds_num)
        kinds_denom = self._StationClass._check_kinds(
            kinds_denom,
            valids=self._StationClass._valid_kinds | rast_keys)

        # get quotient
        with db_engine.connect() as con:
            return _get_quotient(
                con=con,
                stids=stids,
                paras=self._para,
                kinds_num=kinds_num,
                kinds_denom=kinds_denom,
                return_as=return_as)
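    # Example (sketch): quotient of the filled multi-annual means against the
    # "hyras" raster value, returned as a DataFrame ("json" would return a list
    # of dicts). The available raster keys depend on the rasters configured for
    # your database.
    #
    # >>> quot = stations_p.get_quotient(
    # ...     kinds_num="filled", kinds_denom="hyras", stids=[1048, 4931])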
    def count_holes(self, stids="all", **kwargs):
        """Count holes in the timeseries depending on their length.

        Parameters
        ----------
        stids: string or list of int, optional
            The Stations to return.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        **kwargs : dict, optional
            This is a list of parameters, that is supported by the StationBase.count_holes method.

            Furthermore the kwargs are passed to the get_stations method.

            Possible values are:

            - weeks : list, optional
                A list of hole lengths to count.
                Every hole longer than the duration of weeks specified is counted.
                The default is [2, 4, 8, 12, 16, 20, 24]
            - kind : str
                The kind of the timeseries to analyze.
                Should be one of ['raw', 'qc', 'filled'].
                For precipitation also "corr" is possible.
                Normally only "raw" and "qc" make sense, because the other timeseries should not have holes.
            - period : TimestampPeriod or (tuple or list of datetime.datetime or None), optional
                The minimum and maximum Timestamp for which to analyze the timeseries.
                If None is given, the maximum and minimal possible Timestamp is taken.
                The default is (None, None).
            - between_meta_period : bool, optional
                Only check between the respective period that is defined in the meta table.
                If "qc" is chosen as kind, then the "raw" meta period is taken.
                The default is True.
            - crop_period : bool, optional
                Should the period get cropped to the maximum filled period?
                This will result in holes being ignored when they are at the end or at the beginning of the timeseries.
                If period = (None, None) is given, then this parameter is set to True.
                The default is False.

        Returns
        -------
        pandas.DataFrame
            A Pandas DataFrame, with station_id as index and one column per week.
            The numbers in the table are the amount of NA-periods longer than the respective amount of weeks.

        Raises
        ------
        ValueError
            If the input parameters were not correct.
        """
        # check input parameters
        stations = self.get_stations(stids=stids, only_real=True, **kwargs)

        # iter stations
        first = True
        for station in pb.progressbar(stations, line_breaks=False):
            new_count = station.count_holes(**kwargs)
            if first:
                meta = new_count
                first = False
            else:
                meta = pd.concat([meta, new_count], axis=0)

        return meta
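    # Example (sketch): count gaps in the raw timeseries that are longer than
    # 4 or 12 weeks; the result has one row per station and one column per
    # requested hole length.
    #
    # >>> holes = stations_p.count_holes(stids=[1048, 4931], kind="raw", weeks=[4, 12])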
    @staticmethod
    def _get_progressbar(max_value, name):
        pbar = pb.ProgressBar(
            widgets=[
                pb.widgets.RotatingMarker(),
                " " + name,
                pb.widgets.Percentage(), ' ',
                pb.widgets.SimpleProgress(
                    format=("('%(value_s)s/%(max_value_s)s')")), ' ',
                pb.widgets.Bar(min_width=80), ' ',
                pb.widgets.Timer(format='%(elapsed)s'), ' | ',
                pb.widgets.ETA(),
                pb.widgets.DynamicMessage(
                    "last_station",
                    format=", last id: {formatted_value}",
                    precision=4)
            ],
            max_value=max_value,
            variables={"last_station": "None"},
            term_width=100,
            is_terminal=True
        )
        pbar.update(0)

        return pbar
    def _run_method(self, stations, method, name, kwds=dict(),
                    do_mp=True, processes=mp.cpu_count()-1, **kwargs):
        """Run methods of the given stations objects in multiprocessing/threading mode.

        Parameters
        ----------
        stations : list of station objects
            A list of station objects. Those must be children of the StationBase class.
        method : str
            The name of the method to call.
        name : str
            A descriptive name of the method to show in the progressbar.
        kwds : dict
            The keyword arguments to give to the methods.
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is True.
        processes : int, optional
            The number of processes that should get started simultaneously.
            If 1 or less, then the process is computed as a simple loop, so there is no multiprocessing or threading done.
            The default is the cpu count -1.
        """
        log.info(
            f"{self._para_long} Stations async loop over method '{method}' started." +
            "\n" + "-"*80
        )

        if processes <= 1:
            log.info(f"As the number of processes is 1 or lower, the method '{method}' is started as a simple loop.")
            self._run_simple_loop(
                stations=stations, method=method, name=name, kwds=kwds)
        else:
            # progressbar
            num_stations = len(stations)
            pbar = self._get_progressbar(max_value=num_stations, name=name)

            # create pool
            if do_mp:
                try:
                    pool = mp.Pool(processes=processes)
                    log.debug("the multiprocessing Pool is started")
                except AssertionError:
                    log.debug('daemonic processes are not allowed to have children, therefore threads are used')
                    pool = ThreadPool(processes=processes)
            else:
                log.debug("the threading Pool is started")
                pool = ThreadPool(processes=processes)

            # start processes
            results = []
            for stat in stations:
                results.append(
                    pool.apply_async(
                        getattr(stat, method),
                        kwds=kwds))
            pool.close()

            # check results until all finished
            finished = [False] * num_stations
            while True:
                if all(finished):
                    break

                for result in [result for i, result in enumerate(results)
                               if not finished[i] and result.ready()]:
                    index = results.index(result)
                    finished[index] = True
                    pbar.variables["last_station"] = stations[index].id
                    # get stdout and log
                    header = f"""The {name} of the {self._para_long} Station with ID {stations[index].id} finished with """
                    try:
                        stdout = result.get(10)
                        if stdout is not None:
                            log.debug(f"{header}stdout:\n{result.get(10)}")
                    except Exception:
                        log.error(f"{header}stderr:\n{traceback.format_exc()}")

                pbar.update(sum(finished))
                time.sleep(2)

            pbar.update(sum(finished))
            pool.join()
            pool.terminate()
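    # Dispatch sketch (illustrative, assuming the stations_p instance from
    # above): the public update methods below funnel their kwargs through
    # _run_method, so processes=1 forces a plain loop and do_mp=False keeps the
    # work in threads, which is enough when the database does the heavy lifting.
    #
    # >>> stations_p.update_raw(stids=[1048], do_mp=False, processes=1)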
    def _run_simple_loop(self, stations, method, name, kwds=dict()):
        log.info("-"*79 +
            "\n{para_long} Stations simple loop over method '{method}' started.".format(
                para_long=self._para_long,
                method=method
            ))

        # progressbar
        num_stations = len(stations)
        pbar = self._get_progressbar(max_value=num_stations, name=name)

        # start processes
        for stat in stations:
            getattr(stat, method)(**kwds)
            pbar.variables["last_station"] = stat.id
            pbar.update(pbar.value + 1)
    @db_engine.deco_update_privilege
    def update_raw(self, only_new=True, only_real=True, stids="all",
                   remove_nas=True, do_mp=True, **kwargs):
        """Download all stations data from the CDC server and upload it to the database.

        Parameters
        ----------
        only_new : bool, optional
            Get only the files that are not yet in the database?
            If False all the available files are loaded again.
            The default is True.
        only_real: bool, optional
            Whether only real stations should be downloaded.
            True: only stations with a date in raw_from in meta are downloaded.
            The default is True.
        stids: string or list of int, optional
            The Stations to update.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is True.
        remove_nas : bool, optional
            Remove the NAs from the downloaded data before updating it to the database.
            This has computational advantages.
            The default is True.
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        start_tstp = datetime.datetime.now()

        # get FTP file list
        ftp_file_list = get_cdc_file_list(
            ftp_folders=self._ftp_folders)

        # run the tasks in multiprocessing mode
        self._run_method(
            stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
            method="update_raw",
            name="download raw {para} data".format(para=self._para.upper()),
            kwds=dict(
                only_new=only_new,
                ftp_file_list=ftp_file_list,
                remove_nas=remove_nas),
            do_mp=do_mp, **kwargs)

        # save start time as variable to db
        do_update_period = isinstance(stids, str) and (stids == "all")
        if not do_update_period and isinstance(stids, list):
            all_stids = self.get_meta(["station_id"], stids="all", only_real=True).index
            do_update_period = all([stid in stids for stid in all_stids])

        if do_update_period:
            with db_engine.connect() as con:
                con.execute(sqltxt("""
                    INSERT INTO parameter_variables (parameter, start_tstp_last_imp, max_tstp_last_imp)
                    VALUES ('{para}',
                            '{start_tstp}'::timestamp,
                            (SELECT max(raw_until) FROM meta_{para}))
                    ON CONFLICT (parameter) DO UPDATE SET
                        start_tstp_last_imp=EXCLUDED.start_tstp_last_imp,
                        max_tstp_last_imp=EXCLUDED.max_tstp_last_imp;
                    """.format(
                        para=self._para,
                        start_tstp=start_tstp.strftime("%Y%m%d %H:%M"))))
                con.commit()
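    # Workflow sketch (illustrative, assuming update privileges): a full
    # update_raw over all stations records the last-import period in
    # parameter_variables, which the last_imp_* methods below then reuse.
    #
    # >>> stations_p.update_raw(only_new=True)
    # >>> stations_p.last_imp_quality_check()
    # >>> stations_p.last_imp_fillup()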
    @db_engine.deco_update_privilege
    def last_imp_quality_check(self, stids="all", do_mp=False, **kwargs):
        """Do the quality check of the last import.

        Parameters
        ----------
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.
        """
        self._run_method(
            stations=self.get_stations(only_real=True, stids=stids, **kwargs),
            method="last_imp_quality_check",
            name="quality check {para} data".format(para=self._para.upper()),
            do_mp=do_mp, **kwargs)
    @db_engine.deco_update_privilege
    def last_imp_fillup(self, stids="all", do_mp=False, **kwargs):
        """Do the gap filling of the last import.

        Parameters
        ----------
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.
        """
        stations = self.get_stations(only_real=False, stids=stids, **kwargs)
        period = stations[0].get_last_imp_period(all=True)
        period_log = period.strftime("%Y-%m-%d %H:%M")
        log.info("The {para_long} Stations fillup of the last import is started for the period {min_tstp} - {max_tstp}".format(
            para_long=self._para_long,
            min_tstp=period_log[0],
            max_tstp=period_log[1]))
        self._run_method(
            stations=stations,
            method="last_imp_fillup",
            name="fillup {para} data".format(para=self._para.upper()),
            kwds=dict(_last_imp_period=period),
            do_mp=do_mp,
            **kwargs)
    @db_engine.deco_update_privilege
    def quality_check(self, period=(None, None), only_real=True, stids="all",
                      do_mp=False, **kwargs):
        """Quality check the raw data for a given period.

        Parameters
        ----------
        period : tuple or list of datetime.datetime or None, optional
            The minimum and maximum Timestamp for which to get the timeseries.
            If None is given, the maximum or minimal possible Timestamp is taken.
            The default is (None, None).
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.
        """
        self._run_method(
            stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
            method="quality_check",
            name="quality check {para} data".format(para=self._para.upper()),
            kwds=dict(period=period),
            do_mp=do_mp,
            **kwargs)
    @db_engine.deco_update_privilege
    def update_ma_raster(self, stids="all", do_mp=False, **kwargs):
        """Update the multi annual raster values for the stations.

        Get a multi annual value from the corresponding raster and save it to the multi annual table in the database.

        Parameters
        ----------
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        self._run_method(
            stations=self.get_stations(only_real=False, stids=stids, **kwargs),
            method="update_ma_raster",
            name="update ma-raster-values for {para}".format(para=self._para.upper()),
            do_mp=do_mp,
            **kwargs)
    @db_engine.deco_update_privilege
    def update_ma_timeseries(self, kind, stids="all", do_mp=False, **kwargs):
        """Update the multi annual timeseries values for the stations.

        Get a multi annual value from the corresponding timeseries and save it to the database.

        Parameters
        ----------
        kind : str or list of str
            The timeseries data kind for which to update the multi annual value.
            Must be a column in the timeseries DB.
            Must be one of "raw", "qc", "filled".
            For the precipitation also "corr" is valid.
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        self._run_method(
            stations=self.get_stations(only_real=False, stids=stids, **kwargs),
            method="update_ma_timeseries",
            name="update ma-ts-values for {para}".format(para=self._para.upper()),
            do_mp=do_mp,
            kwds=dict(kind=kind),
            **kwargs)
    @db_engine.deco_update_privilege
    def fillup(self, only_real=False, stids="all", do_mp=False, **kwargs):
        """Fill up the quality checked data with data from nearby stations to get complete timeseries.

        Parameters
        ----------
        only_real: bool, optional
            Whether only real stations are computed or also virtual ones.
            True: only stations with own data are computed.
            The default is False.
        stids: string or list of int, optional
            The Stations for which to compute.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
            The default is "all".
        do_mp : bool, optional
            Should the method be done in multiprocessing mode?
            If False the methods will be called in threading mode.
            Multiprocessing needs more memory and a bit more initiating time. Therefore it is only useful for methods with a lot of computation effort in the python code.
            If the most computation of a method is done in the postgresql database, then threading is enough to speed the process up.
            The default is False.
        **kwargs : dict, optional
            The additional keyword arguments for the _run_method and get_stations method.

        Raises
        ------
        ValueError
            If the given stids (Station_IDs) are not all valid.
        """
        self._run_method(
            stations=self.get_stations(only_real=only_real, stids=stids, **kwargs),
            method="fillup",
            name="fillup {para} data".format(para=self._para.upper()),
            do_mp=do_mp,
            **kwargs)
    @db_engine.deco_update_privilege
    def update(self, only_new=True, **kwargs):
        """Make a complete update of the stations.

        Does the update_raw, quality check and fillup of the stations.

        Parameters
        ----------
        only_new : bool, optional
            Should only new values be computed?
            If False: the stations are updated for the whole possible period.
            If True: the stations are only updated for new values.
            The default is True.
        """
        self.update_raw(only_new=only_new, **kwargs)
        if only_new:
            self.last_imp_quality_check(**kwargs)
            self.last_imp_fillup(**kwargs)
        else:
            self.quality_check(**kwargs)
            self.fillup(**kwargs)
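    # Example (sketch): one call wrapping the three-step workflow shown earlier;
    # only_new=False instead recomputes the whole possible period.
    #
    # >>> stations_p.update(only_new=True)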
    def get_df(self, stids, **kwargs):
        """Get a DataFrame with the corresponding data.

        Parameters
        ----------
        stids: string or list of int
            The Stations for which to get the data.
            Can either be "all", for all possible stations
            or a list with the Station IDs.
        **kwargs: optional keyword arguments
            Those keyword arguments are passed to the get_df function of the station class.
            Possible parameters are period, agg_to, kind or kinds.
            Furthermore the kwargs are passed to the get_stations method.

        Returns
        -------
        pandas.DataFrame
            A DataFrame with the timeseries for the selected stations, kind(s) and the given period.
            If multiple columns are selected, the columns of this DataFrame are a MultiIndex with the station IDs as first level and the kind as second level.
        """
        if "kinds" in kwargs and "kind" in kwargs:
            raise ValueError("Either enter kind or kinds, not both.")
        if "kind" in kwargs:
            kinds = [kwargs.pop("kind")]
        else:
            kinds = kwargs.pop("kinds")
        kwargs.update(dict(only_real=kwargs.get("only_real", False)))
        stats = self.get_stations(stids=stids, **kwargs)
        df_all = None
        for stat in pb.progressbar(stats, line_breaks=False):
            df = stat.get_df(kinds=kinds, **kwargs)
            if df is None:
                warnings.warn(
                    f"There was no data for {stat._para_long} station {stat.id}!")
                continue
            if len(df.columns) == 1:
                df.rename(
                    dict(zip(df.columns, [stat.id])),
                    axis=1, inplace=True)
            else:
                df.columns = pd.MultiIndex.from_product(
                    [[stat.id], df.columns],
                    names=["Station ID", "kind"])
            df_all = pd.concat([df_all, df], axis=1)

        return df_all
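# Example (sketch, assuming the stations_p instance from above; parameter
# values such as agg_to="day" are illustrative): pull timeseries for several
# stations at once. With more than one kind per station the columns become a
# MultiIndex (station ID, kind).
#
# >>> df = stations_p.get_df(
# ...     stids=[1048, 4931],
# ...     kinds=["filled", "corr"],
# ...     period=("2020-01-01", "2020-12-31"),
# ...     agg_to="day")
# >>> df.columns.names
# FrozenList(['Station ID', 'kind'])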