weatherdb 1.1.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
weatherdb/utils/dwd.py
ADDED
@@ -0,0 +1,350 @@
|
|
1
|
+
"""
|
2
|
+
Some utilities functions to get data from the DWD-CDC server.
|
3
|
+
|
4
|
+
Based on `max_fun` package on https://github.com/maxschmi/max_fun
|
5
|
+
Created by Max Schmit, 2021
|
6
|
+
"""
|
7
|
+
# libraries
|
8
|
+
import dateutil
|
9
|
+
import ftplib
|
10
|
+
import pathlib
|
11
|
+
import geopandas as gpd
|
12
|
+
import pandas as pd
|
13
|
+
from zipfile import ZipFile
|
14
|
+
import re
|
15
|
+
from io import BytesIO, StringIO
|
16
|
+
import traceback
|
17
|
+
import logging
|
18
|
+
import time
|
19
|
+
import random
|
20
|
+
|
21
|
+
# DWD - CDC FTP Server
|
22
|
+
CDC_HOST = "opendata.dwd.de"
|
23
|
+
|
24
|
+
# logger
|
25
|
+
log = logging.getLogger(__name__)
|
26
|
+
|
27
|
+
# basic functions
|
28
|
+
# ----------------
|
29
|
+
def dwd_id_to_str(id):
    """
    Convert a station id to normal DWD format as str.

    Parameters
    ----------
    id : int or str
        The id of the station.

    Returns
    -------
    str
        string of normal DWD Station id, left-padded with zeros
        to a minimum width of 5 characters.

    """
    return str(id).rjust(5, "0")
|
45
|
+
|
46
|
+
def _dwd_date_parser(date_ser):
|
47
|
+
"""
|
48
|
+
Parse the dates from a DWD table to datetime.
|
49
|
+
|
50
|
+
Parameters
|
51
|
+
----------
|
52
|
+
date_ser : pd.Series of str or str
|
53
|
+
the string from the DWD table. e.g. "20200101" or "2020010112"
|
54
|
+
|
55
|
+
Returns
|
56
|
+
-------
|
57
|
+
datetime.datetime
|
58
|
+
The date as datetime.
|
59
|
+
|
60
|
+
"""
|
61
|
+
if not isinstance(date_ser, pd.Series):
|
62
|
+
raise ValueError("date_str must be a pd.Series of str")
|
63
|
+
|
64
|
+
# test if list or single str
|
65
|
+
char_num = len(date_ser.iloc[0])
|
66
|
+
|
67
|
+
# parse to correct datetime
|
68
|
+
if char_num == 8:
|
69
|
+
return pd.to_datetime(date_ser, format='%Y%m%d')
|
70
|
+
elif char_num == 10:
|
71
|
+
return pd.to_datetime(date_ser, format='%Y%m%d%H')
|
72
|
+
elif char_num == 12:
|
73
|
+
return pd.to_datetime(date_ser, format='%Y%m%d%H%M')
|
74
|
+
else:
|
75
|
+
raise ValueError("there was an error while converting the following to a correct datetime"+
|
76
|
+
date_ser.head())
|
77
|
+
|
78
|
+
# functions
|
79
|
+
# ---------
|
80
|
+
def get_ftp_file_list(ftp_conn, ftp_folders):
    """Get a list of files in the folders with their modification dates.

    Parameters
    ----------
    ftp_conn : ftplib.FTP
        Ftp connection.
    ftp_folders : list of str or pathlike object
        The directories on the ftp server to look for files.

    Returns
    -------
    list of tuples of strs
        A list of Tuples. Every tuple stands for one file.
        The tuple consists of (filepath, modification date).
    """
    # local import: "import dateutil" at module level does not guarantee
    # that the dateutil.parser submodule is loaded
    from dateutil.parser import parse as _parse_date

    # normalise the input into a fresh list of posix path strings;
    # the caller's list is left untouched (the previous version
    # overwrote its elements in place)
    if isinstance(ftp_folders, str):
        ftp_folders = [ftp_folders]
    folders = [f.as_posix() if isinstance(f, pathlib.Path) else f
               for f in ftp_folders]

    # make sure the connection is still alive, reconnect otherwise
    try:
        ftp_conn.voidcmd("NOOP")
    except ftplib.all_errors:
        ftp_conn.connect()

    # collect files and modification dates
    files = []
    for folder in folders:
        lines = []
        ftp_conn.dir(folder, lines.append)
        for line in lines:
            # unix style listing: perms links owner group size month day time name
            parts = line.split(maxsplit=9)
            # NOTE(review): the name is concatenated directly onto the folder,
            # so folders are expected to end with "/" — confirm with callers
            filepath = folder + parts[8]
            modtime = _parse_date(parts[5] + " " + parts[6] + " " + parts[7])
            files.append((filepath, modtime))

    return files
|
120
|
+
|
121
|
+
def get_cdc_file_list(ftp_folders):
    """List files with modification dates on the DWD CDC server.

    Opens an anonymous connection to the CDC FTP host, lists the
    given folders and closes the connection again.

    Parameters
    ----------
    ftp_folders : list of str or pathlike object
        The directories on the CDC ftp server to look for files.

    Returns
    -------
    list of tuples of strs
        One (filepath, modification date) tuple per file.
    """
    with ftplib.FTP(CDC_HOST) as ftp_con:
        ftp_con.login()
        return get_ftp_file_list(ftp_con, ftp_folders)
|
126
|
+
|
127
|
+
def get_dwd_file(zip_filepath):
    """
    Get a DataFrame from one single (zip-)file from the DWD FTP server.

    Parameters
    ----------
    zip_filepath : str
        Path to the file on the server. e.g.
        - "/climate_environment/CDC/observations_germany/climate/10_minutes/air_temperature/recent/10minutenwerte_TU_00044_akt.zip"
        - "/climate_environment/CDC/derived_germany/soil/daily/historical/derived_germany_soil_daily_historical_73.txt.gz"

    Returns
    -------
    pandas.DataFrame
        The DataFrame of the selected file in the zip folder.

    Raises
    ------
    ValueError
        If an observations zip archive contains no (or several) files
        matching the pattern "produkt".
    ImportError
        If the filepath belongs to neither an observations nor a derived
        dataset.

    """
    # get the compressed file from dwd
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # download the file, retrying up to 10 times with a random backoff
        compressed_bin = BytesIO()
        max_tries = 10
        for num_tried in range(max_tries):
            try:
                # reset the buffer first: a previous, partially failed
                # attempt would otherwise leave bytes that the retry
                # appends to, corrupting the archive
                compressed_bin.seek(0)
                compressed_bin.truncate()
                ftp.retrbinary("RETR " + zip_filepath, compressed_bin.write)
                break
            except Exception:
                if num_tried == max_tries - 1:
                    raise
                time.sleep(random.randint(0, 400) / 100)

    # check folder to be derived or observation type and import the data
    if re.search("observations", zip_filepath):
        # get zip folder and files
        compressed_folder = ZipFile(compressed_bin)
        compressed_folder_files = compressed_folder.namelist()

        # test if one and only one file matches the pattern
        files = list(filter(re.compile("produkt").search,
                            compressed_folder_files))

        if len(files) == 0:
            raise ValueError(
                "There is no file matching the pattern: produkt " +
                "in the zip files: \n- " +
                "\n- ".join(compressed_folder_files))
        elif len(files) > 1:
            raise ValueError(
                "There are more than one files matching the " +
                "pattern: produkt\nin the zip file: " +
                str(zip_filepath) +
                "\nonly the first file is returned: " +
                str(files[0]))

        # extract the file from the zip folder and return it as pd.DataFrame
        with compressed_folder.open(files[0]) as f:
            df = pd.read_table(f, sep=";",
                               dtype={"Datum": str, "MESS_DATUM": str},
                               skipinitialspace=True,
                               na_values=[-999, -9999, "####", "#####", "######"])

    elif re.search("derived", zip_filepath):
        # derived tables are plain gzipped text: pandas streams them
        # directly from the ftp url (the bytes downloaded above are
        # not used in this branch)
        df = pd.read_table(f"ftp://{CDC_HOST}/{zip_filepath}",
                           compression="gzip",
                           sep=";",
                           skipinitialspace=True,
                           dtype={"Datum": str, "MESS_DATUM": str},
                           na_values=[-999, -9999, "####", "#####", "######"])
    else:
        raise ImportError("ERROR: No file could be imported, as there is " +
                          "just a setup for observation and derived datas")

    # convert dates to datetime
    for col in ["MESS_DATUM", "Datum"]:
        if col in df.columns:
            df[col] = _dwd_date_parser(df[col])

    return df
|
209
|
+
|
210
|
+
def get_dwd_meta(ftp_folder):
    """
    Get the meta file from the ftp_folder on the DWD server.

    Downloads the meta file of a given folder.
    Corrects the meta file of missing files. So if no file for the station is
    in the folder the meta entry gets deleted.
    Resets "von_datum"/"bis_datum" in the meta file if the historical zip
    files cover a different period.

    Parameters
    ----------
    ftp_folder : str
        The path to the directory where to search for the meta file.
        e.g. "climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/".

    Returns
    -------
    geopandas.GeoDataFrame or None
        a GeoDataFrame of the meta file; None if no meta file was found
        or importing/converting it failed.

    """
    # open ftp connection and get list of files in folder
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # get and check the meta_file name
        # (?<!_mn4) excludes the "_mn4" station lists
        ftp_files = ftp.nlst(ftp_folder)
        pattern = r".*(?<!_mn4)((_stations_list)|(_Beschreibung_Stationen))+.txt$"
        meta_file = list(filter(re.compile(pattern).match, ftp_files))

    if len(meta_file) == 0:
        log.info(
            f"There is no file matching the pattern '{pattern}'"+
            f"\nin the folder: ftp://{CDC_HOST}/{str(ftp_folder)}")
        return None
    elif len(meta_file) > 1:
        log.info(
            f"There are more than one files matching the pattern: {pattern}" +
            f" in the folder:\nftp://{CDC_HOST}/{str(ftp_folder)}" +
            f"\nonly the first file is returned: {meta_file[0]}")

    # import meta file
    try:
        if re.search("observations", ftp_folder):
            # observation meta files are fixed-width-ish text in WINDOWS-1252;
            # download to memory and re-parse with a regex separator
            with ftplib.FTP(CDC_HOST) as ftp:
                ftp.login()
                with BytesIO() as bio, StringIO() as sio:
                    ftp.retrbinary("RETR " + meta_file[0], bio.write)
                    sio.write(bio.getvalue().decode("WINDOWS-1252").replace("\r\n", "\n"))
                    # first line of the file holds the column names
                    colnames = sio.getvalue().split("\n")[0].split()
                    sio.seek(0)
                    meta = pd.read_table(
                        sio,
                        skiprows=2,
                        lineterminator="\n",
                        sep=r"\s{2,}|(?<=\d|\))\s{1}(?=[\w])", # two or more white spaces or one space after digit and followed by word
                        names=colnames,
                        parse_dates=[col for col in colnames if "datum" in col.lower()],
                        index_col="Stations_id",
                        engine="python")
        elif re.search("derived", ftp_folder):
            # derived meta files are simple ";"-separated tables
            meta = pd.read_table("ftp://opendata.dwd.de/" + meta_file[0],
                                 encoding="WINDOWS-1252", sep=";", skiprows=1,
                                 names=["Stations_id", "Stationshoehe",
                                        "geoBreite", "geoLaenge",
                                        "Stationsname", "Bundesland"],
                                 index_col="Stations_id"
                                 )
    # NOTE(review): bare except hides the real error class; it is kept here
    # to preserve the existing best-effort behaviour (print + return None)
    except:
        traceback.print_exc()
        print("URL Error: The URL could not be found:\n" +
              "ftp://opendata.dwd.de/" + meta_file[0])
        return None

    # convert lat/lon columns into a point geometry (WGS84)
    try:
        meta = gpd.GeoDataFrame(meta,
                                geometry=gpd.points_from_xy(meta.geoLaenge,
                                                            meta.geoBreite,
                                                            crs="EPSG:4326"))
        meta = meta.drop(["geoLaenge", "geoBreite"], axis=1)
    except:
        traceback.print_exc()
        print("Error while converting DataFrame to GeoDataFrame," +
              " maybe the columns aren't named 'geoLaenge' and geoBreite'" +
              "\nhere is the header of the DataFrame:\n")
        print(meta.head())
        return None

    # delete entries where there is no file in the ftp-folder
    # (the whole listing is searched as one big string for "_<id>_"/".<id>.")
    rows_drop = []
    str_ftp_files = str(ftp_files)
    for i, row in meta.iterrows():
        if not (re.search(r"[_\.]" + dwd_id_to_str(i) + r"[_\.]|" +
                          r"[_\.]" + str(i) + r"[_\.]", str_ftp_files)):
            rows_drop.append(i)
    meta = meta.drop(rows_drop)

    # change meta date entries if the file has a different date
    # NOTE(review): `("bis_datum" and "von_datum" in meta)` only tests
    # "von_datum" — the literal "bis_datum" is truthy on its own; probably
    # meant ("bis_datum" in meta and "von_datum" in meta). TODO confirm.
    if ("observation" in ftp_folder) \
            and ("bis_datum" and "von_datum" in meta) \
            and ("recent" not in ftp_folder):
        zip_files = list(filter(re.compile(r".+\d+_\d+_\d+_hist.zip").match,
                                ftp_files))
        zip_files.sort()
        # NOTE(review): raises IndexError if no historical zip matched — verify
        zip_files.append(zip_files[0]) # else the last entry won't get tested
        last_sid, last_from_date, last_to_date = None, None, None

        # filenames look like "<prefix>_<kind>_<sid>_<from>_<to>_hist.zip";
        # consecutive files of one station are merged before comparing to meta
        for zip_file in zip_files:
            # get new files dates
            filename = zip_file.split("/")[-1]
            _, kind, sid, from_date, to_date, _ = filename.split("_")
            if kind in ["niedereder"]:
                continue
            from_date = pd.Timestamp(from_date)
            to_date = pd.Timestamp(to_date)
            sid = int(sid)

            # compare with previous file's dates
            if last_sid and (sid == last_sid):
                last_to_date = to_date
            else:
                # compare last values with meta file dates
                # narrow the meta period to what the files actually cover
                if last_sid and (last_sid in meta.index):
                    if last_from_date > meta.loc[last_sid, "von_datum"]:
                        meta.loc[last_sid, "von_datum"] = last_from_date
                    if last_to_date < meta.loc[last_sid, "bis_datum"]:
                        meta.loc[last_sid, "bis_datum"] = last_to_date

                # set values as last values
                last_to_date = to_date
                last_from_date = from_date
                last_sid = sid

    # trim whitespace in string columns
    for dtype, col in zip(meta.dtypes, meta.columns):
        if pd.api.types.is_string_dtype(dtype) and col != "geometry":
            meta[col] = meta[col].str.strip()

    # return
    return meta
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""A collection of geometry functions.
|
4
|
+
|
5
|
+
Based on `max_fun` package on https://github.com/maxschmi/max_fun
|
6
|
+
Created by Max Schmit, 2021
|
7
|
+
"""
|
8
|
+
# libraries
|
9
|
+
import numpy as np
|
10
|
+
from shapely.geometry import Point, LineString
|
11
|
+
import geopandas as gpd
|
12
|
+
import rasterio as rio
|
13
|
+
|
14
|
+
# functions
|
15
|
+
def polar_line(center_xy, radius, angle):
    """Create a LineString with polar coodinates.

    Parameters
    ----------
    center_xy : list, array or tuple of int or floats
        The X and Y coordinates of the center.
    radius : int or float
        The radius of the circle.
    angle : int
        The angle of the portion of the circle in degrees.
        0 means east.

    Returns
    -------
    shapely.geometry.LineString
        A line from the center to the point at the given radius and angle.
    """
    # convert the polar end point to cartesian coordinates
    angle_rad = np.deg2rad(angle)
    end_xy = [center_xy[0] + np.cos(angle_rad) * radius,
              center_xy[1] + np.sin(angle_rad) * radius]

    return LineString([center_xy, end_xy])
|
40
|
+
|
41
|
+
def raster2points(raster_np, transform, crs=None):
    """Polygonize raster array to GeoDataFrame.

    Until now this only works for rasters with one band.

    Parameters
    ----------
    raster_np : np.array
        The imported raster array.
    transform : rio.Affine
        The Affine transformation of the raster.
    crs : str or crs-type, optional
        The coordinate reference system for the raster, by default None

    Returns
    -------
    geopandas.GeoDataFrame
        The raster Data is in the data column.
    """
    # only cells of the first band that carry data (non-NaN)
    band = raster_np[0]
    valid = ~np.isnan(band)

    # np.nonzero on a 2D mask yields (row indices, column indices),
    # which is exactly the order rio.transform.xy expects
    row_idx, col_idx = valid.nonzero()
    xs, ys = rio.transform.xy(transform, row_idx, col_idx)

    points = [Point(x, y) for x, y in zip(xs, ys)]

    return gpd.GeoDataFrame(
        {"data": band[valid]},
        geometry=points,
        crs=crs)
|