weatherdb 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docker/Dockerfile +30 -0
- docker/docker-compose.yaml +58 -0
- docker/docker-compose_test.yaml +24 -0
- docker/start-docker-test.sh +6 -0
- docs/requirements.txt +10 -0
- docs/source/Changelog.md +2 -0
- docs/source/License.rst +7 -0
- docs/source/Methode.md +161 -0
- docs/source/_static/custom.css +8 -0
- docs/source/_static/favicon.ico +0 -0
- docs/source/_static/logo.png +0 -0
- docs/source/api/api.rst +15 -0
- docs/source/api/cli.rst +8 -0
- docs/source/api/weatherDB.broker.rst +10 -0
- docs/source/api/weatherDB.config.rst +7 -0
- docs/source/api/weatherDB.db.rst +23 -0
- docs/source/api/weatherDB.rst +22 -0
- docs/source/api/weatherDB.station.rst +56 -0
- docs/source/api/weatherDB.stations.rst +46 -0
- docs/source/api/weatherDB.utils.rst +22 -0
- docs/source/conf.py +137 -0
- docs/source/index.rst +33 -0
- docs/source/setup/Configuration.md +127 -0
- docs/source/setup/Hosting.md +9 -0
- docs/source/setup/Install.md +49 -0
- docs/source/setup/Quickstart.md +183 -0
- docs/source/setup/setup.rst +12 -0
- weatherdb/__init__.py +24 -0
- weatherdb/_version.py +1 -0
- weatherdb/alembic/README.md +8 -0
- weatherdb/alembic/alembic.ini +80 -0
- weatherdb/alembic/config.py +9 -0
- weatherdb/alembic/env.py +100 -0
- weatherdb/alembic/script.py.mako +26 -0
- weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
- weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
- weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
- weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
- weatherdb/broker.py +667 -0
- weatherdb/cli.py +214 -0
- weatherdb/config/ConfigParser.py +663 -0
- weatherdb/config/__init__.py +5 -0
- weatherdb/config/config_default.ini +162 -0
- weatherdb/db/__init__.py +3 -0
- weatherdb/db/connections.py +374 -0
- weatherdb/db/fixtures/RichterParameters.json +34 -0
- weatherdb/db/models.py +402 -0
- weatherdb/db/queries/get_quotient.py +155 -0
- weatherdb/db/views.py +165 -0
- weatherdb/station/GroupStation.py +710 -0
- weatherdb/station/StationBases.py +3108 -0
- weatherdb/station/StationET.py +111 -0
- weatherdb/station/StationP.py +807 -0
- weatherdb/station/StationPD.py +98 -0
- weatherdb/station/StationT.py +164 -0
- weatherdb/station/__init__.py +13 -0
- weatherdb/station/constants.py +21 -0
- weatherdb/stations/GroupStations.py +519 -0
- weatherdb/stations/StationsBase.py +1021 -0
- weatherdb/stations/StationsBaseTET.py +30 -0
- weatherdb/stations/StationsET.py +17 -0
- weatherdb/stations/StationsP.py +128 -0
- weatherdb/stations/StationsPD.py +24 -0
- weatherdb/stations/StationsT.py +21 -0
- weatherdb/stations/__init__.py +11 -0
- weatherdb/utils/TimestampPeriod.py +369 -0
- weatherdb/utils/__init__.py +3 -0
- weatherdb/utils/dwd.py +350 -0
- weatherdb/utils/geometry.py +69 -0
- weatherdb/utils/get_data.py +285 -0
- weatherdb/utils/logging.py +126 -0
- weatherdb-1.1.0.dist-info/LICENSE +674 -0
- weatherdb-1.1.0.dist-info/METADATA +765 -0
- weatherdb-1.1.0.dist-info/RECORD +77 -0
- weatherdb-1.1.0.dist-info/WHEEL +5 -0
- weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
- weatherdb-1.1.0.dist-info/top_level.txt +3 -0
weatherdb/utils/dwd.py
ADDED
@@ -0,0 +1,350 @@
|
|
1
|
+
"""
|
2
|
+
Some utility functions to get data from the DWD-CDC server.
|
3
|
+
|
4
|
+
Based on `max_fun` package on https://github.com/maxschmi/max_fun
|
5
|
+
Created by Max Schmit, 2021
|
6
|
+
"""
|
7
|
+
# libraries
import ftplib
import logging
import pathlib
import random
import re
import time
import traceback
from io import BytesIO, StringIO
from zipfile import ZipFile

import dateutil
import dateutil.parser  # explicitly load the parser submodule; `import dateutil` alone does not
import geopandas as gpd
import pandas as pd
|
20
|
+
|
21
|
+
# DWD - CDC FTP Server
|
22
|
+
CDC_HOST = "opendata.dwd.de"
|
23
|
+
|
24
|
+
# logger
|
25
|
+
log = logging.getLogger(__name__)
|
26
|
+
|
27
|
+
# basic functions
|
28
|
+
# ----------------
|
29
|
+
def dwd_id_to_str(id):
    """
    Convert a station id to normal DWD format as str.

    Parameters
    ----------
    id : int or str
        The id of the station.

    Returns
    -------
    str
        string of normal DWD Station id, left-padded with zeros to
        a width of 5 characters (longer ids are returned unchanged).

    """
    # pad with "0" on the left up to 5 characters
    return str(id).rjust(5, "0")
|
45
|
+
|
46
|
+
def _dwd_date_parser(date_ser):
|
47
|
+
"""
|
48
|
+
Parse the dates from a DWD table to datetime.
|
49
|
+
|
50
|
+
Parameters
|
51
|
+
----------
|
52
|
+
date_ser : pd.Series of str or str
|
53
|
+
the string from the DWD table. e.g. "20200101" or "2020010112"
|
54
|
+
|
55
|
+
Returns
|
56
|
+
-------
|
57
|
+
datetime.datetime
|
58
|
+
The date as datetime.
|
59
|
+
|
60
|
+
"""
|
61
|
+
if not isinstance(date_ser, pd.Series):
|
62
|
+
raise ValueError("date_str must be a pd.Series of str")
|
63
|
+
|
64
|
+
# test if list or single str
|
65
|
+
char_num = len(date_ser.iloc[0])
|
66
|
+
|
67
|
+
# parse to correct datetime
|
68
|
+
if char_num == 8:
|
69
|
+
return pd.to_datetime(date_ser, format='%Y%m%d')
|
70
|
+
elif char_num == 10:
|
71
|
+
return pd.to_datetime(date_ser, format='%Y%m%d%H')
|
72
|
+
elif char_num == 12:
|
73
|
+
return pd.to_datetime(date_ser, format='%Y%m%d%H%M')
|
74
|
+
else:
|
75
|
+
raise ValueError("there was an error while converting the following to a correct datetime"+
|
76
|
+
date_ser.head())
|
77
|
+
|
78
|
+
# functions
|
79
|
+
# ---------
|
80
|
+
def get_ftp_file_list(ftp_conn, ftp_folders):
    """Get a list of files in the folders with their modification dates.

    Parameters
    ----------
    ftp_conn : ftplib.FTP
        Ftp connection.
    ftp_folders : list of str or pathlike object
        The directories on the ftp server to look for files.
        A single str or pathlib.Path is also accepted.

    Returns
    -------
    list of tuples of strs
        A list of Tuples. Every tuple stands for one file.
        The tuple consists of (filepath, modification date).
    """
    # normalize the input without mutating the caller's list
    if isinstance(ftp_folders, (str, pathlib.Path)):
        ftp_folders = [ftp_folders]
    ftp_folders = [
        folder.as_posix() if isinstance(folder, pathlib.Path) else folder
        for folder in ftp_folders]

    # make sure the connection is still alive, reconnect otherwise
    try:
        ftp_conn.voidcmd("NOOP")
    except ftplib.all_errors:
        ftp_conn.connect()

    # get files and modification dates
    files = []
    for ftp_folder in ftp_folders:
        lines = []
        ftp_conn.dir(ftp_folder, lines.append)
        for line in lines:
            # unix LIST format: perms links owner group size month day time/year name
            parts = line.split(maxsplit=9)
            # NOTE(review): assumes ftp_folder ends with "/" — otherwise the
            # joined path misses a separator; confirm against callers
            filepath = ftp_folder + parts[8]
            modtime = dateutil.parser.parse(" ".join(parts[5:8]))
            files.append((filepath, modtime))

    return files
|
120
|
+
|
121
|
+
def get_cdc_file_list(ftp_folders):
    """List the files below the given folders on the DWD CDC server.

    Opens an anonymous FTP connection to the CDC host, delegates the
    listing to ``get_ftp_file_list`` and closes the connection again.

    Parameters
    ----------
    ftp_folders : list of str or pathlike object
        The directories on the ftp server to look for files.

    Returns
    -------
    list of tuples of strs
        One (filepath, modification date) tuple per file.
    """
    # returning from inside the with-block still closes the connection
    with ftplib.FTP(CDC_HOST) as connection:
        connection.login()
        return get_ftp_file_list(connection, ftp_folders)
|
126
|
+
|
127
|
+
def get_dwd_file(zip_filepath):
    """
    Get a DataFrame from one single (zip-)file from the DWD FTP server.

    Parameters
    ----------
    zip_filepath : str
        Path to the file on the server. e.g.
        - "/climate_environment/CDC/observations_germany/climate/10_minutes/air_temperature/recent/10minutenwerte_TU_00044_akt.zip"
        - "/climate_environment/CDC/derived_germany/soil/daily/historical/derived_germany_soil_daily_historical_73.txt.gz"

    Returns
    -------
    pandas.DataFrame
        The DataFrame of the selected file in the zip folder.

    Raises
    ------
    ValueError
        If no file inside the zip archive matches the "produkt" pattern.
    ImportError
        If the filepath is neither an observation nor a derived dataset.

    """
    max_attempts = 10

    # get the compressed folder from dwd
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # download the file, retrying with a small random back-off
        # to be gentle on the server
        compressed_bin = BytesIO()
        for attempt in range(1, max_attempts + 1):
            try:
                ftp.retrbinary("RETR " + zip_filepath, compressed_bin.write)
                break
            except Exception:
                if attempt == max_attempts:
                    raise
                # discard any partially downloaded bytes, otherwise the
                # next attempt would append to a corrupt buffer
                compressed_bin.seek(0)
                compressed_bin.truncate()
                time.sleep(random.randint(0, 400) / 100)

    # check folder to be derived or observation type and import the data
    if re.search("observations", zip_filepath):
        # get zip folder and files
        compressed_folder = ZipFile(compressed_bin)
        compressed_folder_files = compressed_folder.namelist()

        # test if one and only one file matches the pattern
        files = list(filter(re.compile("produkt").search,
                            compressed_folder_files))

        if len(files) == 0:
            raise ValueError(
                "There is no file matching the pattern: produkt " +
                "in the zip files: \n- " +
                "\n- ".join(compressed_folder_files))
        elif len(files) > 1:
            raise ValueError(
                "There are more than one files matching the " +
                "pattern: produkt\nin the zip file: " +
                str(zip_filepath) +
                "\nonly the first file is returned: " +
                str(files[0]))

        # extract the file from the zip folder and return it as pd.DataFrame
        with compressed_folder.open(files[0]) as f:
            df = pd.read_table(f, sep=";",
                               dtype={"Datum": str, "MESS_DATUM": str},
                               skipinitialspace=True,
                               na_values=[-999, -9999, "####", "#####", "######"])

    elif re.search("derived", zip_filepath):
        # derived datasets are plain gzipped tables, let pandas fetch them
        df = pd.read_table(f"ftp://{CDC_HOST}/{zip_filepath}",
                           compression="gzip",
                           sep=";",
                           skipinitialspace=True,
                           dtype={"Datum": str, "MESS_DATUM": str},
                           na_values=[-999, -9999, "####", "#####", "######"])
    else:
        raise ImportError("ERROR: No file could be imported, as there is " +
                          "just a setup for observation and derived datas")

    # convert dates to datetime
    for col in ["MESS_DATUM", "Datum"]:
        if col in df.columns:
            df[col] = _dwd_date_parser(df[col])

    return df
|
209
|
+
|
210
|
+
def get_dwd_meta(ftp_folder):
    """
    Get the meta file from the ftp_folder on the DWD server.

    Downloads the meta file of a given folder.
    Corrects the meta file of missing files. So if no file for the station is
    in the folder the meta entry gets deleted.
    For historical observation folders the "von_datum"/"bis_datum" entries are
    tightened to the date range actually covered by the zip files.

    Parameters
    ----------
    ftp_folder : str
        The path to the directory where to search for the meta file.
        e.g. "climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/".

    Returns
    -------
    geopandas.GeoDataFrame or None
        a GeoDataFrame of the meta file, or None if it could not be
        found, downloaded or converted.

    """
    # open ftp connection and get list of files in folder
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # get and check the meta_file name
        ftp_files = ftp.nlst(ftp_folder)
        pattern = r".*(?<!_mn4)((_stations_list)|(_Beschreibung_Stationen))+.txt$"
        meta_file = list(filter(re.compile(pattern).match, ftp_files))

    if len(meta_file) == 0:
        log.info(
            f"There is no file matching the pattern '{pattern}'"+
            f"\nin the folder: ftp://{CDC_HOST}/{str(ftp_folder)}")
        return None
    elif len(meta_file) > 1:
        log.info(
            f"There are more than one files matching the pattern: {pattern}" +
            f" in the folder:\nftp://{CDC_HOST}/{str(ftp_folder)}" +
            f"\nonly the first file is returned: {meta_file[0]}")

    # import meta file
    meta = None
    try:
        if re.search("observations", ftp_folder):
            # observation meta files are fixed-width-ish text tables in
            # WINDOWS-1252 encoding; download and re-parse them manually
            with ftplib.FTP(CDC_HOST) as ftp:
                ftp.login()
                with BytesIO() as bio, StringIO() as sio:
                    ftp.retrbinary("RETR " + meta_file[0], bio.write)
                    sio.write(bio.getvalue().decode("WINDOWS-1252").replace("\r\n", "\n"))
                    colnames = sio.getvalue().split("\n")[0].split()
                    sio.seek(0)
                    meta = pd.read_table(
                        sio,
                        skiprows=2,
                        lineterminator="\n",
                        sep=r"\s{2,}|(?<=\d|\))\s{1}(?=[\w])",  # two or more white spaces or one space after digit and followed by word
                        names=colnames,
                        parse_dates=[col for col in colnames if "datum" in col.lower()],
                        index_col="Stations_id",
                        engine="python")
        elif re.search("derived", ftp_folder):
            meta = pd.read_table("ftp://opendata.dwd.de/" + meta_file[0],
                                 encoding="WINDOWS-1252", sep=";", skiprows=1,
                                 names=["Stations_id", "Stationshoehe",
                                        "geoBreite", "geoLaenge",
                                        "Stationsname", "Bundesland"],
                                 index_col="Stations_id"
                                 )
    except Exception:
        # narrow from a bare except: don't swallow KeyboardInterrupt/SystemExit
        traceback.print_exc()
        print("URL Error: The URL could not be found:\n" +
              "ftp://opendata.dwd.de/" + meta_file[0])
        return None

    if meta is None:
        # folder is neither an observation nor a derived dataset; previously
        # this fell through to a NameError hidden by a bare except
        log.info(
            "The ftp folder is neither an observation nor a derived dataset: "
            + str(ftp_folder))
        return None

    try:
        meta = gpd.GeoDataFrame(meta,
                                geometry=gpd.points_from_xy(meta.geoLaenge,
                                                            meta.geoBreite,
                                                            crs="EPSG:4326"))
        meta = meta.drop(["geoLaenge", "geoBreite"], axis=1)
    except Exception:
        traceback.print_exc()
        print("Error while converting DataFrame to GeoDataFrame," +
              " maybe the columns aren't named 'geoLaenge' and geoBreite'" +
              "\nhere is the header of the DataFrame:\n")
        print(meta.head())
        return None

    # delete entries where there is no file in the ftp-folder
    rows_drop = []
    str_ftp_files = str(ftp_files)
    for i, row in meta.iterrows():
        if not (re.search(r"[_\.]" + dwd_id_to_str(i) + r"[_\.]|" +
                          r"[_\.]" + str(i) + r"[_\.]", str_ftp_files)):
            rows_drop.append(i)
    meta = meta.drop(rows_drop)

    # change meta date entries if the file has a different date
    # (fixed precedence bug: the original `("bis_datum" and "von_datum" in meta)`
    # only ever checked for "von_datum")
    if ("observation" in ftp_folder) \
            and ("bis_datum" in meta) and ("von_datum" in meta) \
            and ("recent" not in ftp_folder):
        zip_files = list(filter(re.compile(r".+\d+_\d+_\d+_hist.zip").match,
                                ftp_files))
        zip_files.sort()
        if zip_files:  # guard: appending zip_files[0] would raise on an empty list
            zip_files.append(zip_files[0])  # else the last entry won't get tested
            last_sid, last_from_date, last_to_date = None, None, None

            for zip_file in zip_files:
                # get new files dates
                filename = zip_file.split("/")[-1]
                _, kind, sid, from_date, to_date, _ = filename.split("_")
                if kind in ["niedereder"]:
                    continue
                from_date = pd.Timestamp(from_date)
                to_date = pd.Timestamp(to_date)
                sid = int(sid)

                # compare with previous file's dates
                if last_sid and (sid == last_sid):
                    last_to_date = to_date
                else:
                    # compare last values with meta file dates
                    if last_sid and (last_sid in meta.index):
                        if last_from_date > meta.loc[last_sid, "von_datum"]:
                            meta.loc[last_sid, "von_datum"] = last_from_date
                        if last_to_date < meta.loc[last_sid, "bis_datum"]:
                            meta.loc[last_sid, "bis_datum"] = last_to_date

                    # set values as last values
                    last_to_date = to_date
                    last_from_date = from_date
                    last_sid = sid

    # trim whitespace in string columns
    for dtype, col in zip(meta.dtypes, meta.columns):
        if pd.api.types.is_string_dtype(dtype) and col != "geometry":
            meta[col] = meta[col].str.strip()

    # return
    return meta
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""A collection of geometry functions.
|
4
|
+
|
5
|
+
Based on `max_fun` package on https://github.com/maxschmi/max_fun
|
6
|
+
Created by Max Schmit, 2021
|
7
|
+
"""
|
8
|
+
# libraries
|
9
|
+
import numpy as np
|
10
|
+
from shapely.geometry import Point, LineString
|
11
|
+
import geopandas as gpd
|
12
|
+
import rasterio as rio
|
13
|
+
|
14
|
+
# functions
|
15
|
+
def polar_line(center_xy, radius, angle):
    """Create a LineString with polar coodinates.

    Parameters
    ----------
    center_xy : list, array or tuple of int or floats
        The X and Y coordinates of the center.
    radius : int or float
        The radius of the circle.
    angle : int
        The angle of the portion of the circle in degrees.
        0 means east.

    Returns
    -------
    shapely.geometry.LineString
        A line from the center to the point on the circle rim.
    """
    # convert the polar offset to cartesian coordinates
    angle_rad = np.deg2rad(angle)
    rim_point = [
        center_xy[0] + np.cos(angle_rad) * radius,
        center_xy[1] + np.sin(angle_rad) * radius,
    ]
    return LineString([center_xy, rim_point])
|
40
|
+
|
41
|
+
def raster2points(raster_np, transform, crs=None):
    """Convert the valid cells of a raster array to a point GeoDataFrame.

    Until now this only works for rasters with one band.

    Parameters
    ----------
    raster_np : np.array
        The imported raster array.
    transform : rio.Affine
        The Affine transformation of the raster.
    crs : str or crs-type, optional
        The coordinate reference system for the raster, by default None

    Returns
    -------
    geopandas.GeoDataFrame
        The raster Data is in the data column.
    """
    band = raster_np[0]
    valid = ~np.isnan(band)

    # nonzero() yields (row indices, col indices) of the non-NaN cells;
    # rio.transform.xy takes rows first, then cols
    row_idx, col_idx = valid.nonzero()
    xs, ys = rio.transform.xy(transform, row_idx, col_idx)

    geometry = [Point(x, y) for x, y in zip(xs, ys)]

    return gpd.GeoDataFrame(
        {"data": band[valid]},
        geometry=geometry,
        crs=crs)
|