weatherdb 1.1.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. docker/Dockerfile +30 -0
  2. docker/docker-compose.yaml +58 -0
  3. docker/docker-compose_test.yaml +24 -0
  4. docker/start-docker-test.sh +6 -0
  5. docs/requirements.txt +10 -0
  6. docs/source/Changelog.md +2 -0
  7. docs/source/License.rst +7 -0
  8. docs/source/Methode.md +161 -0
  9. docs/source/_static/custom.css +8 -0
  10. docs/source/_static/favicon.ico +0 -0
  11. docs/source/_static/logo.png +0 -0
  12. docs/source/api/api.rst +15 -0
  13. docs/source/api/cli.rst +8 -0
  14. docs/source/api/weatherDB.broker.rst +10 -0
  15. docs/source/api/weatherDB.config.rst +7 -0
  16. docs/source/api/weatherDB.db.rst +23 -0
  17. docs/source/api/weatherDB.rst +22 -0
  18. docs/source/api/weatherDB.station.rst +56 -0
  19. docs/source/api/weatherDB.stations.rst +46 -0
  20. docs/source/api/weatherDB.utils.rst +22 -0
  21. docs/source/conf.py +137 -0
  22. docs/source/index.rst +33 -0
  23. docs/source/setup/Configuration.md +127 -0
  24. docs/source/setup/Hosting.md +9 -0
  25. docs/source/setup/Install.md +49 -0
  26. docs/source/setup/Quickstart.md +183 -0
  27. docs/source/setup/setup.rst +12 -0
  28. weatherdb/__init__.py +24 -0
  29. weatherdb/_version.py +1 -0
  30. weatherdb/alembic/README.md +8 -0
  31. weatherdb/alembic/alembic.ini +80 -0
  32. weatherdb/alembic/config.py +9 -0
  33. weatherdb/alembic/env.py +100 -0
  34. weatherdb/alembic/script.py.mako +26 -0
  35. weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
  36. weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
  37. weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
  38. weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
  39. weatherdb/broker.py +667 -0
  40. weatherdb/cli.py +214 -0
  41. weatherdb/config/ConfigParser.py +663 -0
  42. weatherdb/config/__init__.py +5 -0
  43. weatherdb/config/config_default.ini +162 -0
  44. weatherdb/db/__init__.py +3 -0
  45. weatherdb/db/connections.py +374 -0
  46. weatherdb/db/fixtures/RichterParameters.json +34 -0
  47. weatherdb/db/models.py +402 -0
  48. weatherdb/db/queries/get_quotient.py +155 -0
  49. weatherdb/db/views.py +165 -0
  50. weatherdb/station/GroupStation.py +710 -0
  51. weatherdb/station/StationBases.py +3108 -0
  52. weatherdb/station/StationET.py +111 -0
  53. weatherdb/station/StationP.py +807 -0
  54. weatherdb/station/StationPD.py +98 -0
  55. weatherdb/station/StationT.py +164 -0
  56. weatherdb/station/__init__.py +13 -0
  57. weatherdb/station/constants.py +21 -0
  58. weatherdb/stations/GroupStations.py +519 -0
  59. weatherdb/stations/StationsBase.py +1021 -0
  60. weatherdb/stations/StationsBaseTET.py +30 -0
  61. weatherdb/stations/StationsET.py +17 -0
  62. weatherdb/stations/StationsP.py +128 -0
  63. weatherdb/stations/StationsPD.py +24 -0
  64. weatherdb/stations/StationsT.py +21 -0
  65. weatherdb/stations/__init__.py +11 -0
  66. weatherdb/utils/TimestampPeriod.py +369 -0
  67. weatherdb/utils/__init__.py +3 -0
  68. weatherdb/utils/dwd.py +350 -0
  69. weatherdb/utils/geometry.py +69 -0
  70. weatherdb/utils/get_data.py +285 -0
  71. weatherdb/utils/logging.py +126 -0
  72. weatherdb-1.1.0.dist-info/LICENSE +674 -0
  73. weatherdb-1.1.0.dist-info/METADATA +765 -0
  74. weatherdb-1.1.0.dist-info/RECORD +77 -0
  75. weatherdb-1.1.0.dist-info/WHEEL +5 -0
  76. weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
  77. weatherdb-1.1.0.dist-info/top_level.txt +3 -0
weatherdb/utils/dwd.py ADDED
@@ -0,0 +1,350 @@
1
+ """
2
+ Some utilities functions to get data from the DWD-CDC server.
3
+
4
+ Based on `max_fun` package on https://github.com/maxschmi/max_fun
5
+ Created by Max Schmit, 2021
6
+ """
7
+ # libraries
8
+ import dateutil
9
+ import ftplib
10
+ import pathlib
11
+ import geopandas as gpd
12
+ import pandas as pd
13
+ from zipfile import ZipFile
14
+ import re
15
+ from io import BytesIO, StringIO
16
+ import traceback
17
+ import logging
18
+ import time
19
+ import random
20
+
21
+ # DWD - CDC FTP Server
22
+ CDC_HOST = "opendata.dwd.de"
23
+
24
+ # logger
25
+ log = logging.getLogger(__name__)
26
+
27
+ # basic functions
28
+ # ----------------
29
def dwd_id_to_str(id):
    """
    Convert a station id to the normal DWD format as str.

    The DWD uses 5-character, zero-padded station ids (e.g. "00044").

    Parameters
    ----------
    id : int or str
        The id of the station.

    Returns
    -------
    str
        string of normal DWD Station id, left-padded with "0" to 5 characters.
    """
    # format() with a fill spec works for both int and str inputs
    return format(id, "0>5")
46
+ def _dwd_date_parser(date_ser):
47
+ """
48
+ Parse the dates from a DWD table to datetime.
49
+
50
+ Parameters
51
+ ----------
52
+ date_ser : pd.Series of str or str
53
+ the string from the DWD table. e.g. "20200101" or "2020010112"
54
+
55
+ Returns
56
+ -------
57
+ datetime.datetime
58
+ The date as datetime.
59
+
60
+ """
61
+ if not isinstance(date_ser, pd.Series):
62
+ raise ValueError("date_str must be a pd.Series of str")
63
+
64
+ # test if list or single str
65
+ char_num = len(date_ser.iloc[0])
66
+
67
+ # parse to correct datetime
68
+ if char_num == 8:
69
+ return pd.to_datetime(date_ser, format='%Y%m%d')
70
+ elif char_num == 10:
71
+ return pd.to_datetime(date_ser, format='%Y%m%d%H')
72
+ elif char_num == 12:
73
+ return pd.to_datetime(date_ser, format='%Y%m%d%H%M')
74
+ else:
75
+ raise ValueError("there was an error while converting the following to a correct datetime"+
76
+ date_ser.head())
77
+
78
+ # functions
79
+ # ---------
80
def get_ftp_file_list(ftp_conn, ftp_folders):
    """Get a list of files in the folders with their modification dates.

    Parameters
    ----------
    ftp_conn : ftplib.FTP
        Ftp connection.
    ftp_folders : list of str or pathlike object
        The directories on the ftp server to look for files.

    Returns
    -------
    list of tuples of strs
        A list of Tuples. Every tuple stands for one file.
        The tuple consists of (filepath, modification date).
    """
    # normalize the input to a list of posix path strings.
    # work on a new list, so the caller's list object is not mutated
    # (the previous version replaced Path entries in place).
    if isinstance(ftp_folders, str):
        ftp_folders = [ftp_folders]
    ftp_folders = [
        folder.as_posix() if isinstance(folder, pathlib.Path) else folder
        for folder in ftp_folders]

    # make sure the connection is still alive, reconnect otherwise
    try:
        ftp_conn.voidcmd("NOOP")
    except ftplib.all_errors:
        ftp_conn.connect()

    # get files and modification dates
    files = []
    for ftp_folder in ftp_folders:
        lines = []
        ftp_conn.dir(ftp_folder, lines.append)
        for line in lines:
            # unix "ls -l" style listing is assumed:
            # perms links owner group size month day year-or-time name
            parts = line.split(maxsplit=9)
            # NOTE(review): the name is appended directly, so ftp_folder
            # is expected to end with "/" — confirm with the callers
            filepath = ftp_folder + parts[8]
            modtime = dateutil.parser.parse(parts[5] + " " + parts[6] + " " + parts[7])
            files.append((filepath, modtime))

    return files
121
def get_cdc_file_list(ftp_folders):
    """List the files in the given folders on the DWD CDC server.

    Opens an anonymous FTP connection to the CDC host, collects the
    file listing together with the modification dates and closes the
    connection again.

    Parameters
    ----------
    ftp_folders : list of str or pathlike object
        The directories on the CDC server to look for files.

    Returns
    -------
    list of tuples of strs
        One tuple per file: (filepath, modification date).
    """
    with ftplib.FTP(CDC_HOST) as ftp_con:
        ftp_con.login()
        return get_ftp_file_list(ftp_con, ftp_folders)
127
def get_dwd_file(zip_filepath):
    """
    Get a DataFrame from one single (zip-)file from the DWD FTP server.

    Parameters
    ----------
    zip_filepath : str
        Path to the file on the server. e.g.
        - "/climate_environment/CDC/observations_germany/climate/10_minutes/air_temperature/recent/10minutenwerte_TU_00044_akt.zip"
        - "/climate_environment/CDC/derived_germany/soil/daily/historical/derived_germany_soil_daily_historical_73.txt.gz"

    Returns
    -------
    pandas.DataFrame
        The DataFrame of the selected file in the zip folder.

    Raises
    ------
    ValueError
        If no file or several files inside the zip archive match "produkt".
    ImportError
        If the filepath belongs neither to an observation nor a derived
        dataset.
    """
    # get the compressed folder from dwd
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # download file, retry up to 10 times with a random backoff
        compressed_bin = BytesIO()
        num_tried = 0
        while num_tried < 10:
            try:
                ftp.retrbinary("RETR " + zip_filepath, compressed_bin.write)
                break
            except Exception as e:
                if num_tried < 9:
                    num_tried += 1
                    # discard partially downloaded data before retrying,
                    # otherwise the retry appends a full download after
                    # the broken first attempt and yields a corrupt archive
                    compressed_bin.seek(0)
                    compressed_bin.truncate()
                    time.sleep(random.randint(0, 400)/100)
                else:
                    raise e

    # check folder to be derived or observation type import the data
    if re.search("observations", zip_filepath):
        # get zip folder and files
        compressed_folder = ZipFile(compressed_bin)
        compressed_folder_files = compressed_folder.namelist()

        # test if one and only one file matches the pattern
        files = list(filter(re.compile("produkt").search,
                            compressed_folder_files))

        if len(files) == 0:
            raise ValueError(
                "There is no file matching the pattern: produkt " +
                "in the zip files: \n- " +
                "\n- ".join(compressed_folder_files))
        elif len(files) > 1:
            # the previous message claimed "only the first file is
            # returned" although the ValueError aborts the import
            raise ValueError(
                "There are more than one files matching the " +
                "pattern: produkt\nin the zip file: " +
                str(zip_filepath) +
                "\nthe first matching file would be: " +
                str(files[0]))

        # extract the file from the zip folder and return it as pd.DataFrame
        with compressed_folder.open(files[0]) as f:
            df = pd.read_table(f, sep=";",
                               dtype={"Datum": str, "MESS_DATUM": str},
                               skipinitialspace=True,
                               na_values=[-999, -9999, "####", "#####", "######"])

    elif re.search("derived", zip_filepath):
        # derived datasets are plain gzipped tables, read directly by URL
        df = pd.read_table(f"ftp://{CDC_HOST}/{zip_filepath}",
                           compression="gzip",
                           sep=";",
                           skipinitialspace=True,
                           dtype={"Datum": str, "MESS_DATUM": str},
                           na_values=[-999, -9999, "####", "#####", "######"])
    else:
        raise ImportError("ERROR: No file could be imported, as there is " +
                          "just a setup for observation and derived datas")

    # convert dates to datetime
    for col in ["MESS_DATUM", "Datum"]:
        if col in df.columns:
            df[col] = _dwd_date_parser(df[col])

    return df
210
def get_dwd_meta(ftp_folder):
    """
    Get the meta file from the ftp_folder on the DWD server.

    Downloads the meta file of a given folder.
    Corrects the meta file of missing files: if no data file for a
    station is found in the folder, its meta entry gets deleted.
    For historical observation folders the "von_datum"/"bis_datum"
    entries are adjusted to the dates found in the zip file names.

    Parameters
    ----------
    ftp_folder : str
        The path to the directory where to search for the meta file.
        e.g. "climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/".

    Returns
    -------
    geopandas.GeoDataFrame or None
        a GeoDataFrame of the meta file.
        None if no meta file was found or if it could not be imported.

    """
    # open ftp connection and get list of files in folder
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # get and check the meta_file name
        ftp_files = ftp.nlst(ftp_folder)
        # meta file is either "..._stations_list.txt" or
        # "..._Beschreibung_Stationen.txt", but not the "_mn4" variant
        pattern = r".*(?<!_mn4)((_stations_list)|(_Beschreibung_Stationen))+.txt$"
        meta_file = list(filter(re.compile(pattern).match, ftp_files))

    if len(meta_file) == 0:
        log.info(
            f"There is no file matching the pattern '{pattern}'"+
            f"\nin the folder: ftp://{CDC_HOST}/{str(ftp_folder)}")
        return None
    elif len(meta_file) > 1:
        log.info(
            f"There are more than one files matching the pattern: {pattern}" +
            f" in the folder:\nftp://{CDC_HOST}/{str(ftp_folder)}" +
            f"\nonly the first file is returned: {meta_file[0]}")

    # import meta file
    try:
        if re.search("observations", ftp_folder):
            with ftplib.FTP(CDC_HOST) as ftp:
                ftp.login()
                with BytesIO() as bio, StringIO() as sio:
                    ftp.retrbinary("RETR " + meta_file[0], bio.write)
                    sio.write(bio.getvalue().decode("WINDOWS-1252").replace("\r\n", "\n"))
                    # the first line of the file holds the column names
                    colnames = sio.getvalue().split("\n")[0].split()
                    sio.seek(0)
                    meta = pd.read_table(
                        sio,
                        skiprows=2,
                        lineterminator="\n",
                        sep=r"\s{2,}|(?<=\d|\))\s{1}(?=[\w])",  # two or more white spaces or one space after digit and followed by word
                        names=colnames,
                        parse_dates=[col for col in colnames if "datum" in col.lower()],
                        index_col="Stations_id",
                        engine="python")
        elif re.search("derived", ftp_folder):
            meta = pd.read_table("ftp://opendata.dwd.de/" + meta_file[0],
                                 encoding="WINDOWS-1252", sep=";", skiprows=1,
                                 names=["Stations_id", "Stationshoehe",
                                        "geoBreite", "geoLaenge",
                                        "Stationsname", "Bundesland"],
                                 index_col="Stations_id"
                                 )
    except Exception:
        # was a bare "except:" before, which would also swallow
        # KeyboardInterrupt and SystemExit
        traceback.print_exc()
        print("URL Error: The URL could not be found:\n" +
              "ftp://opendata.dwd.de/" + meta_file[0])
        return None

    try:
        meta = gpd.GeoDataFrame(meta,
                                geometry=gpd.points_from_xy(meta.geoLaenge,
                                                            meta.geoBreite,
                                                            crs="EPSG:4326"))
        meta = meta.drop(["geoLaenge", "geoBreite"], axis=1)
    except Exception:
        traceback.print_exc()
        print("Error while converting DataFrame to GeoDataFrame," +
              " maybe the columns aren't named 'geoLaenge' and geoBreite'" +
              "\nhere is the header of the DataFrame:\n")
        print(meta.head())
        return None

    # delete entries where there is no file in the ftp-folder
    rows_drop = []
    str_ftp_files = str(ftp_files)
    for i, row in meta.iterrows():
        # a station id appears in a filename either zero-padded or plain,
        # delimited by "_" or "."
        if not (re.search(r"[_\.]" + dwd_id_to_str(i) + r"[_\.]|" +
                          r"[_\.]" + str(i) + r"[_\.]", str_ftp_files)):
            rows_drop.append(i)
    meta = meta.drop(rows_drop)

    # change meta date entries if the file has a different date
    # BUGFIX: the previous condition '("bis_datum" and "von_datum" in meta)'
    # only tested von_datum, because the non-empty string "bis_datum"
    # is always truthy
    if ("observation" in ftp_folder) \
            and ("bis_datum" in meta) and ("von_datum" in meta) \
            and ("recent" not in ftp_folder):
        zip_files = list(filter(re.compile(r".+\d+_\d+_\d+_hist.zip").match,
                                ftp_files))
        # guard: an empty match list would raise an IndexError below
        if zip_files:
            zip_files.sort()
            zip_files.append(zip_files[0])  # else the last entry won't get tested
            last_sid, last_from_date, last_to_date = None, None, None

            for zip_file in zip_files:
                # get new files dates from the filename:
                # <prefix>_<kind>_<sid>_<from>_<to>_hist.zip
                filename = zip_file.split("/")[-1]
                _, kind, sid, from_date, to_date, _ = filename.split("_")
                if kind in ["niedereder"]:
                    continue
                from_date = pd.Timestamp(from_date)
                to_date = pd.Timestamp(to_date)
                sid = int(sid)

                # compare with previous file's dates
                if last_sid and (sid == last_sid):
                    last_to_date = to_date
                else:
                    # compare last values with meta file dates
                    if last_sid and (last_sid in meta.index):
                        if last_from_date > meta.loc[last_sid, "von_datum"]:
                            meta.loc[last_sid, "von_datum"] = last_from_date
                        if last_to_date < meta.loc[last_sid, "bis_datum"]:
                            meta.loc[last_sid, "bis_datum"] = last_to_date

                    # set values as last values
                    last_to_date = to_date
                    last_from_date = from_date
                    last_sid = sid

    # trim whitespace in string columns
    for dtype, col in zip(meta.dtypes, meta.columns):
        if pd.api.types.is_string_dtype(dtype) and col != "geometry":
            meta[col] = meta[col].str.strip()

    # return
    return meta
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """A collection of geometry functions.
4
+
5
+ Based on `max_fun` package on https://github.com/maxschmi/max_fun
6
+ Created by Max Schmit, 2021
7
+ """
8
+ # libraries
9
+ import numpy as np
10
+ from shapely.geometry import Point, LineString
11
+ import geopandas as gpd
12
+ import rasterio as rio
13
+
14
+ # functions
15
def polar_line(center_xy, radius, angle):
    """Create a LineString with polar coordinates.

    Parameters
    ----------
    center_xy : list, array or tuple of int or floats
        The X and Y coordinates of the center.
    radius : int or float
        The radius of the circle.
    angle : int
        The angle of the portion of the circle in degrees.
        0 means east.

    Returns
    -------
    shapely.geometry.LineString
        LineString from the center to the point on the circle.
    """
    # convert the polar end point to cartesian coordinates
    theta = np.deg2rad(angle)
    end_xy = [center_xy[0] + np.cos(theta) * radius,
              center_xy[1] + np.sin(theta) * radius]

    return LineString([center_xy, end_xy])
+
41
def raster2points(raster_np, transform, crs=None):
    """Polygonize raster array to GeoDataFrame.

    Until now this only works for rasters with one band.

    Parameters
    ----------
    raster_np : np.array
        The imported raster array (band-first, only band 0 is used).
    transform : rio.Affine
        The Affine transformation of the raster.
    crs : str or crs-type, optional
        The coordinate reference system for the raster, by default None

    Returns
    -------
    geopandas.GeoDataFrame
        The raster Data is in the data column.
    """
    band = raster_np[0]
    valid = ~np.isnan(band)

    # nonzero() on a 2D mask yields (row indices, column indices),
    # which is exactly the order rio.transform.xy expects
    row_idx, col_idx = valid.nonzero()
    xs, ys = rio.transform.xy(transform, row_idx, col_idx)

    points = [Point(x, y) for x, y in zip(xs, ys)]

    return gpd.GeoDataFrame(
        {"data": band[valid]},
        geometry=points,
        crs=crs)