weatherdb 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. docker/Dockerfile +30 -0
  2. docker/docker-compose.yaml +58 -0
  3. docker/docker-compose_test.yaml +24 -0
  4. docker/start-docker-test.sh +6 -0
  5. docs/requirements.txt +10 -0
  6. docs/source/Changelog.md +2 -0
  7. docs/source/License.rst +7 -0
  8. docs/source/Methode.md +161 -0
  9. docs/source/_static/custom.css +8 -0
  10. docs/source/_static/favicon.ico +0 -0
  11. docs/source/_static/logo.png +0 -0
  12. docs/source/api/api.rst +15 -0
  13. docs/source/api/cli.rst +8 -0
  14. docs/source/api/weatherDB.broker.rst +10 -0
  15. docs/source/api/weatherDB.config.rst +7 -0
  16. docs/source/api/weatherDB.db.rst +23 -0
  17. docs/source/api/weatherDB.rst +22 -0
  18. docs/source/api/weatherDB.station.rst +56 -0
  19. docs/source/api/weatherDB.stations.rst +46 -0
  20. docs/source/api/weatherDB.utils.rst +22 -0
  21. docs/source/conf.py +137 -0
  22. docs/source/index.rst +33 -0
  23. docs/source/setup/Configuration.md +127 -0
  24. docs/source/setup/Hosting.md +9 -0
  25. docs/source/setup/Install.md +49 -0
  26. docs/source/setup/Quickstart.md +183 -0
  27. docs/source/setup/setup.rst +12 -0
  28. weatherdb/__init__.py +24 -0
  29. weatherdb/_version.py +1 -0
  30. weatherdb/alembic/README.md +8 -0
  31. weatherdb/alembic/alembic.ini +80 -0
  32. weatherdb/alembic/config.py +9 -0
  33. weatherdb/alembic/env.py +100 -0
  34. weatherdb/alembic/script.py.mako +26 -0
  35. weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
  36. weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
  37. weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
  38. weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
  39. weatherdb/broker.py +667 -0
  40. weatherdb/cli.py +214 -0
  41. weatherdb/config/ConfigParser.py +663 -0
  42. weatherdb/config/__init__.py +5 -0
  43. weatherdb/config/config_default.ini +162 -0
  44. weatherdb/db/__init__.py +3 -0
  45. weatherdb/db/connections.py +374 -0
  46. weatherdb/db/fixtures/RichterParameters.json +34 -0
  47. weatherdb/db/models.py +402 -0
  48. weatherdb/db/queries/get_quotient.py +155 -0
  49. weatherdb/db/views.py +165 -0
  50. weatherdb/station/GroupStation.py +710 -0
  51. weatherdb/station/StationBases.py +3108 -0
  52. weatherdb/station/StationET.py +111 -0
  53. weatherdb/station/StationP.py +807 -0
  54. weatherdb/station/StationPD.py +98 -0
  55. weatherdb/station/StationT.py +164 -0
  56. weatherdb/station/__init__.py +13 -0
  57. weatherdb/station/constants.py +21 -0
  58. weatherdb/stations/GroupStations.py +519 -0
  59. weatherdb/stations/StationsBase.py +1021 -0
  60. weatherdb/stations/StationsBaseTET.py +30 -0
  61. weatherdb/stations/StationsET.py +17 -0
  62. weatherdb/stations/StationsP.py +128 -0
  63. weatherdb/stations/StationsPD.py +24 -0
  64. weatherdb/stations/StationsT.py +21 -0
  65. weatherdb/stations/__init__.py +11 -0
  66. weatherdb/utils/TimestampPeriod.py +369 -0
  67. weatherdb/utils/__init__.py +3 -0
  68. weatherdb/utils/dwd.py +350 -0
  69. weatherdb/utils/geometry.py +69 -0
  70. weatherdb/utils/get_data.py +285 -0
  71. weatherdb/utils/logging.py +126 -0
  72. weatherdb-1.1.0.dist-info/LICENSE +674 -0
  73. weatherdb-1.1.0.dist-info/METADATA +765 -0
  74. weatherdb-1.1.0.dist-info/RECORD +77 -0
  75. weatherdb-1.1.0.dist-info/WHEEL +5 -0
  76. weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
  77. weatherdb-1.1.0.dist-info/top_level.txt +3 -0
weatherdb/utils/dwd.py ADDED
@@ -0,0 +1,350 @@
1
+ """
2
+ Some utility functions to get data from the DWD-CDC server.
3
+
4
+ Based on `max_fun` package on https://github.com/maxschmi/max_fun
5
+ Created by Max Schmit, 2021
6
+ """
7
+ # libraries
8
+ import dateutil
9
+ import ftplib
10
+ import pathlib
11
+ import geopandas as gpd
12
+ import pandas as pd
13
+ from zipfile import ZipFile
14
+ import re
15
+ from io import BytesIO, StringIO
16
+ import traceback
17
+ import logging
18
+ import time
19
+ import random
20
+
21
+ # DWD - CDC FTP Server
22
+ CDC_HOST = "opendata.dwd.de"
23
+
24
+ # logger
25
+ log = logging.getLogger(__name__)
26
+
27
+ # basic functions
28
+ # ----------------
29
def dwd_id_to_str(id):
    """Format a station id in the normal DWD style as a string.

    DWD station ids are zero-padded to at least 5 characters.

    Parameters
    ----------
    id : int or str
        The id of the station.

    Returns
    -------
    str
        string of normal DWD Station id.
    """
    # right-align in a field of 5, filling with zeros on the left
    return "{:0>5}".format(id)
45
+
46
def _dwd_date_parser(date_ser):
    """
    Parse the dates from a DWD table to datetime.

    Parameters
    ----------
    date_ser : pd.Series of str
        the strings from the DWD table. e.g. "20200101" or "2020010112"

    Returns
    -------
    pd.Series of datetime64
        The dates as datetime.

    Raises
    ------
    ValueError
        If date_ser is not a pd.Series or the date strings have an
        unsupported length.
    """
    if not isinstance(date_ser, pd.Series):
        # message previously said "date_str", which is not the parameter name
        raise ValueError("date_ser must be a pd.Series of str")

    # infer the date format from the length of the first entry
    char_num = len(date_ser.iloc[0])

    # parse to correct datetime
    if char_num == 8:
        return pd.to_datetime(date_ser, format='%Y%m%d')
    elif char_num == 10:
        return pd.to_datetime(date_ser, format='%Y%m%d%H')
    elif char_num == 12:
        return pd.to_datetime(date_ser, format='%Y%m%d%H%M')
    else:
        # str(...) is required: concatenating a str with a Series
        # would raise a TypeError instead of this ValueError
        raise ValueError(
            "there was an error while converting the following to a "
            "correct datetime" + str(date_ser.head()))
77
+
78
+ # functions
79
+ # ---------
80
def get_ftp_file_list(ftp_conn, ftp_folders):
    """Get a list of files in the folders with their modification dates.

    Parameters
    ----------
    ftp_conn : ftplib.FTP
        Ftp connection.
    ftp_folders : list of str or pathlike object
        The directories on the ftp server to look for files.

    Returns
    -------
    list of tuples of strs
        A list of Tuples. Every tuple stands for one file.
        The tuple consists of (filepath, modification date).
    """
    # "import dateutil" at module level does not load the parser submodule,
    # so make sure dateutil.parser is really available here
    import dateutil.parser

    # check types
    if isinstance(ftp_folders, str):
        ftp_folders = [ftp_folders]
    ftp_folders = [
        folder.as_posix() if isinstance(folder, pathlib.Path) else folder
        for folder in ftp_folders]

    # reconnect if the connection timed out in the meantime
    try:
        ftp_conn.voidcmd("NOOP")
    except ftplib.all_errors:
        ftp_conn.connect()

    # get files and modification dates
    files = []
    for ftp_folder in ftp_folders:
        # guarantee a valid join of folder and file name
        if not ftp_folder.endswith("/"):
            ftp_folder += "/"
        lines = []
        ftp_conn.dir(ftp_folder, lines.append)
        for line in lines:
            # unix "ls -l" style listing:
            # parts[5:8] hold the date, parts[8] the file name
            parts = line.split(maxsplit=9)
            filepath = ftp_folder + parts[8]
            modtime = dateutil.parser.parse(" ".join(parts[5:8]))
            files.append((filepath, modtime))

    return files
120
+
121
def get_cdc_file_list(ftp_folders):
    """List the files in the given folders on the DWD CDC FTP server.

    Opens an anonymous connection to the CDC host, collects the
    (filepath, modification date) tuples and closes the connection again.

    Parameters
    ----------
    ftp_folders : list of str or pathlike object
        The directories on the CDC server to look for files.

    Returns
    -------
    list of tuples of strs
        One (filepath, modification date) tuple per file.
    """
    with ftplib.FTP(CDC_HOST) as ftp_con:
        ftp_con.login()
        return get_ftp_file_list(ftp_con, ftp_folders)
126
+
127
def get_dwd_file(zip_filepath):
    """
    Get a DataFrame from one single (zip-)file from the DWD FTP server.

    Parameters
    ----------
    zip_filepath : str
        Path to the file on the server. e.g.
        - "/climate_environment/CDC/observations_germany/climate/10_minutes/air_temperature/recent/10minutenwerte_TU_00044_akt.zip"
        - "/climate_environment/CDC/derived_germany/soil/daily/historical/derived_germany_soil_daily_historical_73.txt.gz"

    Returns
    -------
    pandas.DataFrame
        The DataFrame of the selected file in the zip folder.

    Raises
    ------
    ValueError
        If no or more than one file in the zip archive matches "produkt".
    ImportError
        If the filepath belongs neither to an observation nor to a
        derived dataset.
    """
    # get the compressed folder from dwd
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()

        # download file, retrying up to 10 times with a random backoff
        compressed_bin = BytesIO()
        num_tried = 0
        while num_tried < 10:
            try:
                # discard any partial data from a previously failed
                # attempt, otherwise the archive would get corrupted
                compressed_bin.seek(0)
                compressed_bin.truncate()
                ftp.retrbinary("RETR " + zip_filepath, compressed_bin.write)
                break
            except Exception as e:
                if num_tried < 9:
                    num_tried += 1
                    time.sleep(random.randint(0, 400)/100)
                else:
                    raise e

    # check folder to be derived or observation type and import the data
    if re.search("observations", zip_filepath):
        # get zip folder and files
        compressed_folder = ZipFile(compressed_bin)
        compressed_folder_files = compressed_folder.namelist()

        # test if one and only one file matches the pattern
        files = list(filter(re.compile("produkt").search,
                            compressed_folder_files))

        if len(files) == 0:
            raise ValueError(
                "There is no file matching the pattern: produkt " +
                "in the zip files: \n- " +
                "\n- ".join(compressed_folder_files))
        elif len(files) > 1:
            raise ValueError(
                "There are more than one files matching the " +
                "pattern: produkt\nin the zip file: " +
                str(zip_filepath) +
                "\nonly the first file is returned: " +
                str(files[0]))

        # extract the file from the zip folder and return it as pd.DataFrame
        with compressed_folder.open(files[0]) as f:
            df = pd.read_table(f, sep=";",
                               dtype={"Datum": str, "MESS_DATUM": str},
                               skipinitialspace=True,
                               na_values=[-999, -9999, "####", "#####", "######"])

    elif re.search("derived", zip_filepath):
        # NOTE(review): for derived files the FTP download above is unused —
        # pandas fetches the file again over its own FTP handler here
        df = pd.read_table(f"ftp://{CDC_HOST}/{zip_filepath}",
                           compression="gzip",
                           sep=";",
                           skipinitialspace=True,
                           dtype={"Datum": str, "MESS_DATUM": str},
                           na_values=[-999, -9999, "####", "#####", "######"])
    else:
        raise ImportError("ERROR: No file could be imported, as there is " +
                          "just a setup for observation and derived datas")

    # convert dates to datetime
    for col in ["MESS_DATUM", "Datum"]:
        if col in df.columns:
            df[col] = _dwd_date_parser(df[col])

    return df
209
+
210
def get_dwd_meta(ftp_folder):
    """
    Get the meta file from the ftp_folder on the DWD server.

    Downloads the meta file of a given folder.
    Corrects the meta file for missing files: if no data file for a
    station is found in the folder, the meta entry gets deleted.
    For historical observation folders the "von_datum"/"bis_datum"
    entries are shrunk to the period actually covered by the zip files.

    Parameters
    ----------
    ftp_folder : str
        The path to the directory where to search for the meta file.
        e.g. "climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/".

    Returns
    -------
    geopandas.GeoDataFrame or None
        a GeoDataFrame of the meta file,
        or None if no meta file was found or it could not be imported.
    """
    # open ftp connection and get list of files in folder
    with ftplib.FTP(CDC_HOST) as ftp:
        ftp.login()
        ftp_files = ftp.nlst(ftp_folder)

    # get and check the meta_file name
    pattern = r".*(?<!_mn4)((_stations_list)|(_Beschreibung_Stationen))+.txt$"
    meta_file = list(filter(re.compile(pattern).match, ftp_files))

    if len(meta_file) == 0:
        log.info(
            f"There is no file matching the pattern '{pattern}'"+
            f"\nin the folder: ftp://{CDC_HOST}/{str(ftp_folder)}")
        return None
    elif len(meta_file) > 1:
        log.info(
            f"There are more than one files matching the pattern: {pattern}" +
            f" in the folder:\nftp://{CDC_HOST}/{str(ftp_folder)}" +
            f"\nonly the first file is returned: {meta_file[0]}")

    # import meta file
    try:
        if re.search("observations", ftp_folder):
            with ftplib.FTP(CDC_HOST) as ftp:
                ftp.login()
                with BytesIO() as bio, StringIO() as sio:
                    ftp.retrbinary("RETR " + meta_file[0], bio.write)
                    # DWD meta files are WINDOWS-1252 encoded with CRLF endings
                    sio.write(bio.getvalue().decode("WINDOWS-1252").replace("\r\n", "\n"))
                    colnames = sio.getvalue().split("\n")[0].split()
                    sio.seek(0)
                    meta = pd.read_table(
                        sio,
                        skiprows=2,
                        lineterminator="\n",
                        sep=r"\s{2,}|(?<=\d|\))\s{1}(?=[\w])",  # two or more white spaces or one space after digit and followed by word
                        names=colnames,
                        parse_dates=[col for col in colnames if "datum" in col.lower()],
                        index_col="Stations_id",
                        engine="python")
        elif re.search("derived", ftp_folder):
            meta = pd.read_table("ftp://opendata.dwd.de/" + meta_file[0],
                                 encoding="WINDOWS-1252", sep=";", skiprows=1,
                                 names=["Stations_id", "Stationshoehe",
                                        "geoBreite", "geoLaenge",
                                        "Stationsname", "Bundesland"],
                                 index_col="Stations_id"
                                 )
    except Exception:
        # narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # are not swallowed
        traceback.print_exc()
        print("URL Error: The URL could not be found:\n" +
              "ftp://opendata.dwd.de/" + meta_file[0])
        return None

    # convert to a GeoDataFrame with WGS84 point geometries
    try:
        meta = gpd.GeoDataFrame(meta,
                                geometry=gpd.points_from_xy(meta.geoLaenge,
                                                            meta.geoBreite,
                                                            crs="EPSG:4326"))
        meta = meta.drop(["geoLaenge", "geoBreite"], axis=1)
    except Exception:
        traceback.print_exc()
        print("Error while converting DataFrame to GeoDataFrame," +
              " maybe the columns aren't named 'geoLaenge' and geoBreite'" +
              "\nhere is the header of the DataFrame:\n")
        print(meta.head())
        return None

    # delete entries where there is no file in the ftp-folder
    rows_drop = []
    str_ftp_files = str(ftp_files)
    for i, row in meta.iterrows():
        if not (re.search(r"[_\.]" + dwd_id_to_str(i) + r"[_\.]|" +
                          r"[_\.]" + str(i) + r"[_\.]", str_ftp_files)):
            rows_drop.append(i)
    meta = meta.drop(rows_drop)

    # change meta date entries if the file has a different date
    # NOTE: the previous condition ("bis_datum" and "von_datum" in meta)
    # only tested "von_datum" because of operator precedence
    if (("observation" in ftp_folder)
            and ("bis_datum" in meta) and ("von_datum" in meta)
            and ("recent" not in ftp_folder)):
        zip_files = list(filter(re.compile(r".+\d+_\d+_\d+_hist.zip").match,
                                ftp_files))
        if zip_files:  # guard against IndexError on an empty folder
            zip_files.sort()
            zip_files.append(zip_files[0])  # else the last entry won't get tested
            last_sid, last_from_date, last_to_date = None, None, None

            for zip_file in zip_files:
                # get new files dates
                # file name scheme: <res>_<kind>_<sid>_<from>_<to>_hist.zip
                filename = zip_file.split("/")[-1]
                _, kind, sid, from_date, to_date, _ = filename.split("_")
                if kind in ["niedereder"]:
                    continue
                from_date = pd.Timestamp(from_date)
                to_date = pd.Timestamp(to_date)
                sid = int(sid)

                # compare with previous file's dates
                if last_sid and (sid == last_sid):
                    # same station, just extend the covered period
                    last_to_date = to_date
                else:
                    # compare last values with meta file dates
                    if last_sid and (last_sid in meta.index):
                        if last_from_date > meta.loc[last_sid, "von_datum"]:
                            meta.loc[last_sid, "von_datum"] = last_from_date
                        if last_to_date < meta.loc[last_sid, "bis_datum"]:
                            meta.loc[last_sid, "bis_datum"] = last_to_date

                    # set values as last values
                    last_to_date = to_date
                    last_from_date = from_date
                    last_sid = sid

    # trim whitespace in string columns
    for dtype, col in zip(meta.dtypes, meta.columns):
        if pd.api.types.is_string_dtype(dtype) and col != "geometry":
            meta[col] = meta[col].str.strip()

    # return
    return meta
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """A collection of geometry functions.
4
+
5
+ Based on `max_fun` package on https://github.com/maxschmi/max_fun
6
+ Created by Max Schmit, 2021
7
+ """
8
+ # libraries
9
+ import numpy as np
10
+ from shapely.geometry import Point, LineString
11
+ import geopandas as gpd
12
+ import rasterio as rio
13
+
14
+ # functions
15
def polar_line(center_xy, radius, angle):
    """Create a LineString from polar coordinates.

    Parameters
    ----------
    center_xy : list, array or tuple of int or floats
        The X and Y coordinates of the center.
    radius : int or float
        The radius of the circle.
    angle : int
        The angle of the portion of the circle in degrees.
        0 means east.

    Returns
    -------
    shapely.geometry.LineString
        LineString from the center to the point on the circle.
    """
    # end point of the ray, measured counter-clockwise from east
    rad = np.deg2rad(angle)
    end_xy = [center_xy[0] + np.cos(rad) * radius,
              center_xy[1] + np.sin(rad) * radius]

    return LineString([center_xy, end_xy])
+ return LineString(coords)
40
+
41
def raster2points(raster_np, transform, crs=None):
    """Convert a raster array to a GeoDataFrame of points.

    Until now this only works for rasters with one band.

    Parameters
    ----------
    raster_np : np.array
        The imported raster array.
    transform : rio.Affine
        The Affine transformation of the raster.
    crs : str or crs-type, optional
        The coordinate reference system for the raster, by default None

    Returns
    -------
    geopandas.GeoDataFrame
        The raster Data is in the data column.
    """
    band = raster_np[0]
    # keep only the cells that hold a value
    valid = ~np.isnan(band)
    # nonzero() on a 2D mask yields (row indices, column indices)
    rows, cols = valid.nonzero()
    xs, ys = rio.transform.xy(transform, rows, cols)

    points = [Point(x, y) for x, y in zip(xs, ys)]

    return gpd.GeoDataFrame(
        {"data": band[valid]},
        geometry=points,
        crs=crs)