weatherdb 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docker/Dockerfile +30 -0
 - docker/docker-compose.yaml +58 -0
 - docker/docker-compose_test.yaml +24 -0
 - docker/start-docker-test.sh +6 -0
 - docs/requirements.txt +10 -0
 - docs/source/Changelog.md +2 -0
 - docs/source/License.rst +7 -0
 - docs/source/Methode.md +161 -0
 - docs/source/_static/custom.css +8 -0
 - docs/source/_static/favicon.ico +0 -0
 - docs/source/_static/logo.png +0 -0
 - docs/source/api/api.rst +15 -0
 - docs/source/api/cli.rst +8 -0
 - docs/source/api/weatherDB.broker.rst +10 -0
 - docs/source/api/weatherDB.config.rst +7 -0
 - docs/source/api/weatherDB.db.rst +23 -0
 - docs/source/api/weatherDB.rst +22 -0
 - docs/source/api/weatherDB.station.rst +56 -0
 - docs/source/api/weatherDB.stations.rst +46 -0
 - docs/source/api/weatherDB.utils.rst +22 -0
 - docs/source/conf.py +137 -0
 - docs/source/index.rst +33 -0
 - docs/source/setup/Configuration.md +127 -0
 - docs/source/setup/Hosting.md +9 -0
 - docs/source/setup/Install.md +49 -0
 - docs/source/setup/Quickstart.md +183 -0
 - docs/source/setup/setup.rst +12 -0
 - weatherdb/__init__.py +24 -0
 - weatherdb/_version.py +1 -0
 - weatherdb/alembic/README.md +8 -0
 - weatherdb/alembic/alembic.ini +80 -0
 - weatherdb/alembic/config.py +9 -0
 - weatherdb/alembic/env.py +100 -0
 - weatherdb/alembic/script.py.mako +26 -0
 - weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
 - weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
 - weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
 - weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
 - weatherdb/broker.py +667 -0
 - weatherdb/cli.py +214 -0
 - weatherdb/config/ConfigParser.py +663 -0
 - weatherdb/config/__init__.py +5 -0
 - weatherdb/config/config_default.ini +162 -0
 - weatherdb/db/__init__.py +3 -0
 - weatherdb/db/connections.py +374 -0
 - weatherdb/db/fixtures/RichterParameters.json +34 -0
 - weatherdb/db/models.py +402 -0
 - weatherdb/db/queries/get_quotient.py +155 -0
 - weatherdb/db/views.py +165 -0
 - weatherdb/station/GroupStation.py +710 -0
 - weatherdb/station/StationBases.py +3108 -0
 - weatherdb/station/StationET.py +111 -0
 - weatherdb/station/StationP.py +807 -0
 - weatherdb/station/StationPD.py +98 -0
 - weatherdb/station/StationT.py +164 -0
 - weatherdb/station/__init__.py +13 -0
 - weatherdb/station/constants.py +21 -0
 - weatherdb/stations/GroupStations.py +519 -0
 - weatherdb/stations/StationsBase.py +1021 -0
 - weatherdb/stations/StationsBaseTET.py +30 -0
 - weatherdb/stations/StationsET.py +17 -0
 - weatherdb/stations/StationsP.py +128 -0
 - weatherdb/stations/StationsPD.py +24 -0
 - weatherdb/stations/StationsT.py +21 -0
 - weatherdb/stations/__init__.py +11 -0
 - weatherdb/utils/TimestampPeriod.py +369 -0
 - weatherdb/utils/__init__.py +3 -0
 - weatherdb/utils/dwd.py +350 -0
 - weatherdb/utils/geometry.py +69 -0
 - weatherdb/utils/get_data.py +285 -0
 - weatherdb/utils/logging.py +126 -0
 - weatherdb-1.1.0.dist-info/LICENSE +674 -0
 - weatherdb-1.1.0.dist-info/METADATA +765 -0
 - weatherdb-1.1.0.dist-info/RECORD +77 -0
 - weatherdb-1.1.0.dist-info/WHEEL +5 -0
 - weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
 - weatherdb-1.1.0.dist-info/top_level.txt +3 -0
 
    
        weatherdb/utils/dwd.py
    ADDED
    
    | 
         @@ -0,0 +1,350 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            """
         
     | 
| 
      
 2 
     | 
    
         
            +
            Some utility functions to get data from the DWD-CDC server.
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            Based on `max_fun` package on https://github.com/maxschmi/max_fun
         
     | 
| 
      
 5 
     | 
    
         
            +
            Created by Max Schmit, 2021
         
     | 
| 
      
 6 
     | 
    
         
            +
            """
         
     | 
| 
      
 7 
     | 
    
         
            +
            # libraries
         
     | 
| 
      
 8 
     | 
    
         
            +
            import dateutil
         
     | 
| 
      
 9 
     | 
    
         
            +
            import ftplib
         
     | 
| 
      
 10 
     | 
    
         
            +
            import pathlib
         
     | 
| 
      
 11 
     | 
    
         
            +
            import geopandas as gpd
         
     | 
| 
      
 12 
     | 
    
         
            +
            import pandas as pd
         
     | 
| 
      
 13 
     | 
    
         
            +
            from zipfile import ZipFile
         
     | 
| 
      
 14 
     | 
    
         
            +
            import re
         
     | 
| 
      
 15 
     | 
    
         
            +
            from io import BytesIO, StringIO
         
     | 
| 
      
 16 
     | 
    
         
            +
            import traceback
         
     | 
| 
      
 17 
     | 
    
         
            +
            import logging
         
     | 
| 
      
 18 
     | 
    
         
            +
            import time
         
     | 
| 
      
 19 
     | 
    
         
            +
            import random
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            # DWD - CDC FTP Server
         
     | 
| 
      
 22 
     | 
    
         
            +
            # Host name passed to ftplib.FTP() by the download helpers in this module.
            CDC_HOST = "opendata.dwd.de"
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            # logger
         
     | 
| 
      
 25 
     | 
    
         
            +
            # Module-level logger, named after this module's import path.
            log = logging.getLogger(__name__)
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            # basic functions
         
     | 
| 
      
 28 
     | 
    
         
            +
            # ----------------
         
     | 
| 
      
 29 
     | 
    
         
            +
            def dwd_id_to_str(id):
                """
                Format a station id the way the DWD writes it.

                The value is right-aligned and padded on the left with zeros up to
                a minimum width of five characters. Values that are already five
                or more characters wide are not shortened.

                Parameters
                ----------
                id : int or str
                    The id of the station.

                Returns
                -------
                str
                    The zero-padded station id, e.g. ``"00044"`` for ``44``.

                """
                # fill character "0", right aligned (">"), minimum width 5
                return "{:0>5}".format(id)
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            def _dwd_date_parser(date_ser):
                """
                Parse the date strings of a DWD table column to datetimes.

                The format is selected from the number of characters of the first
                non-missing entry: 8 -> ``%Y%m%d``, 10 -> ``%Y%m%d%H``,
                12 -> ``%Y%m%d%H%M``. The whole Series is then parsed with that
                single format.

                Parameters
                ----------
                date_ser : pd.Series of str
                    The date column of a DWD table,
                    e.g. values like "20200101" or "2020010112".

                Returns
                -------
                pd.Series
                    The parsed dates, as returned by ``pd.to_datetime``.
                    Missing entries become ``NaT``.

                Raises
                ------
                ValueError
                    If ``date_ser`` is not a pd.Series or if the length of the
                    first non-missing entry matches none of the known formats.

                """
                if not isinstance(date_ser, pd.Series):
                    raise ValueError("date_ser must be a pd.Series of str")

                # number of characters -> matching strptime format
                formats = {8: "%Y%m%d", 10: "%Y%m%d%H", 12: "%Y%m%d%H%M"}

                # Infer the format from the first real value; an empty column or
                # a leading missing value must not break the parsing.
                valid = date_ser.dropna()
                if valid.empty:
                    # nothing to infer a format from, every entry is missing
                    return pd.to_datetime(date_ser)
                char_num = len(valid.iloc[0])

                if char_num not in formats:
                    # str() is required here: "text" + Series would concatenate
                    # element-wise and hand a Series to the exception.
                    raise ValueError(
                        "there was an error while converting the following "
                        "to a correct datetime:\n" + str(date_ser.head()))

                return pd.to_datetime(date_ser, format=formats[char_num])
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
            # functions
         
     | 
| 
      
 79 
     | 
    
         
            +
            # ---------
         
     | 
| 
      
 80 
     | 
    
         
            +
            def get_ftp_file_list(ftp_conn, ftp_folders):
                """Get a list of files in the folders with their modification dates.

                Every folder is listed with ``ftp_conn.dir`` and each listing line is
                split into the 9 whitespace separated columns of a unix style listing
                (the last one being the file name, which may contain spaces).
                Lines with fewer columns, e.g. a leading "total N" line, are skipped.

                Parameters
                ----------
                ftp_conn : ftplib.FTP
                    Ftp connection. If it does not answer a NOOP command,
                    ``ftp_conn.connect()`` is called to re-open it.
                ftp_folders : str, pathlike object or list of those
                    The directories on the ftp server to look for files.
                    The object handed in is not modified.

                Returns
                -------
                list of tuples
                    A list of Tuples. Every tuple stands for one file.
                    The tuple consists of (filepath as str, modification date as
                    datetime.datetime).
                """
                # A bare "import dateutil" does not load the parser submodule, so
                # import it explicitly instead of relying on another package
                # having done so.
                import dateutil.parser

                # normalise the input to a new list of posix path strings
                if isinstance(ftp_folders, (str, pathlib.PurePath)):
                    ftp_folders = [ftp_folders]
                folders = [
                    folder.as_posix() if isinstance(folder, pathlib.PurePath) else folder
                    for folder in ftp_folders]

                # check that the connection is still alive, otherwise re-open it
                # NOTE(review): no login is done after the reconnect; presumably
                # the server accepts the listing anyway -- confirm.
                try:
                    ftp_conn.voidcmd("NOOP")
                except ftplib.all_errors:
                    ftp_conn.connect()

                # get files and modification dates
                files = []
                for folder in folders:
                    # Path.as_posix() drops a trailing slash, so add the separator
                    # when it is missing to get a valid file path.
                    if not folder or folder.endswith("/"):
                        prefix = folder
                    else:
                        prefix = folder + "/"

                    lines = []
                    ftp_conn.dir(folder, lines.append)
                    for line in lines:
                        # 8 splits -> 9 columns, the file name keeps its spaces
                        parts = line.split(maxsplit=8)
                        if len(parts) < 9:
                            continue
                        modtime = dateutil.parser.parse(" ".join(parts[5:8]))
                        files.append((prefix + parts[8], modtime))

                return files
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
            def get_cdc_file_list(ftp_folders):
                """List the files of folders on the DWD-CDC server.

                Opens an FTP session to ``CDC_HOST``, logs in without credentials
                and hands the session to ``get_ftp_file_list``.

                Parameters
                ----------
                ftp_folders : list of str or pathlike object
                    The directories on the server to look for files.

                Returns
                -------
                list of tuples
                    The result of ``get_ftp_file_list``: one tuple of
                    (filepath, modification date) per file.
                """
                with ftplib.FTP(CDC_HOST) as cdc_session:
                    cdc_session.login()
                    return get_ftp_file_list(cdc_session, ftp_folders)
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
            def get_dwd_file(zip_filepath):
                """
                Get a DataFrame from one single (zip-)file from the DWD FTP server.

                The file is downloaded once into memory (up to 10 attempts with a
                random pause of 0-4 seconds in between). Paths containing
                "observations" are treated as zip archives holding exactly one
                "produkt" file, paths containing "derived" as gzip compressed tables.
                The columns "MESS_DATUM" and "Datum" are converted to datetimes.

                Parameters
                ----------
                zip_filepath : str
                    Path to the file on the server. e.g.
                    - "/climate_environment/CDC/observations_germany/climate/10_minutes/air_temperature/recent/10minutenwerte_TU_00044_akt.zip"
                    - "/climate_environment/CDC/derived_germany/soil/daily/historical/derived_germany_soil_daily_historical_73.txt.gz"

                Returns
                -------
                pandas.DataFrame
                    The DataFrame of the selected file in the zip folder.

                Raises
                ------
                ImportError
                    If the path contains neither "observations" nor "derived".
                    This is checked before anything is downloaded.
                ValueError
                    If the zip archive holds no or more than one "produkt" file.
                Exception
                    Whatever the last failed download attempt raised.

                """
                # check folder to be derived or observation type, before spending
                # time on a download that could not be read anyway
                if re.search("observations", zip_filepath):
                    is_observation = True
                elif re.search("derived", zip_filepath):
                    is_observation = False
                else:
                    raise ImportError("ERROR: No file could be imported, as there is " +
                                      "just a setup for observation and derived datas")

                # get the compressed file from dwd
                compressed_bin = BytesIO()
                max_tries = 10
                with ftplib.FTP(CDC_HOST) as ftp:
                    ftp.login()

                    # download file
                    for num_tried in range(1, max_tries + 1):
                        # Throw away what a failed attempt already wrote,
                        # otherwise the retry would append to a partial file.
                        compressed_bin.seek(0)
                        compressed_bin.truncate()
                        try:
                            ftp.retrbinary("RETR " + zip_filepath, compressed_bin.write)
                            break
                        except Exception:
                            if num_tried == max_tries:
                                raise
                            time.sleep(random.randint(0, 400) / 100)
                compressed_bin.seek(0)

                # reader settings shared by both file types
                read_kwargs = dict(
                    sep=";",
                    skipinitialspace=True,
                    dtype={"Datum": str, "MESS_DATUM": str},
                    na_values=[-999, -9999, "####", "#####", "######"])

                if is_observation:
                    # get zip folder and files
                    with ZipFile(compressed_bin) as compressed_folder:
                        compressed_folder_files = compressed_folder.namelist()

                        # test if one and only one file matches the pattern
                        files = [name for name in compressed_folder_files
                                 if re.search("produkt", name)]

                        if len(files) == 0:
                            raise ValueError(
                                "There is no file matching the pattern: produkt " +
                                "in the zip files: \n- " +
                                "\n- ".join(compressed_folder_files))
                        elif len(files) > 1:
                            # NOTE(review): the message speaks of returning the
                            # first file although an error is raised; kept as is
                            # because callers may rely on the exception.
                            raise ValueError(
                                "There are more than one files matching the " +
                                "pattern: produkt\nin the zip file: " +
                                str(zip_filepath) +
                                "\nonly the first file is returned: " +
                                str(files[0]))

                        # extract the file from the zip folder as pd.DataFrame
                        with compressed_folder.open(files[0]) as f:
                            df = pd.read_table(f, **read_kwargs)
                else:
                    # read the already downloaded gzip file from memory instead of
                    # fetching it a second time through an ftp:// url
                    df = pd.read_table(compressed_bin, compression="gzip",
                                       **read_kwargs)

                # convert dates to datetime
                for col in ["MESS_DATUM", "Datum"]:
                    if col in df.columns:
                        df[col] = _dwd_date_parser(df[col])

                return df
         
     | 
| 
      
 209 
     | 
    
         
            +
             
     | 
| 
      
 210 
     | 
    
         
            +
            def get_dwd_meta(ftp_folder):
         
     | 
| 
      
 211 
     | 
    
         
            +
                """
         
     | 
| 
      
 212 
     | 
    
         
            +
                Get the meta file from the ftp_folder on the DWD server.
         
     | 
| 
      
 213 
     | 
    
         
            +
             
     | 
| 
      
 214 
     | 
    
         
            +
                Downloads the meta file of a given folder.
         
     | 
| 
      
 215 
     | 
    
         
            +
                Corrects the meta file of missing files. So if no file for the station is
         
     | 
| 
      
 216 
     | 
    
         
            +
                in the folder the meta entry gets deleted.
         
     | 
| 
      
 217 
     | 
    
         
            +
                Reset "von_datum" in meta file if there is a bigger gap than max_hole_d.
         
     | 
| 
      
 218 
     | 
    
         
            +
                Deletes entries with less years than min_years.
         
     | 
| 
      
 219 
     | 
    
         
            +
             
     | 
| 
      
 220 
     | 
    
         
            +
                Parameters
         
     | 
| 
      
 221 
     | 
    
         
            +
                ----------
         
     | 
| 
      
 222 
     | 
    
         
            +
                ftp_folder : str
         
     | 
| 
      
 223 
     | 
    
         
            +
                    The path to the directory where to search for the meta file.
         
     | 
| 
      
 224 
     | 
    
         
            +
                    e.g. "climate_environment/CDC/observations_germany/climate/hourly/precipitation/recent/".
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                Returns
         
     | 
| 
      
 227 
     | 
    
         
            +
                -------
         
     | 
| 
      
 228 
     | 
    
         
            +
                geopandas.GeoDataFrame
         
     | 
| 
      
 229 
     | 
    
         
            +
                    a GeoDataFrame of the meta file
         
     | 
| 
      
 230 
     | 
    
         
            +
             
     | 
| 
      
 231 
     | 
    
         
            +
                """
         
     | 
| 
      
 232 
     | 
    
         
            +
                # open ftp connection and get list of files in folder
         
     | 
| 
      
 233 
     | 
    
         
            +
                with ftplib.FTP(CDC_HOST) as ftp:
         
     | 
| 
      
 234 
     | 
    
         
            +
                    ftp.login()
         
     | 
| 
      
 235 
     | 
    
         
            +
             
     | 
| 
      
 236 
     | 
    
         
            +
                    # get and check the meta_file name
         
     | 
| 
      
 237 
     | 
    
         
            +
                    ftp_files = ftp.nlst(ftp_folder)
         
     | 
| 
      
 238 
     | 
    
         
            +
                    pattern = r".*(?<!_mn4)((_stations_list)|(_Beschreibung_Stationen))+.txt$"
         
     | 
| 
      
 239 
     | 
    
         
            +
                    meta_file = list(filter(re.compile(pattern).match, ftp_files))
         
     | 
| 
      
 240 
     | 
    
         
            +
             
     | 
| 
      
 241 
     | 
    
         
            +
                if len(meta_file) == 0:
         
     | 
| 
      
 242 
     | 
    
         
            +
                    log.info(
         
     | 
| 
      
 243 
     | 
    
         
            +
                        f"There is no file matching the pattern '{pattern}'"+
         
     | 
| 
      
 244 
     | 
    
         
            +
                        f"\nin the folder: ftp://{CDC_HOST}/{str(ftp_folder)}")
         
     | 
| 
      
 245 
     | 
    
         
            +
                    return None
         
     | 
| 
      
 246 
     | 
    
         
            +
                elif len(meta_file) > 1:
         
     | 
| 
      
 247 
     | 
    
         
            +
                    log.info(
         
     | 
| 
      
 248 
     | 
    
         
            +
                        f"There are more than one files matching the pattern: {pattern}" +
         
     | 
| 
      
 249 
     | 
    
         
            +
                        f" in the folder:\nftp://{CDC_HOST}/{str(ftp_folder)}" +
         
     | 
| 
      
 250 
     | 
    
         
            +
                        f"\nonly the first file is returned: {meta_file[0]}")
         
     | 
| 
      
 251 
     | 
    
         
            +
             
     | 
| 
      
 252 
     | 
    
         
            +
                # import meta file
         
     | 
| 
      
 253 
     | 
    
         
            +
                try:
         
     | 
| 
      
 254 
     | 
    
         
            +
                    if re.search("observations", ftp_folder):
         
     | 
| 
      
 255 
     | 
    
         
            +
                        with ftplib.FTP(CDC_HOST) as ftp:
         
     | 
| 
      
 256 
     | 
    
         
            +
                            ftp.login()
         
     | 
| 
      
 257 
     | 
    
         
            +
                            with BytesIO() as bio, StringIO() as sio:
         
     | 
| 
      
 258 
     | 
    
         
            +
                                ftp.retrbinary("RETR " + meta_file[0], bio.write)
         
     | 
| 
      
 259 
     | 
    
         
            +
                                sio.write(bio.getvalue().decode("WINDOWS-1252").replace("\r\n", "\n"))
         
     | 
| 
      
 260 
     | 
    
         
            +
                                colnames = sio.getvalue().split("\n")[0].split()
         
     | 
| 
      
 261 
     | 
    
         
            +
                                sio.seek(0)
         
     | 
| 
      
 262 
     | 
    
         
            +
                                meta = pd.read_table(
         
     | 
| 
      
 263 
     | 
    
         
            +
                                    sio,
         
     | 
| 
      
 264 
     | 
    
         
            +
                                    skiprows=2,
         
     | 
| 
      
 265 
     | 
    
         
            +
                                    lineterminator="\n",
         
     | 
| 
      
 266 
     | 
    
         
            +
                                    sep=r"\s{2,}|(?<=\d|\))\s{1}(?=[\w])",  # two or more white spaces or one space after digit and followed by word
         
     | 
| 
      
 267 
     | 
    
         
            +
                                    names=colnames,
         
     | 
| 
      
 268 
     | 
    
         
            +
                                    parse_dates=[col for col in colnames if "datum" in col.lower()],
         
     | 
| 
      
 269 
     | 
    
         
            +
                                    index_col="Stations_id",
         
     | 
| 
      
 270 
     | 
    
         
            +
                                    engine="python")
         
     | 
| 
      
 271 
     | 
    
         
            +
                    elif re.search("derived", ftp_folder):
         
     | 
| 
      
 272 
     | 
    
         
            +
                        meta = pd.read_table("ftp://opendata.dwd.de/" + meta_file[0],
         
     | 
| 
      
 273 
     | 
    
         
            +
                                             encoding="WINDOWS-1252", sep=";", skiprows=1,
         
     | 
| 
      
 274 
     | 
    
         
            +
                                             names=["Stations_id", "Stationshoehe",
         
     | 
| 
      
 275 
     | 
    
         
            +
                                                    "geoBreite", "geoLaenge",
         
     | 
| 
      
 276 
     | 
    
         
            +
                                                    "Stationsname", "Bundesland"],
         
     | 
| 
      
 277 
     | 
    
         
            +
                                             index_col="Stations_id"
         
     | 
| 
      
 278 
     | 
    
         
            +
                                             )
         
     | 
| 
      
 279 
     | 
    
         
            +
                except:
         
     | 
| 
      
 280 
     | 
    
         
            +
                    traceback.print_exc()
         
     | 
| 
      
 281 
     | 
    
         
            +
                    print("URL Error: The URL could not be found:\n" +
         
     | 
| 
      
 282 
     | 
    
         
            +
                          "ftp://opendata.dwd.de/" + meta_file[0])
         
     | 
| 
      
 283 
     | 
    
         
            +
                    return None
         
     | 
| 
      
 284 
     | 
    
         
            +
             
     | 
| 
      
 285 
     | 
    
         
            +
                try:
         
     | 
| 
      
 286 
     | 
    
         
            +
                    meta = gpd.GeoDataFrame(meta,
         
     | 
| 
      
 287 
     | 
    
         
            +
                                            geometry=gpd.points_from_xy(meta.geoLaenge,
         
     | 
| 
      
 288 
     | 
    
         
            +
                                                                        meta.geoBreite,
         
     | 
| 
      
 289 
     | 
    
         
            +
                                                                        crs="EPSG:4326"))
         
     | 
| 
      
 290 
     | 
    
         
            +
                    meta = meta.drop(["geoLaenge", "geoBreite"], axis=1)
         
     | 
| 
      
 291 
     | 
    
         
            +
                except:
         
     | 
| 
      
 292 
     | 
    
         
            +
                    traceback.print_exc()
         
     | 
| 
      
 293 
     | 
    
         
            +
                    print("Error while converting DataFrame to GeoDataFrame," +
         
     | 
| 
      
 294 
     | 
    
         
            +
                          " maybe the columns aren't named 'geoLaenge' and geoBreite'" +
         
     | 
| 
      
 295 
     | 
    
         
            +
                          "\nhere is the header of the DataFrame:\n")
         
     | 
| 
      
 296 
     | 
    
         
            +
                    print(meta.head())
         
     | 
| 
      
 297 
     | 
    
         
            +
                    return None
         
     | 
| 
      
 298 
     | 
    
         
            +
             
     | 
| 
      
 299 
     | 
    
         
            +
                # delete entries where there is no file in the ftp-folder
         
     | 
| 
      
 300 
     | 
    
         
            +
                rows_drop = []
         
     | 
| 
      
 301 
     | 
    
         
            +
                str_ftp_files = str(ftp_files)
         
     | 
| 
      
 302 
     | 
    
         
            +
                for i, row in meta.iterrows():
         
     | 
| 
      
 303 
     | 
    
         
            +
                    if not (re.search(r"[_\.]" + dwd_id_to_str(i) + r"[_\.]|" +
         
     | 
| 
      
 304 
     | 
    
         
            +
                                      r"[_\.]" + str(i) + r"[_\.]", str_ftp_files)):
         
     | 
| 
      
 305 
     | 
    
         
            +
                        rows_drop.append(i)
         
     | 
| 
      
 306 
     | 
    
         
            +
                meta = meta.drop(rows_drop)
         
     | 
| 
      
 307 
     | 
    
         
            +
             
     | 
| 
      
 308 
     | 
    
         
            +
                # change meta date entries if the file has a different date
         
     | 
| 
      
 309 
     | 
    
         
            +
                if ("observation" in ftp_folder) \
         
     | 
| 
      
 310 
     | 
    
         
            +
                        and ("bis_datum" and "von_datum" in meta) \
         
     | 
| 
      
 311 
     | 
    
         
            +
                        and ("recent" not in ftp_folder):
         
     | 
| 
      
 312 
     | 
    
         
            +
                    zip_files = list(filter(re.compile(r".+\d+_\d+_\d+_hist.zip").match,
         
     | 
| 
      
 313 
     | 
    
         
            +
                                            ftp_files))
         
     | 
| 
      
 314 
     | 
    
         
            +
                    zip_files.sort()
         
     | 
| 
      
 315 
     | 
    
         
            +
                    zip_files.append(zip_files[0])  # else the last entry won't get tested
         
     | 
| 
      
 316 
     | 
    
         
            +
                    last_sid, last_from_date, last_to_date = None, None, None
         
     | 
| 
      
 317 
     | 
    
         
            +
             
     | 
| 
      
 318 
     | 
    
         
            +
                    for zip_file in zip_files:
         
     | 
| 
      
 319 
     | 
    
         
            +
                        # get new files dates
         
     | 
| 
      
 320 
     | 
    
         
            +
                        filename = zip_file.split("/")[-1]
         
     | 
| 
      
 321 
     | 
    
         
            +
                        _, kind, sid, from_date, to_date, _ = filename.split("_")
         
     | 
| 
      
 322 
     | 
    
         
            +
                        if kind in ["niedereder"]:
         
     | 
| 
      
 323 
     | 
    
         
            +
                            continue
         
     | 
| 
      
 324 
     | 
    
         
            +
                        from_date = pd.Timestamp(from_date)
         
     | 
| 
      
 325 
     | 
    
         
            +
                        to_date = pd.Timestamp(to_date)
         
     | 
| 
      
 326 
     | 
    
         
            +
                        sid = int(sid)
         
     | 
| 
      
 327 
     | 
    
         
            +
             
     | 
| 
      
 328 
     | 
    
         
            +
                        # compare with previous file's dates
         
     | 
| 
      
 329 
     | 
    
         
            +
                        if last_sid and (sid == last_sid):
         
     | 
| 
      
 330 
     | 
    
         
            +
                            last_to_date = to_date
         
     | 
| 
      
 331 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 332 
     | 
    
         
            +
                            # compare last values with meta file dates
         
     | 
| 
      
 333 
     | 
    
         
            +
                            if last_sid and (last_sid in meta.index):
         
     | 
| 
      
 334 
     | 
    
         
            +
                                if last_from_date > meta.loc[last_sid, "von_datum"]:
         
     | 
| 
      
 335 
     | 
    
         
            +
                                    meta.loc[last_sid, "von_datum"] = last_from_date
         
     | 
| 
      
 336 
     | 
    
         
            +
                                if last_to_date < meta.loc[last_sid, "bis_datum"]:
         
     | 
| 
      
 337 
     | 
    
         
            +
                                    meta.loc[last_sid, "bis_datum"] = last_to_date
         
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
      
 339 
     | 
    
         
            +
                            # set values as last values
         
     | 
| 
      
 340 
     | 
    
         
            +
                            last_to_date = to_date
         
     | 
| 
      
 341 
     | 
    
         
            +
                            last_from_date = from_date
         
     | 
| 
      
 342 
     | 
    
         
            +
                            last_sid = sid
         
     | 
| 
      
 343 
     | 
    
         
            +
             
     | 
| 
      
 344 
     | 
    
         
            +
                # trim whitespace in string columns
         
     | 
| 
      
 345 
     | 
    
         
            +
                for dtype, col in zip(meta.dtypes, meta.columns):
         
     | 
| 
      
 346 
     | 
    
         
            +
                    if pd.api.types.is_string_dtype(dtype) and col != "geometry":
         
     | 
| 
      
 347 
     | 
    
         
            +
                        meta[col] = meta[col].str.strip()
         
     | 
| 
      
 348 
     | 
    
         
            +
             
     | 
| 
      
 349 
     | 
    
         
            +
                # return
         
     | 
| 
      
 350 
     | 
    
         
            +
                return meta
         
     | 
| 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env python
         
     | 
| 
      
 2 
     | 
    
         
            +
            # -*- coding: utf-8 -*-
         
     | 
| 
      
 3 
     | 
    
         
            +
            """A collection of geometry functions.
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Based on `max_fun` package on https://github.com/maxschmi/max_fun
         
     | 
| 
      
 6 
     | 
    
         
            +
            Created by Max Schmit, 2021
         
     | 
| 
      
 7 
     | 
    
         
            +
            """
         
     | 
| 
      
 8 
     | 
    
         
            +
            # libraries
         
     | 
| 
      
 9 
     | 
    
         
            +
            import numpy as np
         
     | 
| 
      
 10 
     | 
    
         
            +
            from shapely.geometry import Point, LineString
         
     | 
| 
      
 11 
     | 
    
         
            +
            import geopandas as gpd
         
     | 
| 
      
 12 
     | 
    
         
            +
            import rasterio as rio
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            # functions
         
     | 
| 
      
 15 
     | 
    
         
            +
            def polar_line(center_xy, radius, angle):
                """Build a straight line from a start point using polar coordinates.

                Parameters
                ----------
                center_xy : list, array or tuple of int or floats
                    The X and Y coordinates of the point the line starts at.
                radius : int or float
                    The length of the line.
                angle : int or float
                    The direction of the line in degrees.
                    0 points towards positive X (east); the Y offset of the end
                    point is ``sin(angle) * radius``.

                Returns
                -------
                shapely.geometry.LineString
                    Line running from ``center_xy`` to the computed end point.
                """
                # convert once and reuse for both coordinate offsets
                angle_rad = np.deg2rad(angle)
                end_xy = [
                    center_xy[0] + np.cos(angle_rad) * radius,
                    center_xy[1] + np.sin(angle_rad) * radius,
                ]

                return LineString([center_xy, end_xy])
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            def raster2points(raster_np, transform, crs=None):
                """Convert the valid cells of a raster array to a point GeoDataFrame.

                Every cell of the first band that is not NaN becomes one point,
                located at the coordinates ``rasterio.transform.xy`` returns for
                that cell (the cell center with rasterio's default offset).

                Until now this only works for rasters with one band:
                only ``raster_np[0]`` is read.

                Parameters
                ----------
                raster_np : np.array
                    The imported raster array, indexed as (band, row, column).
                transform : rio.Affine
                    The Affine transformation of the raster.
                crs : str or crs-type, optional
                    The coordinate reference system for the raster, by default None

                Returns
                -------
                geopandas.GeoDataFrame
                    One row per non-NaN cell.
                    The raster values are in the ``data`` column and the cell
                    locations in the geometry column.
                """
                band = raster_np[0]
                valid = ~np.isnan(band)

                # nonzero() returns one index array per axis, so for a 2D band
                # the order is (rows, cols) -- the order rio.transform.xy expects.
                rows, cols = valid.nonzero()
                xs, ys = rio.transform.xy(transform, rows, cols)

                # boolean indexing and nonzero() both walk the band in row-major
                # order, so values and points line up one to one.
                return gpd.GeoDataFrame(
                    {"data": band[valid]},
                    geometry=gpd.points_from_xy(xs, ys),
                    crs=crs)
         
     |