weatherdb 1.1.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. docker/Dockerfile +30 -0
  2. docker/docker-compose.yaml +58 -0
  3. docker/docker-compose_test.yaml +24 -0
  4. docker/start-docker-test.sh +6 -0
  5. docs/requirements.txt +10 -0
  6. docs/source/Changelog.md +2 -0
  7. docs/source/License.rst +7 -0
  8. docs/source/Methode.md +161 -0
  9. docs/source/_static/custom.css +8 -0
  10. docs/source/_static/favicon.ico +0 -0
  11. docs/source/_static/logo.png +0 -0
  12. docs/source/api/api.rst +15 -0
  13. docs/source/api/cli.rst +8 -0
  14. docs/source/api/weatherDB.broker.rst +10 -0
  15. docs/source/api/weatherDB.config.rst +7 -0
  16. docs/source/api/weatherDB.db.rst +23 -0
  17. docs/source/api/weatherDB.rst +22 -0
  18. docs/source/api/weatherDB.station.rst +56 -0
  19. docs/source/api/weatherDB.stations.rst +46 -0
  20. docs/source/api/weatherDB.utils.rst +22 -0
  21. docs/source/conf.py +137 -0
  22. docs/source/index.rst +33 -0
  23. docs/source/setup/Configuration.md +127 -0
  24. docs/source/setup/Hosting.md +9 -0
  25. docs/source/setup/Install.md +49 -0
  26. docs/source/setup/Quickstart.md +183 -0
  27. docs/source/setup/setup.rst +12 -0
  28. weatherdb/__init__.py +24 -0
  29. weatherdb/_version.py +1 -0
  30. weatherdb/alembic/README.md +8 -0
  31. weatherdb/alembic/alembic.ini +80 -0
  32. weatherdb/alembic/config.py +9 -0
  33. weatherdb/alembic/env.py +100 -0
  34. weatherdb/alembic/script.py.mako +26 -0
  35. weatherdb/alembic/versions/V1.0.0_initial_database_creation.py +898 -0
  36. weatherdb/alembic/versions/V1.0.2_more_charachters_for_settings+term_station_ma_raster.py +88 -0
  37. weatherdb/alembic/versions/V1.0.5_fix-ma-raster-values.py +152 -0
  38. weatherdb/alembic/versions/V1.0.6_update-views.py +22 -0
  39. weatherdb/broker.py +667 -0
  40. weatherdb/cli.py +214 -0
  41. weatherdb/config/ConfigParser.py +663 -0
  42. weatherdb/config/__init__.py +5 -0
  43. weatherdb/config/config_default.ini +162 -0
  44. weatherdb/db/__init__.py +3 -0
  45. weatherdb/db/connections.py +374 -0
  46. weatherdb/db/fixtures/RichterParameters.json +34 -0
  47. weatherdb/db/models.py +402 -0
  48. weatherdb/db/queries/get_quotient.py +155 -0
  49. weatherdb/db/views.py +165 -0
  50. weatherdb/station/GroupStation.py +710 -0
  51. weatherdb/station/StationBases.py +3108 -0
  52. weatherdb/station/StationET.py +111 -0
  53. weatherdb/station/StationP.py +807 -0
  54. weatherdb/station/StationPD.py +98 -0
  55. weatherdb/station/StationT.py +164 -0
  56. weatherdb/station/__init__.py +13 -0
  57. weatherdb/station/constants.py +21 -0
  58. weatherdb/stations/GroupStations.py +519 -0
  59. weatherdb/stations/StationsBase.py +1021 -0
  60. weatherdb/stations/StationsBaseTET.py +30 -0
  61. weatherdb/stations/StationsET.py +17 -0
  62. weatherdb/stations/StationsP.py +128 -0
  63. weatherdb/stations/StationsPD.py +24 -0
  64. weatherdb/stations/StationsT.py +21 -0
  65. weatherdb/stations/__init__.py +11 -0
  66. weatherdb/utils/TimestampPeriod.py +369 -0
  67. weatherdb/utils/__init__.py +3 -0
  68. weatherdb/utils/dwd.py +350 -0
  69. weatherdb/utils/geometry.py +69 -0
  70. weatherdb/utils/get_data.py +285 -0
  71. weatherdb/utils/logging.py +126 -0
  72. weatherdb-1.1.0.dist-info/LICENSE +674 -0
  73. weatherdb-1.1.0.dist-info/METADATA +765 -0
  74. weatherdb-1.1.0.dist-info/RECORD +77 -0
  75. weatherdb-1.1.0.dist-info/WHEEL +5 -0
  76. weatherdb-1.1.0.dist-info/entry_points.txt +2 -0
  77. weatherdb-1.1.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,710 @@
1
+ # libraries
2
+ import logging
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ import warnings
6
+ import zipfile
7
+ from packaging import version
8
+ import pandas as pd
9
+
10
+ from ..utils.TimestampPeriod import TimestampPeriod
11
+ from .StationBases import StationBase, AGG_TO
12
+ from . import StationP, StationT, StationET
13
+
14
# set settings
# ############
# public API of this module
__all__ = ["GroupStation"]
# module-level logger, named after the module import path
log = logging.getLogger(__name__)
18
+
19
+ # class definition
20
+ ##################
21
class GroupStation(object):
    """Bundle all available parameter stations of one station id.

    Useful e.g. to create the input files for a simulation that needs
    T, ET and N at once: one GroupStation downloads the data of every
    parameter for a single station.
    """

    def __init__(self, id, error_if_missing=True, **kwargs):
        self.id = id
        self._error_if_missing = error_if_missing
        self.station_parts = []
        # try to build one station object per parameter class
        for station_cls in [StationP, StationT, StationET]:
            try:
                part = station_cls(id=id, **kwargs)
            except Exception as err:
                # a missing parameter is either fatal or silently skipped
                if error_if_missing:
                    raise err
            else:
                self.station_parts.append(part)
        self.paras_available = [part._para for part in self.station_parts]
40
+
41
+ def _check_paras(self, paras):
42
+ if isinstance(paras, str) and paras != "all":
43
+ paras = [paras,]
44
+
45
+ if isinstance(paras, str) and (paras == "all"):
46
+ return self.paras_available
47
+ else:
48
+ paras_new = []
49
+ for para in paras:
50
+ if para in self.paras_available:
51
+ paras_new.append(para)
52
+ elif self._error_if_missing:
53
+ raise ValueError(
54
+ f"The parameter {para} you asked for is not available for station {self.id}")
55
+ return paras_new
56
+
57
+ @staticmethod
58
+ def _check_kinds(kinds):
59
+ # type cast kinds
60
+ if isinstance(kinds, str):
61
+ kinds = [kinds]
62
+ else:
63
+ kinds = kinds.copy()
64
+ return kinds
65
+
66
+ def get_available_paras(self, short=False):
67
+ """Get the possible parameters for this station.
68
+
69
+ Parameters
70
+ ----------
71
+ short : bool, optional
72
+ Should the short name of the parameters be returned.
73
+ The default is "long".
74
+
75
+ Returns
76
+ -------
77
+ list of str
78
+ A list of the long parameter names that are possible for this station to get.
79
+ """
80
+ paras = []
81
+ attr_name = "_para" if short else "_para_long"
82
+ for stat in self.station_parts:
83
+ paras.append(getattr(stat, attr_name))
84
+
85
+ return paras
86
+
87
+ def get_filled_period(self, kinds="best", from_meta=True, join_how="inner"):
88
+ """Get the combined filled period for all 3 stations.
89
+
90
+ This is the maximum possible timerange for these stations.
91
+
92
+ Parameters
93
+ ----------
94
+ kind : str
95
+ The data kind to look for filled period.
96
+ Must be a column in the timeseries DB.
97
+ Must be one of "raw", "qc", "filled", "adj".
98
+ If "best" is given, then depending on the parameter of the station the best kind is selected.
99
+ For Precipitation this is "corr" and for the other this is "filled".
100
+ For the precipitation also "qn" and "corr" are valid.
101
+ from_meta : bool, optional
102
+ Should the period be from the meta table?
103
+ If False: the period is returned from the timeserie. In this case this function is only a wrapper for .get_period_meta.
104
+ The default is True.
105
+ join_how : str, optional
106
+ How should the different periods get joined.
107
+ If "inner" then the minimal period that is inside of all the filled_periods is returned.
108
+ If "outer" then the maximal possible period is returned.
109
+ The default is "inner".
110
+
111
+ Returns
112
+ -------
113
+ TimestampPeriod
114
+ The maximum filled period for the 3 parameters for this station.
115
+ """
116
+ kinds = self._check_kinds(kinds)
117
+ for kind in ["filled_by", "adj"]:
118
+ if kind in kinds:
119
+ kinds.remove(kind)
120
+
121
+ # get filled_period
122
+ for kind in kinds:
123
+ for stat in self.station_parts:
124
+ new_filled_period = stat.get_filled_period(
125
+ kind=kind, from_meta=from_meta)
126
+
127
+ if "filled_period" not in locals():
128
+ filled_period = new_filled_period.copy()
129
+ else:
130
+ filled_period = filled_period.union(
131
+ new_filled_period, how=join_how)
132
+
133
+ return filled_period
134
+
135
+ def get_df(self, period=(None, None), kinds="best", paras="all",
136
+ agg_to="day", nas_allowed=True, add_na_share=False,
137
+ add_t_min=False, add_t_max=False, **kwargs):
138
+ """Get a DataFrame with the corresponding data.
139
+
140
+ Parameters
141
+ ----------
142
+ period : TimestampPeriod or (tuple or list of datetime.datetime or None), optional
143
+ The minimum and maximum Timestamp for which to get the timeseries.
144
+ If None is given, the maximum or minimal possible Timestamp is taken.
145
+ The default is (None, None).
146
+ kinds : str or list of str
147
+ The data kind to look for filled period.
148
+ Must be a column in the timeseries DB.
149
+ Must be one of "raw", "qc", "filled", "adj", "filled_by", "best"("corr" for N and "filled" for T and ET).
150
+ If "best" is given, then depending on the parameter of the station the best kind is selected.
151
+ For Precipitation this is "corr" and for the other this is "filled".
152
+ For the precipitation also "qn" and "corr" are valid.
153
+ agg_to : str, optional
154
+ To what aggregation level should the timeseries get aggregated to.
155
+ The minimum aggregation for Temperatur and ET is daily and for the precipitation it is 10 minutes.
156
+ If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
157
+ So if 10 minutes is selected, than precipitation is returned in 10 minuets and T and ET as daily.
158
+ The default is "10 min".
159
+ nas_allowed : bool, optional
160
+ Should NAs be allowed?
161
+ If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
162
+ If False, then the minimal filled period is returned.
163
+ The default is True.
164
+ paras : list of str or str, optional
165
+ Give the parameters for which to get the meta information.
166
+ Can be "n", "t", "et" or "all".
167
+ If "all", then every available station parameter is returned.
168
+ The default is "all"
169
+ add_na_share : bool, optional
170
+ Should one or several columns be added to the Dataframe with the share of NAs in the data.
171
+ This is especially important, when the stations data get aggregated, because the aggregation doesn't make sense if there are a lot of NAs in the original data.
172
+ If True, one column per asked kind is added with the respective share of NAs, if the aggregation step is not the smallest.
173
+ The "kind"_na_share column is in percentage.
174
+ The default is False.
175
+ add_t_min : bool, optional
176
+ Should the minimal temperature value get added?
177
+ The default is False.
178
+ add_t_max : bool, optional
179
+ Should the maximal temperature value get added?
180
+ The default is False.
181
+
182
+ Returns
183
+ -------
184
+ pd.Dataframe
185
+ A DataFrame with the timeseries for this station and the given period.
186
+ """
187
+ paras = self._check_paras(paras)
188
+
189
+ # download dataframes
190
+ dfs = []
191
+ for stat in self.station_parts:
192
+ if stat._para in paras:
193
+ # check if min and max for temperature should get added
194
+ use_kinds = kinds.copy()
195
+ if stat._para == "t":
196
+ if isinstance(use_kinds, str):
197
+ use_kinds=[use_kinds]
198
+ if "best" in use_kinds:
199
+ use_kinds.insert(use_kinds.index("best"), "filled")
200
+ use_kinds.remove("best")
201
+ for k in ["raw", "filled"]:
202
+ if k in use_kinds:
203
+ if add_t_max:
204
+ use_kinds.insert(
205
+ use_kinds.index(k)+1,
206
+ f"{k}_max")
207
+ if add_t_min:
208
+ use_kinds.insert(
209
+ use_kinds.index(k)+1,
210
+ f"{k}_min")
211
+
212
+ # get the data from station object
213
+ df = stat.get_df(
214
+ period=period,
215
+ kinds=use_kinds,
216
+ agg_to=agg_to,
217
+ nas_allowed=nas_allowed,
218
+ add_na_share=add_na_share,
219
+ **kwargs)
220
+ df = df.rename(dict(zip(
221
+ df.columns,
222
+ [stat._para.upper() + "_" + col for col in df.columns])),
223
+ axis=1)
224
+ dfs.append(df)
225
+
226
+ # concat the dfs
227
+ if len(dfs) > 1:
228
+ df_all = pd.concat(dfs, axis=1)
229
+ elif len(dfs) == 1 :
230
+ df_all = dfs[0]
231
+ else:
232
+ raise ValueError("No timeserie was found for {paras} and Station {stid}".format(
233
+ paras=", ".join(paras),
234
+ stid=self.id))
235
+
236
+ return df_all
237
+
238
+ @classmethod
239
+ def get_meta_explanation(cls, infos="all"):
240
+ """Get the explanations of the available meta fields.
241
+
242
+ Parameters
243
+ ----------
244
+ infos : list or string, optional
245
+ The infos you wish to get an explanation for.
246
+ If "all" then all the available information get returned.
247
+ The default is "all"
248
+
249
+ Returns
250
+ -------
251
+ pd.Series
252
+ a pandas Series with the information names as index and the explanation as values.
253
+ """
254
+ return StationBase.get_meta_explanation(infos=infos)
255
+
256
+ def get_max_period(self, kinds, nas_allowed=False):
257
+ """Get the maximum available period for this stations timeseries.
258
+
259
+ If nas_allowed is True, then the maximum range of the timeserie is returned.
260
+ Else the minimal filled period is returned
261
+
262
+ Parameters
263
+ ----------
264
+ kinds : str or list of str
265
+ The data kinds to update.
266
+ Must be a column in the timeseries DB.
267
+ Must be one of "raw", "qc", "filled", "adj".
268
+ For the precipitation also "qn" and "corr" are valid.
269
+ nas_allowed : bool, optional
270
+ Should NAs be allowed?
271
+ If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
272
+ If False, then the minimal filled period is returned.
273
+ The default is False.
274
+
275
+ Returns
276
+ -------
277
+ utils.TimestampPeriod
278
+ The maximum Timestamp Period
279
+ """
280
+ kinds = self._check_kinds(kinds)
281
+ max_period = None
282
+ for stat in self.station_parts:
283
+ max_period_i = stat.get_max_period(
284
+ kinds=kinds, nas_allowed=nas_allowed)
285
+ if max_period is None:
286
+ max_period = max_period_i
287
+ else:
288
+ max_period = max_period.union(
289
+ max_period_i,
290
+ how="outer" if nas_allowed else "inner")
291
+
292
+ return max_period
293
+
294
+ def get_meta(self, paras="all", **kwargs):
295
+ """Get the meta information for every parameter of this station.
296
+
297
+ Parameters
298
+ ----------
299
+ paras : list of str or str, optional
300
+ Give the parameters for which to get the meta information.
301
+ Can be "n", "t", "et" or "all".
302
+ If "all", then every available station parameter is returned.
303
+ The default is "all"
304
+ **kwargs : dict, optional
305
+ The optional keyword arguments are handed to the single Station get_meta methods. Can be e.g. "info".
306
+
307
+ Returns
308
+ -------
309
+ dict
310
+ dict with the information.
311
+ there is one subdict per parameter.
312
+ If only one parameter is asked for, then there is no subdict, but only a single value.
313
+ """
314
+ paras = self._check_paras(paras)
315
+
316
+ for stat in self.station_parts:
317
+ if stat._para in paras:
318
+ meta_para = stat.get_meta(**kwargs)
319
+ if "meta_all" not in locals():
320
+ meta_all = {stat._para:meta_para}
321
+ else:
322
+ meta_all.update({stat._para:meta_para})
323
+ return meta_all
324
+
325
+ def get_geom(self, crs=None):
326
+ """Get the point geometry of the station.
327
+
328
+ Parameters
329
+ ----------
330
+ crs: str, int or None, optional
331
+ The coordinate reference system of the geometry.
332
+ If None, then the geometry is returned in WGS84 (EPSG:4326).
333
+ If string, then it should be in a pyproj readable format.
334
+ If int, then it should be the EPSG code.
335
+ The default is None.
336
+
337
+ Returns
338
+ -------
339
+ shapely.geometries.Point
340
+ The location of the station as shapely Point in the given coordinate reference system.
341
+ """
342
+ return self.station_parts[0].get_geom(crs=crs)
343
+
344
+ def get_name(self):
345
+ return self.station_parts[0].get_name()
346
+
347
+ def create_roger_ts(self, dir, period=(None, None),
348
+ kind="best", r_r0=1, add_t_min=False, add_t_max=False,
349
+ do_toolbox_format=False,
350
+ **kwargs):
351
+ """Create the timeserie files for roger as csv.
352
+
353
+ This is only a wrapper function for create_ts with some standard settings.
354
+
355
+ Parameters
356
+ ----------
357
+ dir : pathlib like object or zipfile.ZipFile
358
+ The directory or Zipfile to store the timeseries in.
359
+ If a zipfile is given a folder with the statiopns ID is added to the filepath.
360
+ period : TimestampPeriod like object, optional
361
+ The period for which to get the timeseries.
362
+ If (None, None) is entered, then the maximal possible period is computed.
363
+ The default is (None, None)
364
+ kind : str
365
+ The data kind to look for filled period.
366
+ Must be a column in the timeseries DB.
367
+ Must be one of "raw", "qc", "filled", "adj".
368
+ If "best" is given, then depending on the parameter of the station the best kind is selected.
369
+ For Precipitation this is "corr" and for the other this is "filled".
370
+ For the precipitation also "qn" and "corr" are valid.
371
+ r_r0 : int or float, list of int or float or None, optional
372
+ Should the ET timeserie contain a column with R/R0.
373
+ If None, then no column is added.
374
+ If int or float, then a R/R0 column is appended with this number as standard value.
375
+ If list of int or floats, then the list should have the same length as the ET-timeserie and is appanded to the Timeserie.
376
+ If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
377
+ The default is 1.
378
+ add_t_min=False : bool, optional
379
+ Schould the minimal temperature value get added?
380
+ The default is False.
381
+ add_t_max=False : bool, optional
382
+ Schould the maximal temperature value get added?
383
+ The default is False.
384
+ do_toolbox_format : bool, optional
385
+ Should the timeseries be saved in the RoGeR toolbox format? (have a look at the RoGeR examples in https://github.com/Hydrology-IFH/roger)
386
+ The default is False.
387
+ **kwargs:
388
+ additional parameters for Station.get_df
389
+
390
+ Raises
391
+ ------
392
+ Warning
393
+ If there are NAs in the timeseries or the period got changed.
394
+ """
395
+ if do_toolbox_format:
396
+ return self.create_ts(
397
+ dir=dir, period=period, kinds=kind,
398
+ agg_to="10 min", r_r0=r_r0, split_date=True,
399
+ nas_allowed=False,
400
+ add_t_min=add_t_min, add_t_max=add_t_max,
401
+ file_names={"N":"PREC.txt", "T":"TA.txt", "ET":"PET.txt"},
402
+ col_names={"N":"PREC", "ET":"PET",
403
+ "T":"TA", "T_min":"TA_min", "T_max":"TA_max",
404
+ "Jahr":"YYYY", "Monat":"MM", "Tag":"DD",
405
+ "Stunde":"hh", "Minute":"mm"},
406
+ add_meta=False,
407
+ keep_date_parts=True,
408
+ **kwargs)
409
+ else:
410
+ return self.create_ts(
411
+ dir=dir, period=period, kinds=kind,
412
+ agg_to="10 min", r_r0=r_r0, split_date=True,
413
+ nas_allowed=False,
414
+ add_t_min=add_t_min, add_t_max=add_t_max,
415
+ **kwargs)
416
+
417
    def create_ts(self, dir, period=(None, None),
                  kinds="best", paras="all",
                  agg_to="10 min", r_r0=None, split_date=False,
                  nas_allowed=True, add_na_share=False,
                  add_t_min=False, add_t_max=False,
                  add_meta=True, file_names={}, col_names={},
                  keep_date_parts=False,
                  **kwargs):
        """Create the timeserie files as csv.

        Parameters
        ----------
        dir : pathlib like object or zipfile.ZipFile
            The directory or Zipfile to store the timeseries in.
            If a zipfile is given a folder with the stations ID is added to the filepath.
        period : TimestampPeriod like object, optional
            The period for which to get the timeseries.
            If (None, None) is entered, then the maximal possible period is computed.
            The default is (None, None)
        kinds : str or list of str
            The data kinds to look for filled period.
            Must be a column in the timeseries DB.
            Must be one of "raw", "qc", "filled", "adj", "filled_by", "filled_share", "best".
            If "best" is given, then depending on the parameter of the station the best kind is selected.
            For precipitation this is "corr" and for the other this is "filled".
            For the precipitation also "qn" and "corr" are valid.
            If only one kind is asked for, then the columns get renamed to only have the parameter name as column name.
        paras : list of str or str, optional
            Give the parameters for which to get the timeseries.
            Can be "n", "t", "et" or "all".
            If "all", then every available station parameter is returned.
            The default is "all"
        agg_to : str, optional
            To what aggregation level should the timeseries get aggregated to.
            The minimum aggregation for Temperatur and ET is daily and for the precipitation it is 10 minutes.
            If a smaller aggregation is selected the minimum possible aggregation for the respective parameter is returned.
            So if 10 minutes is selected, then precipitation is returned in 10 minutes and T and ET as daily.
            The default is "10 min".
        r_r0 : int or float or None or pd.Series or list, optional
            Should the ET timeserie contain a column with R/R0.
            If None, then no column is added.
            If int, then a R/R0 column is appended with this number as standard value.
            If list of int or floats, then the list should have the same length as the ET-timeserie and is appended to the Timeserie.
            If pd.Series, then the index should be a timestamp index. The serie is then joined to the ET timeserie.
            The default is None.
        split_date : bool, optional
            Should the timestamp get splitted into parts, so one column for year, one for month etc.?
            If False the timestamp is saved in one column as string.
        nas_allowed : bool, optional
            Should NAs be allowed?
            If True, then the maximum possible period is returned, even if there are NAs in the timeserie.
            If False, then the minimal filled period is returned.
            The default is True.
        add_na_share : bool, optional
            Should one or several columns be added to the Dataframe with the share of NAs in the data.
            This is especially important, when the stations data get aggregated, because the aggregation doesn't make sense if there are a lot of NAs in the original data.
            If True, one column per asked kind is added with the respective share of NAs, if the aggregation step is not the smallest.
            The "kind"_na_share column is in percentage.
            The default is False.
        add_t_min : bool, optional
            Should the minimal temperature value get added?
            The default is False.
        add_t_max : bool, optional
            Should the maximal temperature value get added?
            The default is False.
        add_meta : bool, optional
            Should station Meta information like name and Location (lat, long) be added to the file?
            The default is True.
        file_names : dict, optional
            A dictionary with the file names for the different parameters.
            e.g.{"N":"PREC.txt", "T":"TA.txt", "ET":"ET.txt"}
            If an empty dictionary is given, then the standard names are used.
            The default is {}.
        col_names : dict, optional
            A dictionary with the column names for the different parameters.
            e.g.{"N":"PREC", "T":"TA", "ET":"ET", "Jahr":"YYYY", "Monat":"MM", "Tag":"DD", "Stunde":"HH", "Minute":"MN"}
            If an empty dictionary is given, then the standard names are used.
            The default is {}.
        keep_date_parts : bool, optional
            only used if split_date is True.
            Should the date parts that are not needed, e.g. hour value for daily timeseries, be kept?
            If False, then the columns that are not needed are dropped.
            The default is False.
        **kwargs:
            additional parameters for Station.get_df

        Raises
        ------
        Warning
            If there are NAs in the timeseries and nas_allowed is False
            or the period got changed.
        """
        # check directory
        dir = self._check_dir(dir)

        # type cast kinds
        kinds = self._check_kinds(kinds)
        paras = self._check_paras(paras)

        # get the period
        # _skip_period_check is an internal kwarg used by wrapper calls
        # (e.g. get_df below) to avoid checking the period twice
        if not ("_skip_period_check" in kwargs and kwargs["_skip_period_check"]):
            period = TimestampPeriod._check_period(period).expand_to_timestamp()
            period_filled = self.get_filled_period(
                kinds=kinds,
                join_how="outer" if nas_allowed else "inner")

            if period.is_empty():
                period = period_filled
            else:
                # clip the asked period to the actually available one
                period_new = period_filled.union(
                    period,
                    how="inner")
                if period_new != period:
                    warnings.warn(
                        f"The Period for Station {self.id} got changed from {str(period)} to {str(period_new)}.")
                    period = period_new
        if "_skip_period_check" in kwargs:
            del kwargs["_skip_period_check"]

        # prepare loop
        name_suffix = "_{stid:0>5}.txt".format(stid=self.id)
        x, y = self.get_geom().coords.xy
        name = self.get_name() + " (ID: {stid})".format(stid=self.id)
        do_zip = isinstance(dir, zipfile.ZipFile)

        # write one file per parameter
        for para in paras:
            # get the timeserie
            df = self.get_df(
                period=period, kinds=kinds,
                paras=[para], agg_to=agg_to,
                nas_allowed=nas_allowed,
                add_na_share=add_na_share,
                add_t_min=add_t_min, add_t_max=add_t_max,
                _skip_period_check=True,
                **kwargs)

            # rename columns
            # with a single effective kind, the column is renamed to the
            # bare parameter name (plus _min/_max variants)
            if len(kinds)==1 or ("filled_by" in kinds and len(kinds)==2):
                if len(kinds)==1:
                    colname_base = [col for col in df.columns if len(col.split("_"))==2][0]
                else:
                    colname_base = f"{para.upper()}_" + kinds[1-(kinds.index("filled_by"))]
                df.rename(
                    {colname_base: para.upper(),
                     f"{colname_base}_min": f"{para.upper()}_min",
                     f"{colname_base}_max": f"{para.upper()}_max",},
                    axis=1, inplace=True)
            else:
                df.rename(
                    dict(zip(df.columns,
                             [col.replace(f"{para}_", f"{para.upper()}_")
                              for col in df.columns])),
                    axis=1, inplace=True)

            # check for NAs (the filled_by columns may legitimately hold NAs)
            filled_cols = [col for col in df.columns if "filled_by" in col]
            if not nas_allowed and df.drop(filled_cols, axis=1).isna().sum().sum() > 0:
                warnings.warn("There were NAs in the timeserie for Station {stid}.".format(
                    stid=self.id))

            # special operations for et
            if para == "et" and r_r0 is not None:
                if isinstance(r_r0, int) or isinstance(r_r0, float):
                    df = df.join(
                        pd.Series([r_r0]*len(df), name="R/R0", index=df.index))
                elif isinstance(r_r0, pd.Series):
                    df = df.join(r_r0.rename("R_R0"))
                elif isinstance(r_r0, list):
                    df = df.join(
                        pd.Series(r_r0, name="R/R0", index=df.index))

            # create tables
            if split_date:
                # AGG_TO maps the aggregation to the needed date parts
                n_parts = 5 if keep_date_parts else AGG_TO[agg_to]["split"][para]
                df = self._split_date(df.index)\
                    .iloc[:, 0:n_parts]\
                    .join(df)
            else:
                df.reset_index(inplace=True)

            # rename columns if user asked for
            df.rename(col_names, axis=1, inplace=True)

            # create header
            if add_meta:
                header = f"Name: {name}{"\t" * (len(df.columns)-1)}\n" +\
                    f"Lat: {y[0]} ,Lon: {x[0]}{"\t" * (len(df.columns)-1)}\n"
            else:
                header = ""

            # get file name
            if para.upper() in file_names:
                file_name = file_names[para.upper()]
            elif para in file_names:
                file_name = file_names[para]
            else:
                file_name = para.upper() + name_suffix

            # write table out
            # pandas renamed line_terminator to lineterminator in 1.5
            if version.parse(pd.__version__) > version.parse("1.5.0"):
                to_csv_kwargs = dict(lineterminator="\n")
            else:
                to_csv_kwargs = dict(line_terminator="\n")
            str_df = header + df.to_csv(
                sep="\t", decimal=".", index=False, **to_csv_kwargs)

            if do_zip:
                dir.writestr(f"{self.id}/{file_name}", str_df)
            else:
                with open(dir.joinpath(file_name), "w") as f:
                    f.write(str_df)
628
+
629
+ @staticmethod
630
+ def _check_dir(dir):
631
+ """Checks if a directors is valid and empty.
632
+
633
+ If not existing the directory is created.
634
+
635
+ Parameters
636
+ ----------
637
+ dir : pathlib object or zipfile.ZipFile
638
+ The directory to check.
639
+
640
+ Raises
641
+ ------
642
+ ValueError
643
+ If the directory is not empty.
644
+ ValueError
645
+ If the directory is not valid. E.G. it is a file path.
646
+ """
647
+ # check types
648
+ if isinstance(dir, str):
649
+ dir = Path(dir)
650
+
651
+ # check directory
652
+ if isinstance(dir, Path):
653
+ if dir.is_dir():
654
+ if len(list(dir.iterdir())) > 0:
655
+ raise ValueError(
656
+ "The given directory '{dir}' is not empty.".format(
657
+ dir=str(dir)))
658
+ elif dir.suffix == "":
659
+ dir.mkdir()
660
+ else:
661
+ raise ValueError(
662
+ "The given directory '{dir}' is not a directory.".format(
663
+ dir=dir))
664
+ elif not isinstance(dir, zipfile.ZipFile):
665
+ raise ValueError(
666
+ "The given directory '{dir}' is not a directory or zipfile.".format(
667
+ dir=dir))
668
+
669
+ return dir
670
+
671
+ @staticmethod
672
+ def _split_date(dates):
673
+ """
674
+ Split datetime into parts.
675
+
676
+ Parameters
677
+ ----------
678
+ dates : pandas.DatetimeIndex or list of (datetime.dateime or pandas.Timestamp) or
679
+ pandas.DataFrame of (datetime.datetime or pandas.Timestamp)
680
+ The datetime's to split.
681
+
682
+ Returns
683
+ -------
684
+ pandas.DataFrame
685
+ A DataFrame with 5 columns (Jahr, Monat, Tag, Stunde, Minute).
686
+ """
687
+ # if dates is not a list make it a list
688
+ if isinstance(dates, datetime) or isinstance(dates, pd.Timestamp):
689
+ dates = pd.DatetimeIndex([dates])
690
+ index = range(0, len(dates))
691
+
692
+ elif isinstance(dates, pd.DatetimeIndex):
693
+ index = dates
694
+ else:
695
+ index = range(0, len(dates))
696
+
697
+ # check if date is datetime or Timestamp:
698
+ if not (isinstance(dates[0], pd.Timestamp) or
699
+ isinstance(dates[0], datetime)):
700
+ raise ValueError("Error: The given date is not in a datetime or " +
701
+ "Timestamp format.")
702
+
703
+ return pd.DataFrame(
704
+ {"Jahr": dates.year,
705
+ "Monat": dates.month,
706
+ "Tag": dates.day,
707
+ "Stunde": dates.hour,
708
+ "Minute": dates.minute},
709
+ dtype=int,
710
+ index=index)