wolfhece 2.2.37__py3-none-any.whl → 2.2.38__py3-none-any.whl

@@ -0,0 +1,2095 @@
1
+ import numpy as np
2
+ import csv
3
+ import time as time_mod
4
+ import sys # module to stop the program when an error is encountered
5
+ import json # module to handle JSON files
6
+ import pandas as pd # module to write data to Excel files
7
+ from datetime import datetime as dt # module which contains objects for handling dates
8
+ from datetime import timezone as tz # module which contains objects for handling time zones
9
+ from datetime import timedelta as td # module which contains objects for handling time deltas
10
+ import matplotlib.pyplot as plt
11
+ from dbfread import DBF # module to treat DBF files
12
+
13
+ from pathlib import Path
14
+ import logging
15
+ from tqdm import tqdm
16
+ import matplotlib.pyplot as plt
17
+
18
+ from functools import cached_property
19
+
20
+ import geopandas as gpd
21
+ from shapely.geometry import Point, Polygon
22
+ import seaborn as sns
23
+
24
+ from .constant import source_point_measurements, source_IRM, source_dist
25
+ from .read import check_path
26
+ from ..PyVertexvectors import Zones, zone, vector, wolfvertex
27
+ from ..PyTranslate import _
28
+ from .climate_data import read_all_data, read_historical_year_month, find_first_available_year_month, find_last_available_year_month, read_pixel_positions, convert_pixels_to_squares, read_between
29
+
30
+ class ClimateData_IRM:
31
+ """
32
+ Class to handle IRM climate data.
33
+
34
+ Data are available for academic purposes only on https://opendata.meteo.be/. Please read carefully the terms of use before using this data.
35
+ Without valid data, the class will not work.
36
+ """
37
+
38
+ def __init__(self, workingDir: str | Path = Path(r'P:\Donnees\Pluies\IRM\climateGrid')):
39
+ """
40
+ Initialize the ClimateData_IRM class.
41
+
42
+ :param workingDir: The directory where the IRM data is stored.
43
+ """
44
+
45
+ self.workingDir = Path(workingDir)
46
+ self._data:pd.DataFrame = None
47
+
48
+ self._grid: Zones = None
49
+ self._kdtree = None
50
+
51
+ # Check if the working directory exists
52
+ if not self.workingDir.exists():
53
+ logging.error(_("The working directory {} does not exist.").format(self.workingDir))
54
+ return
55
+
56
+ self._start_time = find_first_available_year_month(self.workingDir)
57
+ self._end_time = find_last_available_year_month(self.workingDir)
58
+
59
+ @property
60
+ def variables(self) -> list[str]:
61
+ """
62
+ Return the list of variables available in the IRM data.
63
+
64
+ Available variables should be :
65
+ - day
66
+ - temp_max
67
+ - temp_min
68
+ - temp_avg
69
+ - precip_quantity
70
+ - humidity_relative
71
+ - pressure
72
+ - sun_duration
73
+ - short_wave_from_sky
74
+ - evapotrans_ref
75
+
76
+ Values run from 08:00 of the given day to 08:00 of the next day.
77
+ """
78
+ if self._data is not None:
79
+ return self._data.columns.tolist()
80
+ else:
81
+ return []
82
+
83
+ @property
84
+ def data(self) -> pd.DataFrame:
85
+ """
86
+ Return the data as a pandas DataFrame.
87
+ If the data is not read yet, return None.
88
+ """
89
+ if self._data is None:
90
+ logging.error(_("Data not read yet. Please call read_data() first."))
91
+ return None
92
+
93
+ return self._data
94
+
95
+ def __str__(self):
96
+ ret = _("IRM Climate Data\n")
97
+ ret += _(" - Working Directory: {}\n").format(self.workingDir)
98
+ ret += _(" - Data from {} to {}\n").format(self._start_time, self._end_time)
99
+ ret += _(" - Available Variables: {}\n").format(", ".join(self.variables))
100
+ return ret
101
+
102
+ def read_data(self, all:bool = True, start_yearmonth:tuple[int, int] = None, end_yearmonth:tuple[int, int] = None):
103
+ """ Read the IRM data from the working directory. """
104
+
105
+ if all and (start_yearmonth is not None or end_yearmonth is not None):
106
+ logging.warning(_("You cannot specify start and end year/month when reading all data. Ignoring the specified dates."))
107
+
108
+ if all:
109
+ # Read all data from the working directory
110
+ self._data = read_all_data(self.workingDir)
111
+ else:
112
+ # Fall back to the first/last available year/month when bounds are not provided
113
+ if start_yearmonth is None:
114
+ start_yearmonth = self._start_time
115
+ if end_yearmonth is None:
116
+ end_yearmonth = self._end_time
117
+ if start_yearmonth is None or end_yearmonth is None:
118
+ logging.error(_("Unable to determine the start and end year/month to read data."))
119
+ return
120
+
121
+ # Read data for the specified year/month
122
+ self._data = read_between(self.workingDir, start_yearmonth[0], start_yearmonth[1], end_yearmonth[0], end_yearmonth[1])
123
+
124
+ def _create_zones(self):
125
+ """ Create zones for the pixels and their squares. """
126
+
127
+ self.pixels_id, self._xy = read_pixel_positions(self.workingDir)
128
+
129
+ if self.pixels_id is None or self._xy is None:
130
+ logging.error(_("No pixel positions found in the working directory {}.").format(self.workingDir))
131
+ return
132
+
133
+ squares, self._kdtree = convert_pixels_to_squares(self._xy)
134
+
135
+ self._grid = Zones(idx = 'climategrid')
136
+ pixel_zone = zone(name = 'pixels_footprint')
137
+ pixel_center = zone(name = 'pixels_center')
138
+
139
+ self._grid.add_zone(pixel_zone, forceparent = True)
140
+ self._grid.add_zone(pixel_center, forceparent = True)
141
+
142
+ for i, xy in enumerate(self._xy):
143
+ x, y = xy
144
+ # create a small square around the pixel center
145
+ vec = vector(name = "Pixel_{}".format(i + 1))
146
+ vec.add_vertex(wolfvertex(x-50, y-50))
147
+ vec.add_vertex(wolfvertex(x+50, y-50))
148
+ vec.add_vertex(wolfvertex(x+50, y+50))
149
+ vec.add_vertex(wolfvertex(x-50, y+50))
150
+ vec.force_to_close()
151
+ vec._simplified_geometry = True
152
+ vec.myprop.color = (0, 0, 255) # blue color for the pixel center
153
+ vec.myprop.width = 2
154
+ pixel_center.add_vector(vec, forceparent= True)
155
+
156
+ # create square vectors
157
+ for i, square in enumerate(squares):
158
+ (x1, y1), (x2, y2), (x3, y3), (x4, y4) = square
159
+ vec = vector(name = "Pixel_{}".format(i + 1))
160
+ vec.add_vertex(wolfvertex(x1, y1))
161
+ vec.add_vertex(wolfvertex(x2, y2))
162
+ vec.add_vertex(wolfvertex(x3, y3))
163
+ vec.add_vertex(wolfvertex(x4, y4))
164
+ vec.force_to_close()
165
+
166
+ vec._simplified_geometry = True
167
+ vec.myprop.legendtext = str(i + 1) # set the legend text to the pixel id
168
+ vec.set_legend_position((x1 + x3)/2., (y1 + y3)/2.)
169
+ vec.myprop.legendvisible = True
170
+
171
+ vec.myprop.color = (255, 0, 0)
172
+ pixel_zone.add_vector(vec, forceparent= True)
173
+
174
+ def plot(self, figax=None, title:str = None, with_ids:bool = False):
175
+ """ Plot the GRID for IRM data. """
176
+
177
+ if self._grid is None:
178
+ self._create_zones()
179
+
180
+ if figax is None:
181
+ fig, ax = plt.subplots()
182
+ else:
183
+ fig, ax = figax
184
+
185
+ self._grid['pixels_footprint'].set_legend_visible(with_ids)
186
+
187
+ if self._grid is not None:
188
+ self._grid.plot_matplotlib(ax=ax)
189
+
190
+ if title is None:
191
+ title = _("IRM Climate Data Grid")
192
+
193
+ ax.set_aspect('equal')
194
+ ax.set_title(title)
195
+ fig.tight_layout()
196
+
197
+ return fig, ax
198
+
199
+ def as_zones(self) -> Zones:
200
+ """
201
+ Return the grid as a Zones object.
202
+ If the zones are not created yet, create them.
203
+ """
204
+ if self._grid is None:
205
+ self._create_zones()
206
+ return self._grid
207
+
208
+ def get_attribute4date(self, date:dt, variable:str) -> pd.DataFrame:
209
+ """ Return the attribute for a specific date. """
210
+ if self._data is None:
211
+ logging.error(_("No data available. Please read the data first."))
212
+ return None
213
+
214
+ if variable not in self.variables:
215
+ logging.error(_("The variable {} is not available in the data.").format(variable))
216
+ return None
217
+
218
+ # Check if the date is in the data
219
+ if date not in self._data.day.unique():
220
+ logging.error(_("The date {} is not available in the data.").format(date))
221
+ return None
222
+
223
+ return self._data[self._data.day == date][variable]
224
+
225
+ def get_attribute4daterange(self, date_start:dt, date_end:dt, variable:str) -> pd.DataFrame:
226
+ """ Return the attribute for a specific date range. """
227
+
228
+ if self._data is None:
229
+ logging.error(_("No data available. Please read the data first."))
230
+ return None
231
+
232
+ if variable not in self.variables:
233
+ logging.error(_("The variable {} is not available in the data.").format(variable))
234
+ return None
235
+
236
+ # Check if the date range is valid
237
+ if date_start > date_end:
238
+ logging.error(_("The start date {} is after the end date {}.").format(date_start, date_end))
239
+ return None
240
+
241
+ return self._data[(self._data.day >= date_start) & (self._data.day <= date_end)][['day', variable]]
242
+
243
+ def plot_spatial_attribute(self, date:dt, variable:str, figax=None, title:str = None, cmap:str = 'viridis'):
244
+ """ Plot the spatial distribution of the attribute for a specific date. """
245
+
246
+ if self._data is None:
247
+ logging.error(_("No data available. Please read the data first."))
248
+ return None, None
249
+
250
+ assert variable in self.variables, _("The variable {} is not available in the data.").format(variable)
251
+ assert date in self._data.day.unique(), _("The date {} is not available in the data.").format(date)
252
+
253
+ if figax is None:
254
+ fig, ax = plt.subplots()
255
+ else:
256
+ fig, ax = figax
257
+
258
+ # Plot the data
259
+ footprints = self._grid['pixels_footprint']
260
+ footprints.add_values(variable, self.get_attribute4date(date, variable).to_numpy())
261
+
262
+ footprints.set_colors_from_value(variable, cmap=cmap)
263
+ footprints.set_filled(True)
264
+ footprints.plot_matplotlib(ax)
265
+
266
+ # footprints.set_filled(False)
267
+ # footprints.plot_matplotlib(ax)
268
+
269
+ if title is None:
270
+ title = _("Spatial distribution of {} at {}").format(variable, date.strftime('%Y-%m-%d'))
271
+
272
+ ax.set_title(title)
273
+
274
+ return fig, ax
275
+
276
+ def animation_spatial_attribute(self, variable:str, figax=None, date_start:dt = 0, date_end:dt = -1, cmap:str = 'viridis'):
277
+ """
278
+ Create an animation of the spatial distribution of a specific attribute over time.
279
+ The animation will show the attribute data for each time step.
280
+
281
+ :param variable: The variable to animate.
282
+ :param figax: A tuple (fig, ax) to use for the animation. If None, a new figure and axes will be created.
283
+ :param date_start: The starting date for the animation. If 0, it will use the first date.
284
+ :param date_end: The ending date for the animation. If -1, it will use the last date.
285
+ :param cmap: The colormap to use for the attribute data.
286
+ :return: The animation object.
287
+ """
288
+
289
+ import matplotlib.animation as animation
290
+
291
+ if figax is None:
292
+ fig, ax = plt.subplots()
293
+ else:
294
+ fig, ax = figax
295
+
296
+ def update(frame):
297
+ ax.clear()
298
+ self.plot_spatial_attribute(frame, variable, figax=(fig, ax), title=None, cmap=cmap)
299
+
300
+ if date_end == -1:
301
+ date_end = self._data.day.max()
302
+ if date_start == 0:
303
+ date_start = self._data.day.min()
304
+
305
+ unique_dates = self._data.day.unique()
306
+ all_dates = [date for date in unique_dates if date_start <= date <= date_end]
307
+
308
+ ani = animation.FuncAnimation(fig, update, frames=all_dates, interval=100)
309
+
310
+ return ani
311
+
312
+ def find_pixelid_from_X_Y(self, x:float, y:float) -> int:
313
+ """
314
+ Find the pixel id from the X and Y coordinates.
315
+ :param x: The X coordinate.
316
+ :param y: The Y coordinate.
317
+ :return: The pixel id or None if not found.
318
+ """
319
+ if self._kdtree is None:
320
+ self._create_zones()
321
+
322
+ if self._kdtree is None:
323
+ logging.error(_("No pixel positions found. Please read the data first."))
324
+ return None
325
+
326
+ return self._kdtree.query((x, y), k=1)[1] + 1
327
+
328
+ def plot_hyetogram(self, position:list[float, float] | tuple[float, float], date_start:dt = 0, date_end:dt = -1, figax = None):
329
+ """ Plot the hyetogram for a specific position over a date range.
330
+
331
+ :param position: The position (x, y) for which to plot the hyetogram.
332
+ :param date_start: The starting date for the hyetogram.
333
+ :param date_end: The ending date for the hyetogram. If -1, it will use the last date.
334
+ :param figax: A tuple (fig, ax) to use for the plot. If None, a new figure and axes will be created.
335
+ """
336
+
337
+ if figax is None:
338
+ fig, ax = plt.subplots()
339
+ else:
340
+ fig, ax = figax
341
+
342
+ if date_start == 0:
343
+ date_start = self._data.day.min()
344
+ if date_end == -1:
345
+ date_end = self._data.day.max()
346
+
347
+ pixel_id = self.find_pixelid_from_X_Y(position[0], position[1])
348
+
349
+ if pixel_id is None:
350
+ logging.error(_("No pixel found for position ({}, {}).").format(position[0], position[1]))
351
+ return
352
+
353
+
354
+ # Get the hyetogram data for the specified date range
355
+ hyetogram_data = self.get_attribute4daterange(date_start, date_end, variable='precip_quantity')
356
+ hyetogram_data = hyetogram_data[hyetogram_data.index == pixel_id]
357
+
358
+ # Plot the hyetogram
359
+ sns.barplot(data=hyetogram_data, x='day', y='precip_quantity',
360
+ ax=ax, color='blue', )
361
+ hyetogram_data.plot(x='day', y='precip_quantity', ax=ax, kind='bar', align='center', color='blue')
362
+ ax.set_xlabel(_("Date"))
363
+ ax.set_ylabel(_("Precipitation (mm)"))
364
+ ax.set_title(_("Hyetogram for position ({}, {})").format(position[0], position[1]))
365
+ ax.set_xticklabels(hyetogram_data['day'].dt.strftime('%Y-%m-%d'), rotation=45)
366
+ ax.set_ylim(bottom=0)
367
+ fig.tight_layout()
368
+
369
+ return fig, ax
370
+
371
+ class Rainfall_Gridded:
372
+ """
373
+ Class to handle gridded rainfall data.
374
+ It can read gridded rainfall data.
375
+
376
+ ATTENTION:
377
+ - The grid is provided as a shapefile in the "Grid" subdirectory of the working directory.
378
+ - The associated data are provided in the "data" or "IRM" subdirectory of the working directory.
379
+ - The link between each polygon and the data is made through the index of the polygon in the shapefile.
380
+ BUT, coming from the Fortran world, the index is assumed to be 1-based, not 0-based.
381
+ The increment of the index is done in the "PyVertexvectors" module and checked in the "_read_grid" routine.
382
+
383
+ """
384
+
385
+ def __init__(self, workingDir: str | Path, type_of_rain:int = source_IRM): # default to IRM-type data; source_dist is also accepted
386
+
387
+ self.workingDir = Path(workingDir)
388
+ self.type_of_rain = type_of_rain
389
+ self.is_binary = False
390
+
391
+ self._grid: Zones = None
392
+ self._data: dict[str | int, np.ndarray] = {}
393
+ self._times: list[dt] = []
394
+
395
+ assert self.type_of_rain in [source_IRM, source_dist], _("The type of rain is not supported. It should be either source_IRM or source_dist.")
396
+
397
+ # Test if "Grid" is a subdirectory of the working directory
398
+ self.gridDir = self.workingDir / "Grid"
399
+ if not self.gridDir.exists():
400
+ logging.warning(_("The directory {} does not exist.").format(self.gridDir))
401
+ logging.info(_("Trying to find the directory in the parent directory."))
402
+
403
+ # search the shapefiles in the parent directory
404
+ shps = self.workingDir.parent.rglob("*.shp")
405
+ shps = [x for x in shps if x.is_file() and x.suffix.lower() == '.shp']
406
+ if len(shps) > 0:
407
+ self.gridDir = shps[0].parent
408
+ logging.info(_("The directory {} has been found.").format(self.gridDir))
409
+
410
+ self.dataDir = self.workingDir / "data"
411
+ if not self.dataDir.exists():
412
+ logging.warning(_("The directory {} does not exist.").format(self.dataDir))
413
+ logging.info(_("Trying to find the directory containing rain data."))
414
+ # search the data directory in the parent directory
415
+ rains = self.workingDir.parent.rglob("*.rain")
416
+ dats = self.workingDir.parent.rglob("*.dat")
417
+
418
+ rains = [x for x in rains if x.is_file() and x.suffix.lower() == '.rain']
419
+ dats = [x for x in dats if x.is_file() and x.suffix.lower() == '.dat']
420
+ if len(rains) > 0:
421
+ self.dataDir = rains[0].parent
422
+ logging.info(_("The directory {} has been found.").format(self.dataDir))
423
+ self.is_binary = False
424
+ logging.info(_("The data are considered in ASCII format."))
425
+ if len(dats) > 0:
426
+ self.dataDir = dats[0].parent
427
+ logging.info(_("The directory {} has been found.").format(self.dataDir))
428
+ self.is_binary = True
429
+ logging.info(_("The data are considered in binary format."))
430
+
431
+ def as_zones(self) -> Zones:
432
+ """
433
+ Return the grid as a Zones object.
434
+ """
435
+ if self._grid is None:
436
+ self._read_grid()
437
+ return self._grid
438
+
439
+ @property
440
+ def time_steps(self) -> list[td]:
441
+ """
442
+ Return the list of time steps between consecutive data points.
443
+ """
444
+ if len(self._times) < 2:
445
+ return []
446
+
447
+ # Calculate the time difference between each pair of consecutive times
448
+ return [self._times[i+1] - self._times[i] for i in range(len(self._times)-1)]
449
+
450
+ def has_uniform_time_step(self) -> bool:
451
+ """
452
+ Check if the time step is uniform.
453
+ """
454
+ if len(self._times) < 2:
455
+ return True
456
+
457
+ # Compare each consecutive time difference to the first one
458
+ time_deltas = self.time_steps
459
+ return all(delta == time_deltas[0] for delta in time_deltas)
460
+
461
+ @property
462
+ def time_step(self) -> td:
463
+ """
464
+ Return the time step between the first two data points.
465
+ """
466
+ if len(self._times) < 2:
467
+ return td(seconds=0)
468
+
469
+ # Return the first time delta if uniform
470
+ return self._times[1] - self._times[0]
471
+
472
+ def read(self):
473
+ """ Read grid and data from the working directory. """
474
+
475
+ try:
476
+ self._read_grid()
477
+ self._read_associated_data()
478
+ return True
479
+ except Exception as e:
480
+ logging.error(_("Error reading grid or data: {}").format(e))
481
+ return False
482
+
483
+ def _read_grid(self):
484
+ """
485
+ Read the grid data from the grid directory.
486
+ The grid data should be in a shapefile format.
487
+ """
488
+ if not self.gridDir.exists():
489
+ logging.error(_("The grid directory {} does not exist.").format(self.gridDir))
490
+ return None
491
+
492
+ # Read the shapefile
493
+ try:
494
+ shps = list(self.gridDir.glob("*.shp"))
495
+ if len(shps) == 0:
496
+ logging.error(_("No shapefile found in the directory {}.").format(self.gridDir))
497
+ return None
498
+ if len(shps) > 1:
499
+ logging.warning(_("Multiple shapefiles found in the directory {}.").format(self.gridDir))
500
+ logging.warning(_("Using the first shapefile found."))
501
+
502
+ self._grid = Zones(shps[0])
503
+ logging.info(_("Grid data read successfully from {}.").format(self.gridDir))
504
+
505
+ # check that the zone names are 1-based
506
+ no_error = True
507
+ for i, curzone in enumerate(self._grid.myzones):
508
+ if curzone.myname != str(i+1):
509
+ logging.error(_("The zone name {} is not 1-based.").format(curzone.myname))
510
+ logging.info(_("The zone name will be set to {}.").format(i+1))
511
+ no_error = False
512
+ curzone.myname = str(i+1) # set the zone name to 1-based index
513
+ if no_error:
514
+ logging.info(_("All zone names are 1-based."))
515
+
516
+ return self._grid
517
+
518
+ except Exception as e:
519
+ logging.error(_("Error reading grid data: {}").format(e))
520
+ return None
521
+
522
+ def _read_associated_data(self):
523
+ """ Read the date associeted to the grid. """
524
+
525
+ if not self.dataDir.exists():
526
+ logging.error(_("The data directory {} does not exist.").format(self.dataDir))
527
+ return None
528
+
529
+ # Read the data
530
+ try:
531
+ if self.is_binary:
532
+ files = list(self.dataDir.glob("*.dat"))
533
+ else:
534
+ files = list(self.dataDir.glob("*.rain"))
535
+
536
+ if len(files) == 0:
537
+ logging.error(_("No data file found in the directory {}.").format(self.dataDir))
538
+ return None
539
+
540
+ # we assume the number of files equals the number of grid cells
541
+ if len(files) != self._grid.nbzones:
542
+ logging.error(_("The number of data files ({}) does not match the number of grid cells ({}).").format(len(files), self._grid.nbzones))
543
+
544
+ if self.is_binary:
545
+ self._data = [self._read_rain_binary(file) for file in files]
546
+ else:
547
+ self._data = [self._read_rain_ascii(file) for file in files]
548
+
549
+ # Check if all times are the same
550
+ times = [data[2] for data in self._data]
551
+ if not all(t == times[0] for t in times):
552
+ logging.error(_("The times in the data files do not match."))
553
+ return None
554
+
555
+ self._times = times[0] # all times are the same, we can take the first one
556
+ # Convert data to dictionary
557
+ self._data = {data[0]: data[1] for data in self._data}
558
+
559
+ # Check if keys are in the grid
560
+ for key in self._data.keys():
561
+ if key not in self._grid.zone_names:
562
+ logging.error(_("The key {} is not in the grid.").format(key))
563
+ return None
564
+
565
+ logging.info(_("Data read successfully from {}.").format(self.dataDir))
566
+ return self._data
567
+
568
+ except Exception as e:
569
+ logging.error(_("Error reading data: {}").format(e))
570
+ return None
571
+
572
+ def _read_rain_ascii(self, filename: str | Path) -> tuple[str | int, np.ndarray, list[dt]]:
573
+ """
574
+ Read data from an ASCII file.
575
+ The filename should end with .rain.
576
+
577
+ Structure of the ASCII file:
578
+ - 4 header lines:
579
+ - The first line is a name of the series.
580
+ - The second line is the number of data columns (n).
581
+ - The third line is the total number of columns (n + 6).
582
+ - The fourth line is the number of rows.
583
+ - Each line represents a time step.
584
+ - The first six columns are the day, month, year, hour, minute, and second.
585
+ - The last column is the rain value.
586
+
587
+ :param filename: The name of the file to read.
588
+
589
+ """
590
+
591
+ filename = Path(filename)
592
+
593
+ assert filename.suffix == '.rain', _("The file name must end with .rain")
594
+
595
+ with open(filename, 'r') as f:
596
+ lines = f.readlines()
597
+
598
+ # Read the header
599
+ name_serie = lines[0].strip()
600
+ ncols = int(lines[1].strip())
601
+ nrows = int(lines[3].strip())
602
+
603
+ data = np.zeros((nrows, ncols), dtype=np.float64)
604
+ times = []
605
+
606
+ # Read the data
607
+ for i in range(nrows):
608
+ line = lines[i + 4].strip().split('\t')
609
+ day, month, year, hour, minute, second, rain = line
610
+ times.append(dt(int(year), int(month), int(day), int(hour), int(minute), int(second), tzinfo=tz.utc))
611
+ data[i, 0] = float(rain)
612
+
613
+ # Convert to a 1D numpy array
614
+ data = data.flatten()
615
+
616
+ return name_serie, data, times
617
+
618
+ def _read_rain_binary(self, filename: str | Path) -> tuple[int, np.ndarray, list[dt]]:
619
+ """
620
+ Read data from a binary file.
621
+ The filename should end with .dat.
622
+
623
+ Structure of the binary file:
624
+ - 4 bytes for a "name" as integer
625
+ - 4 bytes for the number of data columns (n)
626
+ - 4 bytes for the total number of columns (n + 6)
627
+ - 4 bytes for the number of rows
628
+ - For each row:
629
+ - 1 byte for the day
630
+ - 1 byte for the month
631
+ - 2 bytes for the year
632
+ - 1 byte for the hour
633
+ - 1 byte for the minute
634
+ - 1 byte for the second
635
+ - n*8 bytes for the rain value as float
636
+
637
+ :param filename: The name of the file to read.
638
+ :return: A numpy array with the rain data.
639
+ """
640
+ import struct
641
+
642
+ filename = Path(filename)
643
+
644
+ assert filename.suffix == '.dat', _("The file name must end with .dat")
645
+
646
+ f = open(filename, 'rb')
647
+
648
+ # Read the header
649
+ name_serie = int.from_bytes(f.read(4), byteorder='little', signed=True)
650
+ ncols = int.from_bytes(f.read(4), byteorder='little', signed=True)
651
+ ntot = int.from_bytes(f.read(4), byteorder='little', signed=True) # total number of columns (ncols + 6), not used further
652
+ nrows = int.from_bytes(f.read(4), byteorder='little', signed=True)
653
+
654
+ data = np.zeros((nrows, ncols), dtype=np.float64)
655
+
656
+ # Create a datetime array
657
+ times = []
658
+
659
+ # Read the data
660
+ for i in range(nrows):
661
+ day = int.from_bytes(f.read(1), byteorder='little', signed=True)
662
+ month = int.from_bytes(f.read(1), byteorder='little', signed=True)
663
+ year = int.from_bytes(f.read(2), byteorder='little', signed=True)
664
+ hour = int.from_bytes(f.read(1), byteorder='little', signed=True)
665
+ minute= int.from_bytes(f.read(1), byteorder='little', signed=True)
666
+ second= int.from_bytes(f.read(1), byteorder='little', signed=True)
667
+
668
+ times.append(dt(year, month, day, hour, minute, second, tzinfo=tz.utc))
669
+
670
+ # Read ncols floats
671
+ values = f.read(ncols * 8)
672
+ if len(values) != ncols * 8:
673
+ raise ValueError(_("The number of values read does not match the expected number."))
674
+ values = struct.unpack('<d' * ncols, values)
675
+ data[i, :] = values
676
+ f.close()
677
+
678
+ # Convert to a 1D numpy array
679
+ data = data.flatten()
680
+
681
+ return name_serie, data, times
682
+
683
+
684
+ def __getitem__(self, item) -> tuple[list[dt], np.ndarray, vector]:
685
+ """
686
+ Get the data for a given item.
687
+ The item should be the name of the zone or the id of the zone.
688
+ """
689
+ if isinstance(item, str) or isinstance(item, int):
690
+ try:
691
+ return self._times, self._data[str(item)], self._grid[item].myvectors[0]
692
+ except KeyError:
693
+ logging.error(_("The item {} is not in the data.").format(item))
694
+ return None, None, None
695
+ else:
696
+ raise ValueError(_("The item must be a string or an integer."))
697
+
698
+ def get_rain4index(self, index:int) -> dict[str | int, float]:
699
+ """
700
+ Get the rain data for a given index.
701
+ The index should be an integer representing the time step position.
702
+
703
+ :param index: The index time for which to get the rain data (0-based).
704
+ :return: A dictionary with the zone name as key and the rain value as value.
705
+ """
706
+ if not isinstance(index, int):
707
+ raise ValueError(_("The index must be an integer."))
708
+
709
+ if index < 0 or index >= len(self._times):
710
+ raise ValueError(_("Index out of range."))
711
+
712
+ rains = {}
713
+ for zone_name, rain_values in self._data.items():
714
+ rains[zone_name] = rain_values[index]
715
+
716
+ return rains
717
+
718
+ def plot_spatial_rain4index(self, index:int, figax=None, title:str = None, cmap:str = 'viridis'):
719
+ """ Plot the spatial distribution of rain for a given index. """
720
+
721
+ if not isinstance(index, int):
722
+ raise ValueError(_("The index must be an integer."))
723
+
724
+ if index < 0 or index >= len(self._times):
725
+ raise ValueError(_("The index is out of range."))
726
+
727
+ if figax is None:
728
+ fig, ax = plt.subplots()
729
+ else:
730
+ fig, ax = figax
731
+
732
+ # Plot the data
733
+ self._grid.add_values('rain', self.get_rain4index(index))
734
+
735
+ self._grid.set_colors_from_value('rain', cmap=cmap)
736
+ self._grid.set_filled(True)
737
+ self._grid.plot_matplotlib(ax)
738
+
739
+ self._grid.set_filled(False)
740
+ self._grid.plot_matplotlib(ax)
741
+
742
+ if title is None:
743
+ date = self._times[index]
744
+ title = _("Spatial distribution of rain at {}").format(date.strftime('%Y-%m-%d %H:%M:%S'))
745
+
746
+ ax.set_title(title)
747
+
748
+ return fig, ax
749
+
750
+ def animation_spatial_rain_index(self, figax=None, idx_start:int = 0, idx_end:int = -1, cmap:str = 'viridis', interval:int = 100):
751
+ """
752
+ Create an animation of the spatial distribution of rain over time.
753
+ The animation will show the rain data for each time step.
754
+
755
+ :param figax: A tuple (fig, ax) to use for the animation. If None, a new figure and axes will be created.
756
+ :param idx_start: The starting index for the animation (0-based).
757
+ :param idx_end: The ending index for the animation (0-based). If -1, it will use the last index.
758
+ :param cmap: The colormap to use for the rain data.
759
+ :param interval: The interval between frames in milliseconds.
760
+ :return: The animation object.
761
+ """
762
+ import matplotlib.animation as animation
763
+
764
+ if figax is None:
765
+ fig, ax = plt.subplots()
766
+ else:
767
+ fig, ax = figax
768
+
769
+ def update(frame):
770
+ ax.clear()
771
+ self.plot_spatial_rain4index(frame, figax=(fig, ax), title=None, cmap=cmap)
772
+
773
+ if idx_end == -1:
774
+ idx_end = len(self._times)
775
+
776
+ ani = animation.FuncAnimation(fig, update, frames=range(idx_start, idx_end), interval=interval)
777
+
778
+ return ani
779
+
780
+ def animation_spatial_rain_date(self, figax=None, date_start:dt = 0, date_end:int = -1, cmap:str = 'viridis', interval:int = 100):
781
+ """
782
+ Create an animation of the spatial distribution of rain over time.
783
+ The animation will show the rain data for each time step.
784
+
785
+ :param figax: A tuple (fig, ax) to use for the animation. If None, a new figure and axes will be created.
786
+ :param date_start: The starting date for the animation. If 0, it will use the first date.
787
+ :param date_end: The ending date for the animation. If -1, it will use the last date.
788
+ :param cmap: The colormap to use for the rain data.
789
+ :param interval: The interval between frames in milliseconds.
790
+ :return: The animation object.
791
+ """
792
+ import matplotlib.animation as animation
793
+
794
+ if figax is None:
795
+ fig, ax = plt.subplots()
796
+ else:
797
+ fig, ax = figax
798
+
799
+ def update(frame):
800
+ ax.clear()
801
+ self.plot_spatial_rain4index(frame, figax=(fig, ax), title=None, cmap=cmap)
802
+
803
+ if date_start == 0:
804
+ date_start = self._times[0]
805
+ if date_end == -1:
806
+ date_end = self._times[-1]
807
+
808
+ # Convert dates to indices
809
+ idx_start = self._times.index(date_start)
810
+ idx_end = self._times.index(date_end) + 1 # +1 to include the end date
811
+ if idx_end > len(self._times):
812
+ idx_end = len(self._times)
813
+
814
+ ani = animation.FuncAnimation(fig, update, frames=range(idx_start, idx_end), interval=interval)
815
+
816
+ return ani
817
+
818
+ class Rainfall_Polygons:
819
+ """
820
+
821
+ For source_point_measurements:
822
+
823
+ « ind_unique.txt »: contains a matrix with as many rows as there are
824
+ configurations and as many columns as there are stations.
825
+ The rows correspond to the configuration number and the columns
826
+ correspond to the stations. The value stored in this matrix is
827
+ the index in the list of rainfall measured at a station at which to start
828
+ for that configuration.
829
+
830
+ « nb_ind_unique.txt »: a vector containing the number of time steps to consider
831
+ for each configuration.
832
+
833
+ « unique.txt »: holds the configuration code.
834
+ The stored value is the conversion to an 8-byte integer of the binary code
835
+ whose first element is the first station, its value being 1
836
+ if that station is used in the current configuration.
837
+ This definition limits the number of usable stations to 64
838
+ per studied catchment. When assigning rainfall to the cells,
839
+ all configurations are traversed in the row order of the matrix
840
+ contained in the « ind_unique.txt » file, progressively building
841
+ the basin rainfall.
842
+
843
+ For source_IRM:
844
+ Only one spatial configuration is present. There are no
845
+ « ind_unique.txt » or « nb_ind_unique.txt » files.
846
+ """
847
+
848
+ def __init__(self, workingDir: str | Path, type_of_rain:int):
849
+
850
+ self.workingDir = Path(workingDir)
851
+
852
+ self.type_of_rain = type_of_rain
853
+
854
+ self.hyetoDict = {}
855
+ self.configs = {}
856
+
857
+ self._codes:dict[int, int] = {} # key is the Fortran index (1-based) and value is the code
858
+ self._nbsteps4code:dict[int,int] = {} # dict (code, number of steps)
859
+ self._steps4eachcode = np.array([]) # 2D array with the steps for each code (1-based)
860
+ self._geometries:dict[int, dict[str, Zones]] = {} # key is the code and value is a dict with 'all_polygons' and 'used_polygons'
861
+ self._hyetograms: dict[str, dict[str, np.ndarray]]= {} # key is the zone name and value is a dict with 'time' and 'rain'
862
+
863
+ if not self.type_of_rain in [source_point_measurements, source_IRM, source_dist]:
864
+ logging.error(_("The type of rain is not supported. It should be either source_point_measurements, source_IRM or source_dist."))
865
+ return
866
+
867
+ # Must be treated in this order
868
+ self._read_hyetograms()
869
+ self._read_configurations()
870
+ self._read_geometries()
871
+
872
+ self._checks()
873
+
874
+ def _checks(self):
875
+ """
876
+ Perform checks on the data.
877
+ """
878
+
879
+ lengths = [len(self.get_computed_steps4code(code)) for code in self._codes.values()]
880
+
881
+ assert np.all(lengths == list(self._nbsteps4code.values())), \
882
+ _("The number of steps for each code does not match the number of steps in the file.")
883
+
884
+ # check if all the hyetograms start at the same time
885
+ if len(self._hyetograms) > 0:
886
+ first_time = next(iter(self._hyetograms.values()))['time'][0]
887
+ for hyeto in self._hyetograms.values():
888
+ if hyeto['time'][0] != first_time:
889
+ raise ValueError(_("The hyetograms do not start at the same time."))
890
+
891
+
892
+ def get_computed_steps4code(self, code:int, base:int = 0) -> list[int]:
893
+ """
894
+ Get all computed time steps for a given code.
895
+
896
+ ATTENTION : it will return index positions. By default, the base is 0 (Python base).
897
+
898
+ :param code: The code for which to get the steps.
899
+ :param base: The base to use for the steps (default is 0 == Python, 1 == Fortran).
900
+ """
901
+
902
+ codes = list(self._codes.values())
903
+ if code in codes:
904
+ col = codes.index(code)
905
+ else:
906
+ raise ValueError(_("The code {} is not valid.").format(code))
907
+
908
+ if col < 0:
909
+ raise ValueError(_("The code {} is not valid.").format(code))
910
+
911
+ if col >= len(codes):
912
+ raise ValueError(_("The code {} is not valid.").format(code))
913
+
914
+ if self._steps4eachcode.size == 0:
915
+ raise ValueError(_("No steps found for the given code."))
916
+
917
+ steps = self._steps4eachcode[:, col]
918
+ # remove the 0
919
+ steps = steps[steps > 0]
920
+ if steps.size == 0:
921
+ raise ValueError(_("No steps found for the given code."))
922
+
923
+ if base == 0:
924
+ # Python base (0-based)
925
+ steps = steps - 1
926
+ elif base == 1:
927
+ # Fortran base (1-based)
928
+ steps = steps
929
+
930
+ # convert to list
931
+ return steps.tolist()
932
+
933
+ def get_config4date(self, date:dt) -> int:
934
+ """
935
+ Get the configuration for a given date.
936
+ The date should be in the format 'datetime.datetime'.
937
+
938
+ :param date: The date for which to get the configuration.
939
+ """
940
+ if not isinstance(date, dt):
941
+ raise ValueError(_("The date must be a datetime object."))
942
+
943
+ # convert to UTC timestamp
944
+ date = date.replace(tzinfo=tz.utc) # interpret the date as UTC
945
+ timestamp = int(date.timestamp())
946
+
947
+ return self.get_config4timestamp(timestamp)
948
+
949
+ def get_config4timestamp(self, timestamp:int) -> int:
950
+ """
951
+ Get the configuration for a given timestamp.
952
+ The timestamp should be an integer representing the seconds since epoch.
953
+
954
+ :param timestamp: The timestamp for which to get the configuration.
955
+ """
956
+ if not isinstance(timestamp, int):
957
+ raise ValueError(_("The timestamp must be an integer."))
958
+
959
+ if timestamp < self.timestamps[0] or timestamp > self.timestamps[-1]:
960
+ raise ValueError(_("Timestamp out of range."))
961
+
962
+ try:
963
+ idx = self.timestamps.index(timestamp)
964
+ return self.get_config4index(idx)
965
+ except ValueError:
966
+ logging.error(_("Timestamp {} not found in the configurations.").format(timestamp))
967
+ return None
968
+
969
+ def get_config4index(self, index:int) -> int:
970
+ """
971
+ Get the configuration for a given index time.
972
+ The index should be an integer representing the position in time (0-based).
973
+
974
+ :param index: The index for which to get the configuration (0-based).
975
+ :return: The configuration key for the given index (not the code itself).
976
+ """
977
+ if not isinstance(index, int):
978
+ raise ValueError(_("The index must be an integer."))
979
+
980
+ if index < 0 or index >= len(self._config4eachstep):
981
+ raise ValueError(_("Index out of range."))
982
+
983
+ return self._config4eachstep[index][1]
984
+
985
+ def get_code4index(self, index:int) -> int:
986
+ """
987
+ Get the code for a given index time.
988
+ The index should be an integer representing the position in time (0-based).
989
+
990
+ :param index: The index for which to get the code (0-based).
991
+ :return: The configuration code for the given index.
992
+ """
993
+ config = self.get_config4index(index)
994
+ return self._codes[config]
995
+
996
+ def get_code4date(self, date:dt) -> int:
997
+ """
998
+ Get the code for a given date.
999
+ The date should be in the format 'datetime.datetime'.
1000
+
1001
+ :param date: The date for which to get the code.
1002
+ :return: The configuration code for the given date.
1003
+ """
1004
+ config = self.get_config4date(date)
1005
+ return self._codes[config]
1006
+
1007
+ def get_code4timestamp(self, timestamp:int) -> int:
1008
+ """
1009
+ Get the code for a given timestamp.
1010
+ The timestamp should be an integer representing the seconds since epoch.
1011
+
1012
+ :param timestamp: The timestamp for which to get the code.
1013
+ :return: The configuration code for the given timestamp.
1014
+ """
1015
+ config = self.get_config4timestamp(timestamp)
1016
+ return self._codes[config]
1017
+
1018
+ def get_geometry4index(self, index:int, all_polygons:bool = True) -> Zones:
1019
+ """
1020
+ Get the geometry for a given index.
1021
+ The index should be an integer representing the time step position.
1022
+
1023
+ :param index: The index time for which to get the geometry (0-based).
1024
+ :param all_polygons: If True, return all polygons, otherwise return only the used polygons.
1025
+ :return: The geometry for the given index.
1026
+ """
1027
+
1028
+ code = self.get_code4index(index)
1029
+
1030
+ if all_polygons:
1031
+ return self._geometries[code]['all_polygons']
1032
+ else:
1033
+ return self._geometries[code]['used_polygons']
1034
+
1035
+ def get_geometry4code(self, code:int, all_polygons:bool = True) -> Zones:
1036
+ """
1037
+ Get the geometry for a given configuration code.
1038
+ The code should be an integer representing the configuration.
1039
+
1040
+ :param code: The configuration code for which to get the geometry.
1041
+ :param all_polygons: If True, return all polygons, otherwise return only the used polygons.
1042
+ """
1043
+ if not isinstance(code, int):
1044
+ raise ValueError(_("The code must be an integer."))
1045
+ if code not in self._codes.values():
1046
+ raise ValueError(_("The code {} is not valid.").format(code))
1047
+
1048
+ if code in self._geometries:
1049
+ if all_polygons:
1050
+ return self._geometries[code]['all_polygons']
1051
+ else:
1052
+ return self._geometries[code]['used_polygons']
1053
+ else:
1054
+ logging.error(_("Geometry for code {} not found.").format(code))
1055
+ return None
1056
+
1057
+ def get_geometry4codeindex(self, code_index:int, all_polygons:bool = True) -> Zones:
1058
+ """
1059
+ Get the geometry for a given configuration code.
1060
+ The code should be an integer representing the configuration.
1061
+
1062
+ :param code_index: The index of the code in the list of codes (1-based index).
1063
+ :param all_polygons: If True, return all polygons, otherwise return only the used polygons.
1064
+ """
1065
+ if not isinstance(code_index, int):
1066
+ raise ValueError(_("The code must be an integer."))
1067
+ if code_index < 1 or code_index > len(self._codes):
1068
+ raise ValueError(_("The code index {} is out of range (1-based).").format(code_index))
1069
+
1070
+ code = self._codes[code_index]
1071
+ return self.get_geometry4code(code, all_polygons=all_polygons)
1072
+
1073
+ @property
1074
+ def nb_steps4code(self) -> list[tuple[int, int]]:
1075
+ """
1076
+ Get the number of steps for each code.
1077
+ Returns a list of tuples (code, number of steps).
1078
+ """
1079
+ return [(code, self._nbsteps4code[code]) for code in self._codes.values()]
1080
+
1081
+ @property
1082
+ def nb_steps4code_asdict(self) -> dict[int, int]:
1083
+ """
1084
+ Get the number of steps for each code as a dictionary.
1085
+ Returns a dictionary with the code as key and the number of steps as value.
1086
+ """
1087
+ return self._nbsteps4code
1088
+
1089
+ def get_geometries(self, n_more_frequent:int = 5, all_polygons:bool = False) -> dict[int, Zones]:
1090
+ """
1091
+ Get the geometries for the most frequent configurations.
1092
+ The n_more_frequent parameter defines how many configurations to return.
1093
+
1094
+ :param n_more_frequent: The number of most frequent configurations to return.
1095
+ :param all_polygons: If True, return all polygons, otherwise return only the used polygons.
1096
+ :return: A dictionary with the configuration code as key and the Zones object as value.
1097
+ """
1098
+
1099
+ if not isinstance(n_more_frequent, int) or n_more_frequent <= 0:
1100
+ raise ValueError(_("The n_more_frequent parameter must be a positive integer."))
1101
+
1102
+ if n_more_frequent > len(self._codes):
1103
+ logging.warning(_("The n_more_frequent parameter is greater than the number of configurations. Returning all configurations."))
1104
+ n_more_frequent = min(n_more_frequent, len(self._codes))
1105
+
1106
+ sorted_codes = sorted(self.nb_steps4code, key = lambda x: x[1], reverse=True)
1107
+
1108
+ return {code[0] : self.get_geometry4code(code[0], all_polygons=all_polygons) for code in sorted_codes[:n_more_frequent]}
1109
+
1110
+ def get_most_frequent_code(self, n_more_frequent:int = 5) -> list[int]:
1111
+ """
1112
+ Get the most frequent configurations codes.
1113
+ The n_more_frequent parameter defines how many configurations to return.
1114
+
1115
+ :param n_more_frequent: The number of most frequent configurations to return.
1116
+ :return: A list of the most frequent configuration codes.
1117
+ """
1118
+
1119
+ if not isinstance(n_more_frequent, int) or n_more_frequent <= 0:
1120
+ raise ValueError(_("The n_more_frequent parameter must be a positive integer."))
1121
+
1122
+ if n_more_frequent > len(self._codes):
1123
+ logging.warning(_("The n_more_frequent parameter is greater than the number of configurations. Returning all configurations."))
1124
+ n_more_frequent = min(n_more_frequent, len(self._codes))
1125
+
1126
+ sorted_codes = sorted(self.nb_steps4code, key = lambda x: x[1], reverse=True)
1127
+
1128
+ return [code[0] for code in sorted_codes[:n_more_frequent]]
1129
+
1130
+ def get_sorted_codes(self) -> list[int]:
1131
+ """
1132
+ Get the sorted configuration codes based on the number of steps.
1133
+ Returns a list of configuration codes sorted by the number of steps in descending order.
1134
+
1135
+ :return: A list of configuration codes sorted by the number of steps.
1136
+ """
1137
+ return [code[0] for code in sorted(self.nb_steps4code, key=lambda x: x[1], reverse=True)]
1138
+
1139
+ def get_hyetograms4index(self, index:int) -> list:
1140
+ """
1141
+ Get the hyetograms for a given index time.
1142
+ The index should be an integer representing the time step position.
1143
+
1144
+ :param index: The index time for which to get the hyetograms (0-based).
1145
+ :return: A list of hyetograms for the given index.
1146
+ """
1147
+
1148
+ config = self.get_config4index(index)
1149
+ code = self._codes[config]
1150
+ keys = self.get_geometry4index(index, all_polygons=False).zone_names
1151
+
1152
+ if len(keys) > 0:
1153
+ return [self._hyetograms[int(key)] for key in keys]
1154
+ else:
1155
+ logging.error(_("Hyetogram for code {} not found.").format(code))
1156
+ return None
1157
+
1158
+ def get_rains4index(self, index:int) -> np.ndarray:
1159
+ """
1160
+ Get the rain data for a given index time.
1161
+ The index should be an integer representing the time step position.
1162
+
1163
+ :param index: The index time for which to get the rain data (0-based).
1164
+ :return: A numpy array with the rain data for the given index.
1165
+ """
1166
+ if not isinstance(index, int):
1167
+ raise ValueError(_("The index must be an integer."))
1168
+
1169
+ hyetos = self.get_hyetograms4index(index)
1170
+ if hyetos is None:
1171
+ raise ValueError(_("No hyetograms found for the given index."))
1172
+
1173
+ # Search the value at the good time step
1174
+ ts = float(self.get_timestamp_from_index(index) - self.timestamps[0])
1175
+ rains = []
1176
+ hyeto = hyetos[0]
1177
+
1178
+ idx = -1
1179
+ if ts in hyeto['time']:
1180
+ idx = np.where(hyeto['time'] == ts)[0][0]
1181
+ # It is the same index for all hyetos because they are aligned
1182
+ else:
1183
+ raise ValueError(_("The time step {} is not found in the hyetogram.").format(ts))
1184
+
1185
+ rains = [hyeto['rain'][idx] for hyeto in hyetos]
1186
+
1187
+ if len(rains) == 0:
1188
+ raise ValueError(_("No rain data found for the given index."))
1189
+
1190
+ # Convert to numpy array
1191
+ rains = np.array(rains, dtype=np.float64)
1192
+
1193
+ return rains
1194
+
1195
+ def get_rains4timestamp(self, timestamp:int) -> np.ndarray:
1196
+ """
1197
+ Get the rain data for a given timestamp.
1198
+ The timestamp should be an integer representing the seconds since epoch.
1199
+
1200
+ :param timestamp: The timestamp for which to get the rain data.
1201
+ :return: A numpy array with the rain data for the given timestamp.
1202
+ """
1203
+ if not isinstance(timestamp, int):
1204
+ raise ValueError(_("The timestamp must be an integer."))
1205
+
1206
+ index = self.timestamps.index(timestamp)
1207
+ return self.get_rains4index(index)
1208
+
1209
+ def get_rains4date(self, date:dt) -> np.ndarray:
1210
+ """
1211
+ Get the rain data for a given date.
1212
+ The date should be in the format 'datetime.datetime'.
1213
+
1214
+ :param date: The date for which to get the rain data.
1215
+ :return: A numpy array with the rain data for the given date.
1216
+ """
1217
+ if not isinstance(date, dt):
1218
+ raise ValueError(_("The date must be a datetime object."))
1219
+
1220
+ timestamp = int(date.timestamp())
1221
+ return self.get_rains4timestamp(timestamp)
1222
+
1223
+ def get_rains4code(self, code:int) -> np.ndarray:
1224
+ """
1225
+ Get the rain data for given configuration.
1226
+
1227
+ :param code: The configuration code for which to get the rain data.
1228
+ :return: A numpy array with the rain data for the given code.
1229
+ """
1230
+
1231
+ if not isinstance(code, int):
1232
+ raise ValueError(_("The code must be an integer."))
1233
+
1234
+ if code not in self._codes.values():
1235
+ raise ValueError(_("The code {} is not valid.").format(code))
1236
+
1237
+ steps = self.get_computed_steps4code(code)
1238
+
1239
+ hyetos = self.get_hyetograms4index(steps[0])
1240
+ if hyetos is None:
1241
+ raise ValueError(_("No hyetograms found for the given index."))
1242
+
1243
+ rains = [x['rain'] for x in hyetos]
1244
+ if len(rains) == 0:
1245
+ raise ValueError(_("No rain data found for the given code."))
1246
+
1247
+ # Select only the steps
1248
+ rains = np.asarray([[rain[i] for i in steps] for rain in rains], dtype=np.float64)
1249
+
1250
+ assert rains.shape[1] == len(steps), \
1251
+ _("The number of rain values does not match the number of steps for the given code.")
1252
+ assert rains.shape[0] == len(hyetos), \
1253
+ _("The number of rain values does not match the number of hyetograms for the given code.")
1254
+ return rains, steps
1255
+
1256
+ def get_footprint_and_rain4index(self, index:int) -> tuple:
1257
+ """
1258
+ Get the footprint and rain data for a given index.
1259
+ The index should be an integer representing the time step position.
1260
+
1261
+ :param index: The index time for which to get the footprint and rain data (0-based).
1262
+ :return: A tuple containing the footprints (as vector objects) and the rain data (as numpy array).
1263
+ """
1264
+ if not isinstance(index, int):
1265
+ raise ValueError(_("The index must be an integer."))
1266
+
1267
+ rains = self.get_rains4index(index)
1268
+ footprints = self.get_geometry4index(index, all_polygons=False)
1269
+ footprints = [x.myvectors[0] for x in footprints.myzones]
1270
+
1271
+ assert len(footprints) == len(rains), \
1272
+ _("The number of footprints does not match the number of rain values.")
1273
+
1274
+ return footprints, rains
1275
+
1276
+ def get_footprint_and_rain4code(self, code:int) -> tuple:
1277
+ """
1278
+ Get the footprint and rain data for a given configuration code.
1279
+ The code should be an integer representing the configuration.
1280
+
1281
+ :param code: The configuration code for which to get the footprint and rain data.
1282
+ :return: A tuple containing the footprints (as vector objects), the rain data (as numpy array) and the times.
1283
+ """
1284
+
1285
+ if not isinstance(code, int):
1286
+ raise ValueError(_("The code must be an integer."))
1287
+ if code not in self._codes.values():
1288
+ raise ValueError(_("The code {} is not valid.").format(code))
1289
+
1290
+ rains, steps = self.get_rains4code(code)
1291
+ footprints = self.get_geometry4index(steps[0], all_polygons=False)
1292
+ footprints = [x.myvectors[0] for x in footprints.myzones]
1293
+
1294
+ assert len(footprints) == len(rains), \
1295
+ _("The number of footprints does not match the number of rain values.")
1296
+
1297
+ return footprints, rains, [self.times[i] for i in steps]
1298
+
1299
+ def get_most_rainy_code(self, n_most:int = 5) -> dict[int, int]:
1300
+ """
1301
+ Get the configuration codes with the most frequent rain events.
1302
+
1303
+ We count the runs of consecutive steps with rain values greater than zero (rain events).
1304
+ """
1305
+ from scipy.ndimage import label
1306
+
1307
+ nb_events = {}
1308
+ for code in self._codes.values():
1309
+ rains, steps = self.get_rains4code(code)
1310
+ # average the rain values across all stations
1311
+ rains = np.mean(rains, axis=0)
1312
+
1313
+ lab, num = label(rains)
1314
+ nb_events[code] = num
1315
+
1316
+ # sort the codes by the number of events
1317
+ sorted_codes = sorted(nb_events.items(), key=lambda x: x[1], reverse=True)
1318
+ if n_most > len(sorted_codes):
1319
+ logging.warning(_("The n_most parameter is greater than the number of configurations. Returning all configurations."))
1320
+ n_most = len(sorted_codes)
1321
+
1322
+ if n_most == -1:
1323
+ n_most = len(sorted_codes)
1324
+
1325
+ # return the code with the most events
1326
+ return {sorted_codes[i][0]: sorted_codes[i][1] for i in range(n_most)}
1327
+
1328
+ def get_most_relative_rainy_code(self, n_most:int = 5) -> dict[int, float]:
1329
+ """
1330
+ Get the configuration codes with the highest relative frequency of rain events.
1331
+
1332
+ We count the runs of consecutive steps with rain values greater than zero, relative to the total number of steps.
1333
+ """
1334
+ from scipy.ndimage import label
1335
+
1336
+ nb_events = {}
1337
+ for code in self._codes.values():
1338
+ rains, steps = self.get_rains4code(code)
1339
+ # average the rain values across all stations
1340
+ rains = np.mean(rains, axis=0)
1341
+
1342
+ lab, num = label(rains)
1343
+ nb_events[code] = float(num) / float(len(rains)) # relative number of events
1344
+
1345
+ # sort the codes by the number of events
1346
+ sorted_codes = sorted(nb_events.items(), key=lambda x: x[1], reverse=True)
1347
+
1348
+ if n_most > len(sorted_codes):
1349
+ logging.warning(_("The n_most parameter is greater than the number of configurations. Returning all configurations."))
1350
+ n_most = len(sorted_codes)
1351
+
1352
+ if n_most == -1:
1353
+ n_most = len(sorted_codes)
1354
+
1355
+ # return the code with the most events
1356
+ return {sorted_codes[i][0]: sorted_codes[i][1] for i in range(n_most)}
1357
+
1358
+ @cached_property
1359
+ def rain_maximum(self) -> float:
1360
+ """
1361
+ Returns the maximum rain value across all configurations.
1362
+ """
1363
+ if not self._hyetograms:
1364
+ return 0.0
1365
+
1366
+ max_rain = 0.0
1367
+ for hyeto in self._hyetograms.values():
1368
+ max_rain = max(max_rain, np.max(hyeto['rain']))
1369
+ return max_rain
1370
+
1371
+ @cached_property
1372
+ def rain_maxima(self) -> dict[int, float]:
1373
+ """
1374
+ Returns a dictionary with the maximum rain value for each configuration code.
1375
+ The key is the configuration code and the value is the maximum rain value.
1376
+ """
1377
+ maxima = {}
1378
+ for code in self._codes.values():
1379
+ rains, __steps = self.get_rains4code(code)
1380
+ maxima[code] = np.max(rains)
1381
+ return maxima
1382
+
1383
+ @cached_property
1384
+ def timestamps(self):
1385
+ """
1386
+ Returns the timestamps list
1387
+ """
1388
+ return [x[0] for x in self._config4eachstep]
1389
+
1390
+ def get_timestamp_from_index(self, index:int) -> int:
1391
+ """
1392
+ Get the timestamp for a given index.
1393
+ The index should be an integer representing the time step position.
1394
+
1395
+ :param index: The index time for which to get the timestamp (0-based).
1396
+ """
1397
+ if not isinstance(index, int):
1398
+ raise ValueError(_("The index must be an integer."))
1399
+
1400
+ if index < 0 or index >= len(self._config4eachstep):
1401
+ raise ValueError(_("Index out of range."))
1402
+
1403
+ return self._config4eachstep[index][0]
1404
+
1405
+ @property
1406
+ def times(self):
1407
+ """
1408
+ Returns the list of times (UTC) for each time step.
1409
+ """
1410
+ locdate = [dt.fromtimestamp(x[0], tz=tz.utc) for x in self._config4eachstep]
1411
+ return locdate
1412
+
1413
+ @times.setter
1414
+ def times(self, value:list[dt]):
1415
+ """
1416
+ Set the time array.
1417
+ The value should be a list of datetime objects.
1418
+ """
1419
+ if not isinstance(value, list) or not all(isinstance(x, dt) for x in value):
1420
+ raise ValueError(_("The value must be a list of datetime objects."))
1421
+
1422
+ self._config4eachstep = [(int(x.timestamp()), 0) for x in value]
1423
+
1424
+ @cached_property
1425
+ def time_begin(self):
1426
+ """
1427
+ Returns the beginning time of the first configuration.
1428
+ """
1429
+ if len(self._config4eachstep) > 0:
1430
+ return dt.fromtimestamp(self._config4eachstep[0][0], tz=tz.utc)
1431
+ else:
1432
+ return None
1433
+
1434
+ @cached_property
1435
+ def time_end(self):
1436
+ """
1437
+ Returns the end time of the last configuration.
1438
+ """
1439
+ if len(self._config4eachstep) > 0:
1440
+ return dt.fromtimestamp(self._config4eachstep[-1][0], tz=tz.utc)
1441
+ else:
1442
+ return None
1443
+
1444
+ @property
1445
+ def number_of_configurations(self):
1446
+ """
1447
+ Returns the number of unique configurations.
1448
+ """
1449
+ return len(self._codes)
1450
+
1451
+ @property
1452
+ def nb_records(self):
1453
+ """
1454
+ Returns the number of records (scenarios).
1455
+ """
1456
+ return self._steps4eachcode.shape[0]
1457
+
1458
+ def _decode_config(self, value:int) -> list:
1459
+ """
1460
+ Decode the unique configuration from an integer value.
1461
+ The value is a binary representation where each bit represents
1462
+ whether a station is included in the configuration.
1463
+ """
1464
+ # convert to int64
1465
+ value = np.int64(value)
1466
+ # create a list of 0 or 1 for each bit in the binary representation
1467
+ config = [(value >> i) & 1 for i in range(64)]
1468
+ return config
1469
+
1470
+ def _code_config(self, config:list) -> int:
1471
+ """
1472
+ Encode a unique configuration from a list of 0s and 1s into an integer.
1473
+ The list represents whether each station is included in the configuration.
1474
+ """
1475
+ # convert the list to a numpy array of int64
1476
+ config = np.array(config, dtype=np.int64)
1477
+ # calculate the integer value by summing the powers of 2 for each bit
1478
+ value = np.sum(config * (2 ** np.arange(len(config))))
1479
+ return value
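+ # Illustrative round trip (not in the original file):
+ #   _code_config([1, 0, 1, 1]) -> 13 and _decode_config(13)[:4] -> [1, 0, 1, 1]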
1480
+
1481
+ def _read_geometries(self):
1482
+ """
1483
+ Read the geometries from the directory.
1484
+ The geometries are stored in vector files named as:
1485
+ Rain_basin_geom_<code>.vec and Rain_basin_geom_<code>_all_zones.vec
1486
+ where <code> is the configuration code.
1487
+ """
1488
+
1489
+ dir = self.workingDir / "Whole_basin"
1490
+
1491
+ if self.type_of_rain == source_point_measurements:
1492
+ for cur_id in self._codes.values():
1493
+ self._geometries[cur_id] = {}
1494
+ fileName = dir / f"Rain_basin_geom_{cur_id}.vec"
1495
+ if fileName.exists():
1496
+ self._geometries[cur_id]['used_polygons'] = Zones(fileName)
1497
+ else:
1498
+ logging.error(_("The file {} does not exist.").format(fileName))
1499
+
1500
+ fileName = dir / f"Rain_basin_geom_{cur_id}_all_zones.vec"
1501
+ if fileName.exists():
1502
+ self._geometries[cur_id]['all_polygons'] = Zones(fileName)
1503
+ else:
1504
+ logging.error(_("The file {} does not exist.").format(fileName))
1505
+ elif self.type_of_rain in [source_IRM, source_dist]:
1506
+ fileName = dir / 'Rain_basin_geom.vec'
1507
+ self._geometries[1] = {'used_polygons': None, 'all_polygons': None}
1508
+ if fileName.exists():
1509
+ self._geometries[1]['used_polygons'] = Zones(fileName)
1510
+ else:
1511
+ logging.error(_("The file {} does not exist.").format(fileName))
1512
+
1513
+ # 'all_polygons' is left as None: there is no "all zones" file for these sources
1514
+
1515
+ def _read_configurations(self):
1516
+ """
1517
+ Read the unique configurations from a text file.
1518
+
1519
+ The configurations are stored in the following files:
1520
+ - unique.txt: contains the unique configurations as integer codes.
1521
+ - nb_ind_unique.txt: contains the number of steps for each configuration.
1522
+ - ind_unique.txt: contains the steps for each configuration.
1523
+ - input_data_gap.txt: contains the gap between the first and the second configuration.
1524
+ - scenarios.txt: contains the scenarios (time, configuration).
1525
+ The files should be located in the "Whole_basin" directory.
1526
+ The working directory should be set to the directory containing the "Whole_basin" directory.
1527
+ """
1528
+
1529
+ if self.type_of_rain == source_point_measurements:
1530
+
1531
+ files = ['unique.txt', 'nb_ind_unique.txt', 'ind_unique.txt']
1532
+ dir = self.workingDir / "Whole_basin"
1533
+
1534
+ for file in files:
1535
+ fileName = dir / file
1536
+ if not fileName.exists():
1537
+ logging.error(_("The file {} does not exist.").format(fileName))
1538
+ return None
1539
+
1540
+ fileName = dir / files[0]
1541
+ with open(fileName, 'r') as f:
1542
+ self._codes = {i+1: int(x.strip()) for i,x in enumerate(f.read().splitlines()[1:])}
1543
+
1544
+ fileName = dir / files[1]
1545
+ with open(fileName, 'r') as f:
1546
+ self._nbsteps4code = {self._codes[i+1]: int(x.strip()) for i,x in enumerate(f.read().splitlines()[1:])}
1547
+
1548
+ fileName = dir / files[2]
1549
+ with open(fileName, 'r') as f:
1550
+ content = f.read().split()
1551
+
1552
+ unique = int(content[0].strip())
1553
+ assert unique == len(self._codes), "The number of unique configurations does not match the number of unique indices."
1554
+ nb_records = int(content[1].strip())
1555
+
1556
+ self._steps4eachcode = np.zeros((nb_records, unique), dtype=int)
1557
+ for i in range(nb_records):
1558
+ self._steps4eachcode[i,:] = [int(x.strip()) for x in content[2+i*unique:2+(i+1)*unique]]
1559
+
1560
+ fileName = dir / 'input_data_gap.txt'
1561
+ if fileName.exists():
1562
+ assert self._codes[1] == 0, "The first unique configuration should be 0."
1563
+ logging.info(_('The first unique configuration is 0.'))
1564
+ with open(fileName, 'r') as f:
1565
+ content = f.read().splitlines()
1566
+ nb = int(content[0].strip())
1567
+ self._gap = [int(x.strip()) for x in content[1:]]
1568
+
1569
+ fileName = dir / 'scenarios.txt'
1570
+ if fileName.exists():
1571
+ with open(fileName, 'r') as f:
1572
+ content = f.read().splitlines()
1573
+ nb_records = int(content[0].strip())
1574
+
1575
+ assert nb_records == self.nb_records, "The number of scenarios does not match the number of records."
1576
+ self._config4eachstep = [line.split() for line in content[1:]]
1577
+ # convert scenarios to integer
1578
+ self._config4eachstep = [[int(x) for x in scenario] for scenario in self._config4eachstep]
1579
+
1580
+ elif self.type_of_rain in [source_IRM, source_dist]:
1581
+
1582
+ dir = self.workingDir / "Whole_basin"
1583
+ self._codes = {1: 1}
1584
+ self._gap = None
1585
+ times = self._hyetograms[list(self._hyetograms.keys())[0]]['time']
1586
+ self._nbsteps4code = {1: len(times)}
1587
+ tstamp = [int(self._timestamp_start + t) for t in times]
1588
+ self._config4eachstep = [[t, 1] for t in tstamp]
1589
+ self._steps4eachcode = np.zeros((len(times), 1), dtype=int)
1590
+ for i in range(len(times)):
1591
+ self._steps4eachcode[i, 0] = i + 1
1592
+
1593
+ def _read_hyetograms(self):
1594
+ """
1595
+ Read the hyetograms from the directory.
1596
+
1597
+ The hyetograms are stored in files named as:
1598
+ <code>rain.hyeto, where <code> is the configuration code.
1599
+ The files should be located in the "Whole_basin" directory.
1600
+ The working directory should be set to the directory containing the "Whole_basin" directory.
1601
+ """
1602
+
1603
+ dir = self.workingDir / "Whole_basin"
1604
+
1605
+ for file in dir.rglob("*rain.hyeto"):
1606
+ code = int(file.stem.replace('rain',''))
1607
+ with open(file, 'r') as f:
1608
+ content = f.read().splitlines()
1609
+ time = np.asarray([float(x.split()[0]) for x in content[1:]])
1610
+ self._timestamp_start = int(time[0])
1611
+ time -= time[0] # normalize time to start from 0
1612
+ rain = np.asarray([float(x.split()[1]) for x in content[1:]])
1613
+ self._hyetograms[code] = {'time': time, 'rain': rain}
1614
+
1615
+ def _write_rain_binary(self, name_serie:int, filename: str | Path, data: np.ndarray, times: list[dt] = None):
1616
+ """
1617
+ Write data to a binary file.
1618
+ The filename should end with .dat.
1619
+
1620
+ Structure of the binary file:
1621
+ - 4 bytes for a "name" as integer
1622
+ - 4 bytes for the total number of columns (n + 6)
1623
+ - 4 bytes for the number of rows
1624
+ - 4 bytes for the number of data columns (n)
1625
+ - For each row:
1626
+ - 1 byte for the day
1627
+ - 1 byte for the month
1628
+ - 2 bytes for the year
1629
+ - 1 byte for the hour
1630
+ - 1 byte for the minute
1631
+ - 1 byte for the second
1632
+ - n*8 bytes for the rain value as float
1633
+
1634
+ :param name_serie: The name of the series; must be an integer or convertible to an integer.
+ :param filename: The name of the file to write.
1635
+ :param data: The data to write, should be a 1D numpy array.
+ :param times: Optional list of timezone-aware (UTC) datetime objects; defaults to self.times.
1636
+ """
1637
+ import struct
1638
+
1639
+ try:
1640
+ name_serie = int(name_serie)
1641
+ except ValueError:
1642
+ raise ValueError(_("The name of the series must be an integer or convertible to an integer."))
1643
+
1644
+ filename = Path(filename)
1645
+
1646
+ assert filename.suffix == '.dat', _("The file name must end with .dat")
1647
+
1648
+ data = data.flatten() # Ensure data is a 1D array
1649
+
1650
+ assert data.ndim == 1, _("The data must be a 1D numpy array.")
1651
+
1652
+ if times is None:
1653
+ times = self.times
1654
+ else:
1655
+ assert isinstance(times, list) and all(isinstance(t, dt) for t in times), _("The times must be a list of datetime objects.")
1656
+ # Check that all the dates are timezone-aware (UTC)
1657
+ for t in times:
1658
+ if t.tzinfo is None or t.tzinfo.utcoffset(t) is None:
1659
+ raise ValueError(_("All times must be timezone-aware datetime objects in UTC."))
1660
+ assert len(times) == data.size, _("The number of time steps does not match the number of data points.")
1661
+
1662
+ f = open(filename,'wb')
1663
+
1664
+ # Write the header
1665
+ nameb = name_serie.to_bytes(4, byteorder='little', signed=True)
1666
+ n = 1 # Number of data columns (1 for rain)
1667
+ ncols = n + 6 # 6 additional columns for date and time
1668
+ nrows = len(times)
1669
+ ncolsb = ncols.to_bytes(4, byteorder='little', signed=True)
1670
+ nrowsb = nrows.to_bytes(4, byteorder='little', signed=True)
1671
+ f.write(nameb)
1672
+ f.write(ncolsb)
1673
+ f.write(nrowsb)
1674
+ f.write(n.to_bytes(4, byteorder='little', signed=True))
1675
+
1676
+ # Write the data
1677
+ for t, r in zip(times, data):
1678
+
1679
+ dayb = t.day.to_bytes(1, byteorder='little', signed=True)
1680
+ monthb = t.month.to_bytes(1, byteorder='little', signed=True)
1681
+ yearb = t.year.to_bytes(2, byteorder='little', signed=True)
1682
+ hourb = t.hour.to_bytes(1, byteorder='little', signed=True)
1683
+ minuteb= t.minute.to_bytes(1, byteorder='little', signed=True)
1684
+ secondb= t.second.to_bytes(1, byteorder='little', signed=True)
1685
+ valb = bytearray(struct.pack("<d", float(r)))
1686
+
1687
+ f.write(dayb)
1688
+ f.write(monthb)
1689
+ f.write(yearb)
1690
+ f.write(hourb)
1691
+ f.write(minuteb)
1692
+ f.write(secondb)
1693
+ f.write(valb)
1694
+
+ f.close()
1695
+ def _read_rain_binary(self, filename: str | Path) -> tuple[int, np.ndarray, list[dt]]:
1696
+ """
1697
+ Read data from a binary file.
1698
+ The filename should end with .dat.
1699
+
1700
+ Structure of the binary file:
1701
+ - 4 bytes for a "name" as integer
1702
+ - 4 bytes for the total number of columns (n + 6)
1703
+ - 4 bytes for the number of rows
1704
+ - 4 bytes for the number of data columns (n)
1705
+ - For each row:
1706
+ - 1 byte for the day
1707
+ - 1 byte for the month
1708
+ - 2 bytes for the year
1709
+ - 1 byte for the hour
1710
+ - 1 byte for the minute
1711
+ - 1 byte for the second
1712
+ - n*8 bytes for the rain value as float
1713
+
1714
+ :param filename: The name of the file to read.
1715
+ :return: A tuple (name_serie, data, times): the series name, the rain values as a 1D numpy array and the list of UTC datetimes.
1716
+ """
1717
+ import struct
1718
+
1719
+ filename = Path(filename)
1720
+
1721
+ assert filename.suffix == '.dat', _("The file name must end with .dat")
1722
+
1723
+ f = open(filename, 'rb')
1724
+
1725
+ # Read the header
1726
+ name_serie = int.from_bytes(f.read(4), byteorder='little', signed=True)
1727
+ ncols = int.from_bytes(f.read(4), byteorder='little', signed=True)
1728
+ nrows = int.from_bytes(f.read(4), byteorder='little', signed=True)
1729
+ n = int.from_bytes(f.read(4), byteorder='little', signed=True)
1730
+
1731
+ data = np.zeros((nrows, n), dtype=np.float64)
1732
+
1733
+ # Create a datetime array
1734
+ times = []
1735
+
1736
+ # Read the data
1737
+ for i in range(nrows):
1738
+ day = int.from_bytes(f.read(1), byteorder='little', signed=True)
1739
+ month = int.from_bytes(f.read(1), byteorder='little', signed=True)
1740
+ year = int.from_bytes(f.read(2), byteorder='little', signed=True)
1741
+ hour = int.from_bytes(f.read(1), byteorder='little', signed=True)
1742
+ minute= int.from_bytes(f.read(1), byteorder='little', signed=True)
1743
+ second= int.from_bytes(f.read(1), byteorder='little', signed=True)
1744
+
1745
+ times.append(dt(year, month, day, hour, minute, second, tzinfo=tz.utc))
1746
+
1747
+ # Read n floats
1748
+ values = f.read(n * 8)
1749
+ if len(values) != n * 8:
1750
+ raise ValueError(_("The number of values read does not match the expected number."))
1751
+ values = struct.unpack('<d' * n, values)
1752
+ data[i, :] = values
1753
+ f.close()
1754
+
1755
+ # Convert to a 1D numpy array
1756
+ data = data.flatten()
1757
+
1758
+ return name_serie, data, times
1759
+
1760
+
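+ # Example (illustrative sketch): write a series to the binary '.dat' format described
+ # above and read it back, assuming `rr` is an instance with its configurations loaded
+ # and `values` is a 1D numpy array of length rr.nb_records.
+ # >>> rr._write_rain_binary(101, 'serie_101.dat', values, times=rr.times)
+ # >>> name, data, times = rr._read_rain_binary('serie_101.dat')
+ # >>> name == 101 and np.allclose(data, values) and len(times) == rr.nb_records
+ # True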
1761
+ def _write_rain_ascii(self, name_serie:str | int, filename: str | Path, data: np.ndarray, times: list[dt] = None):
1762
+ """
1763
+ Write data to an ASCII file.
1764
+ The filename should end with .rain.
1765
+
1766
+ Structure of the ASCII file:
1767
+ - 4 header lines:
1768
+ - The first line is a name of the series.
1769
+ - The second line is the number of data columns (n).
1770
+ - The third line is the total number of columns (n + 6).
1771
+ - The fourth line is the number of rows.
1772
+ - Each line represents a time step.
1773
+ - The first six columns are the day, month, year, hour, minute, and second.
1774
+ - The last column is the rain value.
1775
+
1776
+ :param name_serie: The name of the series, can be an integer or a string.
1777
+ :param filename: The name of the file to write.
1778
+ :param data: The data to write, should be a 1D numpy array.
1779
+ """
1780
+
1781
+ name_serie = str(name_serie)
1782
+ filename = Path(filename)
1783
+
1784
+ assert filename.suffix == '.rain', _("The file name must end with .rain")
1785
+
1786
+ data = data.flatten() # Ensure data is a 1D array
1787
+
1788
+ assert data.ndim == 1, _("The data must be a 1D numpy array.")
1789
+
1790
+ if times is None:
1791
+ times = self.times
1792
+ else:
1793
+ assert isinstance(times, list) and all(isinstance(t, dt) for t in times), _("The times must be a list of datetime objects.")
1794
+ # Check that all the dates are timezone-aware (UTC)
1795
+ for t in times:
1796
+ if t.tzinfo is None or t.tzinfo.utcoffset(t) is None:
1797
+ raise ValueError(_("All times must be timezone-aware datetime objects in UTC."))
1798
+ assert len(times) == data.size, _("The number of time steps does not match the number of data points.")
1799
+
1800
+ with open(filename, 'w') as f:
1801
+ # Write the header
1802
+ f.write(f"{name_serie}\n")
1803
+ f.write(f"{1}\n")
1804
+ f.write(f"{1 + 6}\n")
1805
+ f.write(f"{len(times)}\n")
1806
+
1807
+ # Write the data
1808
+ for t, r in zip(times, data):
1809
+ f.write("\t".join([str(t.day), str(t.month), str(t.year), str(t.hour), str(t.minute), str(t.second), str(r)]) + "\n")
1810
+
1811
+ def _read_rain_ascii(self, filename: str | Path) -> tuple[str | int, np.ndarray, list[dt]]:
1812
+ """
1813
+ Read data from an ASCII file.
1814
+ The filename should end with .rain.
1815
+
1816
+ Structure of the ASCII file:
1817
+ - 4 header lines:
1818
+ - The first line is a name of the series.
1819
+ - The second line is the number of data columns (n).
1820
+ - The third line is the total number of columns (n + 6).
1821
+ - The fourth line is the number of rows.
1822
+ - Each line represents a time step.
1823
+ - The first six columns are the day, month, year, hour, minute, and second.
1824
+ - The last column is the rain value.
1825
+
1826
+ :param filename: The name of the file to read.
1827
+ :return: A tuple (name_serie, data, times): the series name, the rain values as a 1D numpy array and the list of UTC datetimes.
1828
+ """
1829
+
1830
+ filename = Path(filename)
1831
+
1832
+ assert filename.suffix == '.rain', _("The file name must end with .rain")
1833
+
1834
+ with open(filename, 'r') as f:
1835
+ lines = f.readlines()
1836
+
1837
+ # Read the header
1838
+ name_serie = lines[0].strip()
1839
+ ncols = int(lines[1].strip())
1840
+ nrows = int(lines[3].strip())
1841
+
1842
+ data = np.zeros((nrows, ncols), dtype=np.float64)
1843
+ times = []
1844
+
1845
+ # Read the data
1846
+ for i in range(nrows):
1847
+ line = lines[i + 4].strip().split('\t')
1848
+ day, month, year, hour, minute, second, rain = line
1849
+ times.append(dt(int(year), int(month), int(day), int(hour), int(minute), int(second), tzinfo=tz.utc))
1850
+ data[i, 0] = float(rain)
1851
+
1852
+ # Convert to a 1D numpy array
1853
+ data = data.flatten()
1854
+
1855
+ return name_serie, data, times
1856
+
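+ # Example (illustrative sketch): layout of a '.rain' ASCII file as produced by
+ # _write_rain_ascii for a 2-step series named "gauge_12" (values are made up,
+ # columns are tab-separated):
+ # gauge_12
+ # 1
+ # 7
+ # 2
+ # 1  1  2021  0  0  0  0.5
+ # 1  1  2021  1  0  0  1.2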
1857
+ def convert2grid(self, grid: Path | str | Zones,
1858
+ output: Path | str = None,
1859
+ overwrite: bool = True,
1860
+ parallel: bool = True):
1861
+ """
1862
+ Convert the data to a grid.
1863
+
1864
+ The grid can be a Path or a Zones object.
1865
+ If a Path is provided, it should point to a grid file.
1866
+ If a Zones object is provided, it should contain the grid polygons.
1867
+ The output will be written to the specified output directory.
1868
+ If the output directory already exists, it will be overwritten if overwrite is set to True.
1869
+ If output is None, the data are written to a 'data' directory created beside the directory containing the grid file.
1870
+ If parallel is set to True, the computation will be done in parallel using threads.
1871
+
1872
+ :param grid: The grid to convert the data to.
1873
+ :param output: The output directory where the data will be written.
1874
+ :param overwrite: If True, the output directory will be overwritten if it already exists.
1875
+ :param parallel: If True, the computation will be done in parallel using threads.
1876
+ """
1877
+ import concurrent.futures
1878
+
1879
+ if isinstance(grid, Path | str):
1880
+ grid = Path(grid)
1881
+ if not grid.exists():
1882
+ logging.error(_("The grid file {} does not exist.").format(grid))
1883
+ return
1884
+ grid = Zones(grid)
1885
+ elif not isinstance(grid, Zones):
1886
+ logging.error(_("The grid must be a Path, str or Zones object."))
1887
+ return
1888
+
1889
+ if output is None:
1890
+ output = Path(grid.filename).parent.parent / 'data'
+ else:
1891
+ output = Path(output)
1892
+
1893
+ if output.exists():
1894
+ if not overwrite:
1895
+ logging.error(_("The output directory {} already exists and overwrite is set to False.").format(output))
1896
+ return
1897
+ logging.warning(_("The output directory {} already exists.").format(output))
1898
+ logging.warning(_("The data will be overwritten."))
+
+ output.mkdir(parents=True, exist_ok=True)
1899
+
1900
+ # For each grid cell, we need the fraction covered by each polygon, for each configuration
1901
+ grid_list_polygons = [curzone.myvectors[0].polygon for curzone in grid.myzones]
1902
+
1903
+ fractions = {}
1904
+ for idx in tqdm(range(len(self._codes))):
1905
+
1906
+ # Get the geometry for the code
1907
+ geometry = self.get_geometry4codeindex(idx+1, all_polygons=False)
1908
+ code = self._codes[idx+1]
1909
+
1910
+ if geometry is None:
1911
+ logging.error(_("The geometry for code {} is None.").format(code))
1912
+ continue
1913
+
1914
+ thiessen_polygons = [curzone.myvectors[0].polygon for curzone in geometry.myzones]
1915
+
1916
+ # Compute the fraction of surface intersection for each polygon
1917
+ loc_frac = np.zeros((len(grid_list_polygons), len(thiessen_polygons)), dtype=np.float64)
1918
+ for i, grid_polygon in enumerate(grid_list_polygons):
1919
+ for j, thiessen_polygon in enumerate(thiessen_polygons):
1920
+ if grid_polygon.intersects(thiessen_polygon):
1921
+ intersection = grid_polygon.intersection(thiessen_polygon)
1922
+ loc_frac[i, j] = intersection.area / grid_polygon.area
1923
+
1924
+ # check if the fractions sum to 1 for each grid cell
1925
+ if not np.allclose(np.sum(loc_frac, axis=1), 1.0):
1926
+ logging.warning(_("The fractions for code {} do not sum to 1 for all grid cells.").format(code))
1927
+ logging.warning(_("The fractions will be normalized."))
1928
+ # Normalize the fractions
1929
+ non_zeros = np.sum(loc_frac, axis=1) > 0
1930
+ loc_frac[non_zeros] = loc_frac[non_zeros] / np.sum(loc_frac, axis=1, keepdims=True)[non_zeros]
1931
+
1932
+ # if all fractions are 0, ignore the array
1933
+ if np.all(loc_frac == 0):
1934
+ logging.warning(_("All fractions for index {} are 0.").format(idx))
1935
+ fractions[code] = None
1936
+ else:
1937
+ # Store the fractions in the dictionary
1938
+ fractions[code] = loc_frac
1939
+
1940
+ geometry.reset_linestring()
1941
+
1942
+ # Iterate on each time step, compute and store the rain data
1943
+ gridded_rain = np.zeros((grid.nbzones, self.nb_records), dtype=np.float64)
1944
+
1945
+ def compute_gridded_rain(idx):
1946
+ rains = self.get_rains4index(idx)
1947
+ index_geom = self.get_config4index(idx)
1948
+ code = self._codes[index_geom]
1949
+ frac = fractions[code]
1950
+ if frac is None:
1951
+ logging.warning(_("No fractions found for index {}. Skipping.").format(idx))
1952
+ return idx, None
1953
+ else:
1954
+ # Compute the gridded rain data
1955
+ return idx, np.dot(frac, rains)
1956
+
1957
+ if parallel:
1958
+ with concurrent.futures.ThreadPoolExecutor() as executor:
1959
+ results = list(tqdm(executor.map(compute_gridded_rain, range(self.nb_records)), total=self.nb_records))
1960
+ else:
1961
+ results = [compute_gridded_rain(idx) for idx in tqdm(range(self.nb_records))]
1962
+
1963
+ for idx, result in results:
1964
+ if result is not None:
1965
+ gridded_rain[:, idx] = result
1966
+
1967
+ # write the data to binary files
1968
+ for idx in tqdm(range(grid.nbzones)):
1969
+ zone_name = grid[idx].myname
1970
+ rain_data = gridded_rain[idx, :]
1971
+
1972
+ # Write the data to a binary file
1973
+ filename = output / f"{zone_name}.rain.dat"
1974
+ self._write_rain_binary(zone_name, filename, rain_data)
1975
+
1976
+
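+ # Example (illustrative sketch): spread the per-configuration rains onto a regular grid,
+ # assuming `rr` is an instance of this class; 'some_dir/grid/grid.vec' and 'some_dir/data'
+ # are placeholder paths for a Zones file of grid cells and the output directory.
+ # >>> rr.convert2grid(Path('some_dir/grid/grid.vec'), output=Path('some_dir/data'),
+ # ... overwrite=True, parallel=True)
+ # One '<zone_name>.rain.dat' binary file is then written per grid cell in the output directory.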
1977
+ def plot_spatial_rain4index(self, index:int, figax=None, title:str = None, cmap:str = 'viridis'):
1978
+ """
1979
+ Plot the spatial distribution of rain for a given index.
1980
+ The index should be an integer representing the time step position.
1981
+ """
1982
+
1983
+ if figax is None:
1984
+ fig, ax = plt.subplots()
1985
+ else:
1986
+ fig, ax = figax
1987
+
1988
+ rains = self.get_rains4index(index)
1989
+ geometry = self.get_geometry4index(index, all_polygons=False)
1990
+
1991
+ if rains is None or geometry is None:
1992
+ logging.error(_("No rain data or geometry found for the given index."))
1993
+ return
1994
+
1995
+ # Plot the rain data
1996
+ geometry.add_values('rain', rains) # attach the rain values to the polygons
1997
+ geometry.set_colors_from_value('rain', cmap=cmap, vmin = 0., vmax=self.rain_maximum)
1998
+ geometry.set_filled(True)
1999
+ geometry.plot_matplotlib(ax)
2000
+
2001
+ if title is not None:
2002
+ ax.set_title(title)
2003
+ else:
2004
+ time = dt.fromtimestamp(self.timestamps[index], tz=tz.utc)
2005
+ ax.set_title(_("Rain distribution for {}").format(time.strftime('%Y-%m-%d %H:%M:%S %Z')))
2006
+
2007
+ return fig, ax
2008
+
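+ # Example (illustrative sketch): plot the rain field of time step 10 and save the figure,
+ # assuming `rr` is an instance with geometries and rains already loaded.
+ # >>> fig, ax = rr.plot_spatial_rain4index(10, cmap='viridis')
+ # >>> fig.savefig('rain_step_10.png', dpi=150)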
2009
+ def animation_spatial_rain_index(self, code:int = 1, idx_start:int = 0, idx_end:int = -1, figax=None, cmap:str = 'viridis', interval:int = 100):
2010
+ """
2011
+ Create an animation of the spatial distribution of rain for the time steps of a given configuration code, between idx_start and idx_end.
2012
+ The animation will be displayed using matplotlib's FuncAnimation.
2013
+ """
2014
+ import matplotlib
2015
+ from matplotlib.animation import FuncAnimation
2016
+
2017
+ if idx_start < 0:
2018
+ idx_start = 0
2019
+ if idx_end < 0 or idx_end >= self.nb_records:
2020
+ idx_end = self.nb_records
2021
+
2022
+ if figax is None:
2023
+ fig, ax = plt.subplots()
2024
+ else:
2025
+ fig, ax = figax
2026
+
2027
+ def update(index):
2028
+ ax.clear()
2029
+ self.plot_spatial_rain4index(int(index), figax=(fig, ax), cmap=cmap)
2030
+
2031
+ if code not in self._codes.values():
2032
+ logging.error(_("The code {} is not valid.").format(code))
2033
+ return
2034
+
2035
+ # Get the indices for the given code
2036
+ steps = self.get_computed_steps4code(code)
2037
+ if steps is None:
2038
+ logging.error(_("No steps found for the given code."))
2039
+ return
2040
+ if len(steps) == 0:
2041
+ logging.error(_("No steps found for the given code."))
2042
+ return
2043
+
2044
+ # Remove all steps before idx_start and after idx_end
2045
+ steps = [s for s in steps if idx_start <= s <= idx_end]
2046
+
2047
+ # Create the animation
2048
+ matplotlib.rcParams['animation.embed_limit'] = 2**128
2049
+ ani = FuncAnimation(fig, update, frames=tqdm(steps), interval=interval, repeat=True)
2050
+
2051
+ return ani
2052
+
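+ # Example (illustrative sketch): animate the first 200 time steps of configuration code 1
+ # (assuming that code exists for the loaded data set) and export the result.
+ # >>> ani = rr.animation_spatial_rain_index(code=1, idx_start=0, idx_end=200, interval=200)
+ # >>> ani.save('rain_animation.gif', writer='pillow') # or ani.to_jshtml() in a notebook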
2053
+ def animation_spatial_rain_date(self, code:int = 1, date_start:dt = None, date_end:dt = None, figax=None, cmap:str = 'viridis', interval:int = 100):
2054
+ """
2055
+ Create an animation of the spatial distribution of rain for the time steps of a given configuration code within a date range.
2056
+ The animation will be displayed using matplotlib's FuncAnimation.
2057
+ """
2058
+ import matplotlib
2059
+ from matplotlib.animation import FuncAnimation
2060
+
2061
+ if date_start is None:
2062
+ date_start = self.time_begin
2063
+ if date_end is None or date_end >= self.time_end:
2064
+ date_end = self.time_end
2065
+
2066
+ if figax is None:
2067
+ fig, ax = plt.subplots()
2068
+ else:
2069
+ fig, ax = figax
2070
+
2071
+ def update(index):
2072
+ ax.clear()
2073
+ self.plot_spatial_rain4index(int(index), figax=(fig, ax), cmap=cmap)
2074
+
2075
+ if code not in self._codes.values():
2076
+ logging.error(_("The code {} is not valid.").format(code))
2077
+ return
2078
+
2079
+ # Get the indices for the given code
2080
+ steps = self.get_computed_steps4code(code)
2081
+ if steps is None:
2082
+ logging.error(_("No steps found for the given code."))
2083
+ return
2084
+ if len(steps) == 0:
2085
+ logging.error(_("No steps found for the given code."))
2086
+ return
2087
+
2088
+ # Remove all steps before date_start and after date_end
2089
+ steps = [s for s in steps if date_start <= dt.fromtimestamp(self.timestamps[s], tz=tz.utc) <= date_end]
2090
+
2091
+ # Create the animation
2092
+ matplotlib.rcParams['animation.embed_limit'] = 2**128
2093
+ ani = FuncAnimation(fig, update, frames=tqdm(steps), interval=interval, repeat=True)
2094
+
2095
+ return ani