toolbox-utils 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,392 @@
1
+ """hspfbintoolbox to read HSPF binary files."""
2
+
3
+ import datetime
4
+ import struct
5
+ import sys
6
+
7
+ try:
8
+ from typing import Literal
9
+ except ImportError:
10
+ from typing import Literal
11
+
12
+ import pandas as pd
13
+
14
+ from .. import tsutils
15
+
16
# Interval "level" code found in HSPF binary data records -> interval name.
code2intervalmap = {
    5: "yearly",
    4: "monthly",
    3: "daily",
    2: "bivl",
}

# Inverse lookup: interval name -> level code.
interval2codemap = {name: code for code, name in code2intervalmap.items()}

# Level code -> pandas period frequency alias.  "bivl" (2) maps to None; the
# reader derives that spacing from the record dates instead.
code2freqmap = {
    5: "A",
    4: "M",
    3: "D",
    2: None,
}
21
+
22
+
23
# Reusable parameter-description fragments keyed by parameter name.
# NOTE(review): nothing in the visible code reads this mapping; presumably it
# is substituted into function docstrings elsewhere -- confirm before removing.
_LOCAL_DOCSTRINGS = {
    "hbnfilename": """hbnfilename: str
        The HSPF binary output file. This file must have been created from
        a completed model run."""
}
28
+
29
+
30
def tuple_match(findme, hay):
    """Tell whether two sequences agree position by position.

    Part of partial ordered matching: ``None`` on either side acts as a
    wildcard, and sequences of different lengths never match.
    See http://stackoverflow.com/a/4559604
    """
    if len(findme) != len(hay):
        return False
    for wanted, found in zip(findme, hay):
        if wanted is None or found is None:
            # a wildcard on either side matches anything
            continue
        if not wanted == found:
            return False
    return True
37
+
38
+
39
def tuple_combine(findme, hay):
    """Merge two matched sequences, filling wildcards from the haystack.

    Part of partial ordered matching.
    See http://stackoverflow.com/a/4559604

    For every position the value from ``findme`` is kept unless it is
    ``None``, in which case the value from ``hay`` is used.  Falsy haystack
    values such as ``0`` or ``""`` are preserved (the former
    ``i is None and j or i`` idiom turned them into ``None``).

    Returns a tuple as long as the shorter of the two inputs.
    """
    return tuple(j if i is None else i for i, j in zip(findme, hay))
44
+
45
+
46
def tuple_search(findme, haystack):
    """Partial ordered matching with 'None' as wildcard.

    Returns a list of ``(index, combined)`` pairs, one for every entry of
    ``haystack`` that matches ``findme``; ``index`` is the position of the
    entry and ``combined`` is the result of ``tuple_combine``.
    See http://stackoverflow.com/a/4559604
    """
    matches = []
    for position, candidate in enumerate(haystack):
        if tuple_match(findme, candidate):
            matches.append((position, tuple_combine(findme, candidate)))
    return matches
55
+
56
+
57
def _get_data(binfilename, interval="daily", labels=None, catalog_only=True):
    """Read an HSPF binary output file and collect the records matching labels.

    Underlying function to read from the binary file.  Used by 'extract',
    'catalog'.

    Parameters
    ----------
    binfilename
        Path of the HSPF binary output (.hbn) file.
    interval
        A key of ``interval2codemap`` (case-insensitive).  A value without a
        ``lower`` method (for example ``None``) selects every interval level.
    labels
        Iterable of label specifications, each either a string
        "operation,lue_number,group,variable" or a sequence of those four
        fields.  Empty fields and the text "None" are wildcards.  Defaults to
        ``[",,,"]`` which matches everything.
    catalog_only
        When ``False`` the data values are collected.  Otherwise only the
        (start, end) periods of the matching series are returned.

    Returns
    -------
    tuple
        ``(ndates, collect_dict)``.  ``ndates`` is the sorted list of the
        datetimes of all matching records.  ``collect_dict`` is keyed by
        ``(operation, lue, group, variable, level)`` and holds the list of
        values when ``catalog_only`` is ``False``, or a ``(start, end)`` pair
        of ``pd.Period`` otherwise.

    Raises
    ------
    ValueError
        If a label is malformed, the file does not start with the FD byte, or
        no record matches any label.
    KeyError
        If ``interval`` is a string that is not in ``interval2codemap``.
    """
    if labels is None:
        labels = [",,,"]
    testem = {
        "PERLND": [
            "ATEMP",
            "SNOW",
            "PWATER",
            "SEDMNT",
            "PSTEMP",
            "PWTGAS",
            "PQUAL",
            "MSTLAY",
            "PEST",
            "NITR",
            "PHOS",
            "TRACER",
            "",
        ],
        "IMPLND": ["ATEMP", "SNOW", "IWATER", "SOLIDS", "IWTGAS", "IQUAL", ""],
        "RCHRES": [
            "HYDR",
            "CONS",
            "HTRCH",
            "SEDTRN",
            "GQUAL",
            "OXRX",
            "NUTRX",
            "PLANK",
            "PHCARB",
            "INFLOW",
            "OFLOW",
            "ROFLOW",
            "",
        ],
        "BMPRAC": [""],
        "": [""],
    }

    # Groups accepted when the operation type is a wildcard: the union of the
    # groups of every operation type, without the "" placeholder entries.
    allgroups = sorted({grp for groups in testem.values() for grp in groups if grp})

    collect_dict = {}
    lablist = []

    # Normalize interval code.  A non-string interval has no ``lower`` method
    # and means "any interval level".
    try:
        intervalcode = interval2codemap[interval.lower()]
    except AttributeError:
        intervalcode = None

    # turn every label into a list of fields
    labels = [
        label.split(",") if isinstance(label, str) else label for label in labels
    ]

    # Check the list members for valid values
    for label in labels:
        if len(label) != 4:
            raise ValueError(
                tsutils.error_wrapper(
                    f"""The label '{label}' has the wrong number of entries.
                    """
                )
            )

        # replace empty fields with None
        # operation,lue_number,group,variable
        words = [None if (i in ("", "None")) else i for i in label]

        # first word must be a valid operation type or None
        if words[0] is not None:
            # force uppercase before comparison
            words[0] = words[0].upper()
            if words[0] not in testem:
                raise ValueError(
                    tsutils.error_wrapper(
                        f"""Operation type must be one of 'PERLND', 'IMPLND',
                        'RCHRES', or 'BMPRAC', or missing (to get all) instead
                        of {words[0]}.
                        """
                    )
                )

        # second word must be integer 1-999 or None or range to parse
        if words[1] is not None:
            try:
                words[1] = int(words[1])
                luelist = [words[1]]
            except ValueError:
                luelist = tsutils.range_to_numlist(words[1])
            for luenum in luelist:
                if luenum < 1 or luenum > 999:
                    raise ValueError(
                        tsutils.error_wrapper(
                            f"""The land use element must be an integer from
                            1 to 999 inclusive, instead of {luenum}.
                            """
                        )
                    )
        else:
            luelist = [None]

        # third word must be a valid group name or None
        if words[2] is not None:
            words[2] = words[2].upper()
            if words[0] is None:
                # Wildcard operation: indexing testem with None used to raise
                # KeyError, so validate against the groups of all operations.
                if words[2] not in allgroups:
                    raise ValueError(
                        tsutils.error_wrapper(
                            f"""The variable group must be one of
                            {allgroups}, instead you gave {words[2]}.
                            """
                        )
                    )
            elif words[2] not in testem[words[0]]:
                raise ValueError(
                    tsutils.error_wrapper(
                        f"""The {words[0]} operation type only allows the
                        variable groups: {testem[words[0]][:-1]}, instead you
                        gave {words[2]}.
                        """
                    )
                )

        # fourth word is currently not checked - assumed to be a variable name
        # if not, it will simply never be found in the file, so ok
        # but no warning for the user - add check?

        # add interval code as fifth word in list
        words.append(intervalcode)

        # add to new list of checked and expanded lists
        for luenum in luelist:
            words[1] = luenum
            lablist.append(list(words))

    # Now read through the binary file and collect the data matching the labels
    with open(binfilename, "rb") as binfp:
        labeltest = set()
        vnames = {}
        ndates = set()
        # read first byte - must be hex FD (decimal 253) for valid file.
        magicbyte = binfp.read(1)
        if magicbyte != b"\xfd":
            # not a valid HSPF binary file
            raise ValueError(
                tsutils.error_wrapper(
                    f"""{binfilename} is not a valid HSPF binary output file
                    (.hbn), The first byte must be FD hexadecimal, but it was
                    {magicbyte}.
                    """
                )
            )

        # loop through each record
        while True:
            # reinitialize counter for record length - used to compute skip at
            # end
            recpos = 0

            # read first four bytes to get record length bitfield
            try:
                reclen1, reclen2, reclen3, reclen = struct.unpack("4B", binfp.read(4))
                recpos += 4
            except struct.error:
                # End of file.
                break

            # get record leader - next 24 bytes
            rectype, optype, lue, group = struct.unpack("I8sI8s", binfp.read(24))
            recpos += 24

            # clean up
            rectype = int(rectype)
            lue = int(lue)
            optype = optype.strip()
            group = group.strip()

            # Variable names are stored per operation type as well as per
            # number and group, so that two operation types sharing a number
            # and a group name do not merge their name lists.
            opkey = (optype, lue, group)

            if rectype == 0:
                # header record - collect variable names for this
                # operation and group

                # parse reclen bitfield to get actual remaining length
                # the " - 24 " subtracts the 24 bytes already read
                reclen1 = int(reclen1 / 4)
                reclen2 = reclen2 * 64 + reclen1
                reclen3 = reclen3 * 16384 + reclen2
                reclen = reclen * 4194304 + reclen3 - 24

                # loop through rest of record
                slen = 0
                while slen < reclen:
                    # read single 4B word for length of next variable name
                    length = struct.unpack("I", binfp.read(4))[0]

                    # read the variable name
                    variable_name = struct.unpack(f"{length}s", binfp.read(length))[0]

                    # keep the names in file order: the values of a data
                    # record are read positionally against this list
                    vnames.setdefault(opkey, []).append(variable_name)

                    # update how far along the record we are
                    slen += length + 4
                    recpos += length + 4

            elif rectype == 1:
                # Data record

                # record should contain a value for each variable name for this
                # operation and group
                names = vnames[opkey]
                numvals = len(names)

                (_, level, year, month, day, hour, minute) = struct.unpack(
                    "7I", binfp.read(28)
                )
                recpos += 28

                vals = struct.unpack(f"{numvals}f", binfp.read(4 * numvals))
                recpos += 4 * numvals

                # datetime does not accept hour 24; it is mapped to hour 0 of
                # the same calendar date (no day is added).
                if hour == 24:
                    hour = 0

                ndate = datetime.datetime(year, month, day, hour, minute)

                # decode once per record instead of once per variable
                optype_text = optype.decode("ascii")
                group_text = group.decode("ascii")

                # Go through labels to see if these values need to be
                # collected
                for i, vname in enumerate(names):
                    tmpkey = (
                        optype_text,
                        lue,
                        group_text,
                        vname.decode("ascii"),
                        level,
                    )

                    for lbl in lablist:
                        res = tuple_search(tmpkey, [lbl])
                        if not res:
                            continue
                        labeltest.add(tuple(lbl))
                        nres = res[0][1]
                        ndates.add(ndate)
                        if catalog_only is False:
                            if intervalcode == level:
                                collect_dict.setdefault(nres, []).append(vals[i])
                        else:
                            # placeholder; replaced by the period pair below
                            collect_dict[nres] = level
            else:
                # there was a problem with unexpected record length
                # back up almost all the way and try again
                binfp.seek(-31, 1)

            # calculate and skip to the end of the variable-length back pointer
            reccnt = recpos * 4 + 1
            if reccnt >= 256**2:
                skbytes = 3
            elif reccnt >= 256:
                skbytes = 2
            else:
                skbytes = 1
            binfp.read(skbytes)

    if not collect_dict:
        raise ValueError(
            tsutils.error_wrapper(
                f"""The label specifications below matched no records in the
                binary file.

                {lablist}
                """
            )
        )

    ndates = sorted(ndates)

    if catalog_only is False:
        for lbl in lablist:
            if tuple(lbl) not in labeltest:
                sys.stderr.write(
                    tsutils.error_wrapper(
                        f"""Warning: The label '{lbl}' matched no records in
                        the binary file.
                        """
                    )
                )
    else:
        for key in collect_dict:
            # level 2 ("bivl") has no fixed alias: use the spacing of the
            # first two dates as the period frequency
            delta = ndates[1] - ndates[0] if key[4] == 2 else code2freqmap[key[4]]
            collect_dict[key] = (
                pd.Period(ndates[0], freq=delta),
                pd.Period(ndates[-1], freq=delta),
            )

    return ndates, collect_dict
354
+
355
+
356
def hbn_extract(
    hbnfilename: str,
    interval: Literal["yearly", "monthly", "daily", "bivl"],
    *labels,
    sort_columns: bool = False,
):
    """Return a DataFrame of the series selected from a HSPF binary file.

    ``interval`` is matched case-insensitively and must be one of "bivl",
    "daily", "monthly" or "yearly", otherwise ``ValueError`` is raised.
    ``labels`` are passed unchanged to ``_get_data``.  With ``sort_columns``
    the columns are ordered by everything in their key except the operation
    type.  Columns are named "<operation>_<number>_<variable>" with blanks
    replaced by "-", and the index is a PeriodIndex named "Datetime".
    """
    interval = interval.lower()

    valid_intervals = ("bivl", "daily", "monthly", "yearly")
    if interval not in valid_intervals:
        raise ValueError(
            tsutils.error_wrapper(
                f"""The "interval" argument must be one of "bivl", "daily",
                "monthly", or "yearly". You supplied "{interval}".
                """
            )
        )

    dates, collected = _get_data(hbnfilename, interval, labels, catalog_only=False)

    ordered_keys = list(collected)
    if sort_columns:
        ordered_keys.sort(key=lambda key: key[1:])

    series_list = [pd.Series(collected[key], index=dates) for key in ordered_keys]
    combined = pd.concat(series_list, sort=False, axis=1)
    result = pd.DataFrame(combined.reindex(pd.Index(dates)))
    result.columns = [
        f"{key[0]}_{key[1]}_{key[3]}".replace(" ", "-") for key in ordered_keys
    ]

    if interval == "bivl":
        # the "bivl" spacing is taken from the first two timestamps
        step = result.index[1] - result.index[0]
        result.index = result.index.to_period(step)
    else:
        result.index = result.index.to_period()
    result.index.name = "Datetime"

    return result
@@ -0,0 +1,55 @@
1
+ """For reading HSPF plotgen files."""
2
+
3
+ import datetime
4
+
5
+ import pandas as pd
6
+
7
# Zero-based index of the last header line.  Lines before it are scanned for
# column labels, the line itself is ignored and data rows follow it.
_END_OF_HEADER = 25


def plotgen_extract(filename):
    """Read an HSPF PLTGEN file and return its contents as a DataFrame.

    Column labels are taken from characters 4-29 of the header lines that
    follow the line containing "LINTYP", up to the first line with an empty
    label or a line whose text from character 5 starts with "Time series".
    Each data row holds year, month, day, hour and minute in characters 4-21
    and the values from character 23 on.  Hour 24 is stored as midnight of
    the following day.

    Blank lines in the data section are skipped, and a file without data
    rows yields an empty DataFrame that still carries the column labels.

    Parameters
    ----------
    filename
        Path of the PLTGEN file; it is read as ASCII text.

    Returns
    -------
    pandas.DataFrame
        One column per label, indexed by a DatetimeIndex named "Datetime".
    """
    in_labels = False
    cols = []
    rows = []
    with open(filename, encoding="ascii") as fpointer:
        for i, line in enumerate(fpointer):
            if i < _END_OF_HEADER:
                if "LINTYP" in line:
                    in_labels = True
                elif line[5:].startswith("Time series"):
                    in_labels = False
                elif in_labels:
                    if header := line[4:30].strip():
                        cols.append(header)
                    else:
                        in_labels = False
                continue

            if i == _END_OF_HEADER or not line.strip():
                # the last header line and blank lines carry no data
                continue

            year, month, day, hour, minute = line[4:22].split()

            if int(hour) == 24:
                # datetime has no hour 24: use 00:00 of the next day
                stamp = datetime.datetime(
                    int(year), int(month), int(day)
                ) + datetime.timedelta(days=1)
            else:
                stamp = datetime.datetime(
                    int(year), int(month), int(day), int(hour), int(minute)
                )
            rows.append([stamp, *(float(x) for x in line[23:].split())])

    # Passing the names to the constructor also works when there are no rows.
    pgdf = pd.DataFrame(rows, columns=["Datetime"] + cols)
    pgdf = pgdf.set_index(["Datetime"])
    pgdf.index = pd.DatetimeIndex(pgdf.index)

    return pgdf
@@ -0,0 +1,233 @@
1
+ """
2
+ Pure python WDM file reader.
3
+ """
4
+
5
+ from datetime import datetime
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
# Attribute table keyed by WDM attribute number.  Each value is
# (NAME, data type, length) where the data type is "I" (integer), "R" (real)
# or "S" (string).  For strings the length is a character count, read four
# characters per 32-bit word; the integer and real entries all use length 1.
attrinfo = {
    1: ("TSTYPE", "S", 4),
    2: ("STAID", "S", 16),
    11: ("DAREA", "R", 1),
    17: ("TCODE", "I", 1),
    27: ("TSBYR", "I", 1),
    28: ("TSBMO", "I", 1),
    29: ("TSBDY", "I", 1),
    30: ("TSBHR", "I", 1),
    32: ("TFILL", "R", 1),
    33: ("TSSTEP", "I", 1),
    34: ("TGROUP", "I", 1),
    45: ("STNAM", "S", 48),
    83: ("COMPFG", "I", 1),
    84: ("TSFORM", "I", 1),
    85: ("VBTIME", "I", 1),
    444: ("A444", "S", 12),
    443: ("A443", "S", 12),
    22: ("DCODE", "I", 1),
    10: ("DESCRP", "S", 80),
    7: ("ELEV", "R", 1),
    8: ("LATDEG", "R", 1),
    9: ("LNGDEG", "R", 1),
    288: ("SCENARIO", "S", 8),
    289: ("CONSTITUENT", "S", 8),
    290: ("LOCATION", "S", 8),
}
38
+
39
# pandas date_range() frequency alias by TCODE / TGROUP time-unit code.
# The lower-case "h" and "s" spellings replace "H" and "S", which pandas 2.2
# deprecated; this matches the "min" spelling already used for minutes.
freq = {
    7: "100YS",
    6: "YS",
    5: "MS",
    4: "D",
    3: "h",
    2: "min",
    1: "s",
}
48
+
49
+
50
def wdm_extract(wdmfile, *idsn):
    """Extract the requested data set numbers (DSN) from a WDM file.

    Parameters
    ----------
    wdmfile
        The WDM file to read; it is handed to ``numpy.fromfile`` and also
        used as the prefix of the column names.
    *idsn
        The data set numbers to extract; each is converted with ``int``.

    Returns
    -------
    pandas.DataFrame
        One column named ``f"{wdmfile}_{dsn}"`` per requested data set that
        holds data, outer-joined on the datetime index.  Values equal to the
        data set's TFILL attribute are dropped.  The frame is empty when
        nothing matched.

    Raises
    ------
    ValueError
        If the file is empty or its first 32-bit word is not -998.
    """
    idsn = [int(i) for i in idsn]

    # Read the file once and reinterpret the same words as float32.  A second
    # np.fromfile call would re-read the whole file and would return nothing
    # for an already consumed file object.
    iarray = np.fromfile(wdmfile, dtype=np.int32)
    farray = iarray.view(np.float32)

    if iarray.size == 0 or iarray[0] != -998:
        raise ValueError("Not a WDM file, magic number is not -998. Stopping!")

    # first record is File Definition Record; plain ints keep
    # ``nrecords * 512`` out of 32-bit arithmetic
    nrecords = int(iarray[28])
    ntimeseries = int(iarray[31])

    dsnlist = [
        index
        for index in range(512, nrecords * 512, 512)
        if (
            not (
                iarray[index] == iarray[index + 1] == iarray[index + 2] == 0
                and iarray[index + 3]
            )
            and iarray[index + 5] == 1
        )
    ]
    if len(dsnlist) != ntimeseries:
        print("PROGRAM ERROR, wrong number of DSN records found")

    retdf = pd.DataFrame()

    for index in dsnlist:
        # get layout information for TimeSeries Dataset frame
        dsn = iarray[index + 4]

        if dsn not in idsn:
            continue
        psa = iarray[index + 9]

        # With psa <= 0 there is no attribute block to walk: use a count of
        # zero instead of a value left over from a previous data set.
        sacnt = iarray[index + psa - 1] if psa > 0 else 0
        pdat = iarray[index + 10]
        pdatv = iarray[index + 11]

        # get attributes
        dattr = {
            "TSBDY": 1,
            "TSBHR": 1,
            "TSBMO": 1,
            "TSBYR": 1900,
            "TFILL": -999.0,
        }  # preset defaults

        # attributes are stored as (attribute number, pointer) pairs
        for i in range(psa + 1, psa + 1 + 2 * sacnt, 2):
            iarray_id = iarray[index + i]
            ptr = iarray[index + i + 1] - 1 + index

            if iarray_id not in attrinfo:
                print(
                    "PROGRAM ERROR: ATTRIBUTE INDEX not found",
                    iarray_id,
                    "Attribute pointer",
                    iarray[index + i + 1],
                )

                continue

            name, atype, length = attrinfo[iarray_id]

            if atype == "I":
                dattr[name] = iarray[ptr]
            elif atype == "R":
                dattr[name] = farray[ptr]
            else:
                # strings are packed four characters per 32-bit word
                dattr[name] = "".join(
                    [itostr(iarray[k]) for k in range(ptr, ptr + length // 4)]
                ).strip()

        # Get timeseries timebase data
        records = []

        for i in range(pdat + 1, pdatv - 1):
            if a_record := iarray[index + i]:
                records.append(splitposition(a_record))

        if not records:
            continue  # WDM preallocated, but nothing saved here yet

        srec, soffset = records[0]
        start = splitdate(iarray[srec * 512 + soffset])

        # calculate number of data points in each group, tindex is final index
        # for storage
        tgroup = dattr["TGROUP"]
        tstep = dattr["TSSTEP"]
        tcode = dattr["TCODE"]
        cindex = pd.date_range(start=start, periods=len(records) + 1, freq=freq[tgroup])
        tindex = pd.date_range(
            start=start, end=cindex[-1], freq=str(tstep) + freq[tcode]
        )
        counts = np.diff(np.searchsorted(tindex, cindex))

        # Get timeseries data
        floats = np.zeros(sum(counts), dtype=np.float32)
        findex = 0

        for (rec, offset), count in zip(records, counts):
            findex = getfloats(iarray, farray, floats, findex, rec, offset, count)

        series = pd.DataFrame(floats[:findex], index=tindex[:findex])
        series = series[series[0] != dattr["TFILL"]]
        series.columns = [f"{wdmfile}_{dsn}"]
        retdf = retdf.join(series, how="outer")

    return retdf
163
+
164
+
165
def todatetime(year=1900, month=1, day=1, hour=0):
    """Build a timestamp from year, month, day and hour, accepting hour 24.

    Hour 24 is returned as midnight of the following day (23:00 plus a
    one-hour ``pd.Timedelta``); any other hour is returned as a plain
    ``datetime`` for that hour.
    """
    if hour != 24:
        return datetime(year, month, day, hour)

    # datetime rejects hour 24: start from 23:00 and step one hour forward
    last_hour = datetime(year, month, day, 23)
    return last_hour + pd.Timedelta(1, "h")
173
+
174
+
175
def splitdate(datwrd):
    """Decode the WDM compressed date word DATWRD into a timestamp.

    The word packs, from the low end, the hour (modulo 32), the day (modulo
    32), the month (modulo 16) and the year (modulo 131072).  The decoded
    fields are handed to ``todatetime``.
    """
    hour = int(datwrd % 32)
    day = int((datwrd / 32) % 32)
    month = int((datwrd / 1024) % 16)
    year = int((datwrd / 16384) % 131072)

    return todatetime(year=year, month=month, day=day, hour=hour)
184
+
185
+
186
def splitposition(recoffset):
    """Split a packed position word into ``(record, offset)``.

    The low 9 bits hold the offset and the remaining bits the record; both
    are reduced by one to give Python zero based indexes.
    """
    record = (recoffset >> 9) - 1
    offset = (recoffset & 511) - 1

    return (record, offset)
191
+
192
+
193
def itostr(i):
    """Convert a 32-bit integer to the four characters packed in it.

    The lowest byte becomes the first character and the highest byte the
    last one.
    """
    return "".join(chr((i >> shift) & 255) for shift in (0, 8, 16, 24))
197
+
198
+
199
def getfloats(iarray, farray, floats, findex, rec, offset, count):
    """Copy up to ``count`` values of one data group into ``floats``.

    ``iarray`` and ``farray`` are the integer and float readings of the file
    words.  Reading starts one word after ``rec * 512 + offset`` and follows
    the forward data pointer of a record when its end is reached.  Each
    block starts with a control word: its upper 16 bits give the number of
    values and bits 5-6 tell whether one stored value is repeated for the
    whole block or every value is stored individually.

    Values are written to ``floats`` starting at ``findex``; copying stops
    early once ``floats`` is full.  Returns the updated ``findex``.
    """
    index = rec * 512 + offset + 1
    stop = (rec + 1) * 512
    copied = 0
    capacity = len(floats)

    while copied < count and findex < capacity:
        if index >= stop:
            # word 3 of a record is the forward data pointer (one based)
            rec = iarray[rec * 512 + 3] - 1
            # data of the next record starts at word 4
            index = rec * 512 + 4
            stop = (rec + 1) * 512

        control_word = iarray[index]  # most of the control word is not needed
        nval = control_word >> 16
        repeated = control_word >> 5 & 0x3  # comp flag of the control word

        index += 1

        for k in range(nval):
            if findex >= capacity:
                return findex
            # a repeated block stores one value, otherwise one per position
            floats[findex] = farray[index] if repeated else farray[index + k]
            findex += 1

        index += 1 if repeated else nval
        copied += nval

    return findex