xarray-dbd 0.2.3__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xarray_dbd-0.2.6/CHANGELOG.md +126 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/PKG-INFO +136 -7
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/README.md +135 -6
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/ColumnData.C +6 -2
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Decompress.C +21 -6
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Decompress.H +6 -1
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Header.C +25 -1
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Header.H +3 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/KnownBytes.C +28 -21
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/MyException.H +2 -2
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Sensor.C +11 -5
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Sensors.C +9 -6
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/SensorsMap.C +13 -1
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/SensorsMap.H +1 -1
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/config.h +1 -1
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/dbd_python.cpp +17 -8
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/pyproject.toml +2 -2
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/tests/test_backend.py +93 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/tests/test_cli.py +101 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/tests/test_cpp_backend.py +73 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/tests/test_dbdreader2.py +98 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/_dbd_cpp.pyi +1 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/backend.py +154 -60
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/csv.py +15 -2
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/dbd2nc.py +39 -3
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/mkone.py +44 -9
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/sensors.py +3 -3
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/_core.py +67 -18
- xarray_dbd-0.2.3/CHANGELOG.md +0 -47
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/.clang-tidy +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/.gitignore +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/.pre-commit-config.yaml +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/CMakeLists.txt +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/CONTRIBUTING.md +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/License.txt +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/benchmark_performance.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/conda/recipe.yaml +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/ColumnData.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Data.C +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Data.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/FileInfo.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/KnownBytes.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Logger.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Sensor.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/Sensors.H +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/lz4.c +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/csrc/lz4.h +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/examples/README.md +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/scripts/README.md +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/tests/conftest.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/__init__.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/__init__.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/cache.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/logger.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/main.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/cli/missions.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/__init__.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/_cache.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/_errors.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/_list.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/dbdreader2/_util.py +0 -0
- {xarray_dbd-0.2.3 → xarray_dbd-0.2.6}/xarray_dbd/py.typed +0 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.2.6] - 2026-03-30
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- `--list-sensors` flag for `dbd2nc` CLI to print available sensors without conversion
|
|
13
|
+
- `batch_size` parameter for `write_multi_dbd_netcdf()` (was hardcoded at 100)
|
|
14
|
+
- Signal handling in `mkone` — Ctrl+C now terminates child processes cleanly
|
|
15
|
+
- "Working with Glider Data" section in README (sensor discovery, time conversion, fill values)
|
|
16
|
+
- Tests for `get_CTD_sync`, `determine_ctd_type`, `get_global_time_range`, file ordering, batch boundaries
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- `get_sync()` logs interpolation failures at WARNING level instead of INFO
|
|
21
|
+
- Streaming writer logs summary when batches are skipped due to errors
|
|
22
|
+
- `set_time_limits()` accepts numeric epoch seconds in addition to date strings
|
|
23
|
+
- C++ `SensorsMap::setUpForData()` validates sensor byte sizes across files
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- **Data loss in streaming writer**: removed Python-side double-skip at batch boundaries (C++ already handles `skip_first_record`)
|
|
28
|
+
- **dbdreader2 file ordering**: pass `presorted=True` to `read_dbd_files` so C++ respects chronological order from `DBDList.sort()`
|
|
29
|
+
- **mkone worker error propagation**: workers now exit non-zero on failure so parent detects errors
|
|
30
|
+
- **`_get_with_source` time ordering**: results now sorted by time for consistency with normal `get()` path
|
|
31
|
+
- **`sci_extensions` missing `.sbd`**: file pairing now recognizes `.sbd` as a science file type
|
|
32
|
+
- **`set_time_limits` falsy check**: epoch time 0 no longer causes spurious ValueError
|
|
33
|
+
- **inf-to-NaN for repeated values**: code=1 (repeat) now converts infinity consistently with code=2 (new value)
|
|
34
|
+
- Removed unused `"j"` dimension from `DBDDataStore.get_dimensions()`
|
|
35
|
+
- Fixed `--skip-first` help text (was stale after skip semantics change)
|
|
36
|
+
- Fixed README: CLI command names, removed false wildcard `to_keep` claim
|
|
37
|
+
|
|
38
|
+
## [0.2.5] - 2026-03-30
|
|
39
|
+
|
|
40
|
+
### Added
|
|
41
|
+
|
|
42
|
+
- `sort` parameter for `open_multi_dbd_dataset()` and `write_multi_dbd_netcdf()` with three modes: `"header_time"` (default, sort by `fileopen_time` from each file's DBD header), `"lexicographic"`, and `"none"` (preserve caller's order)
|
|
43
|
+
- `--sort` CLI flag for `dbd2nc`, `mkone`, and `2csv` commands
|
|
44
|
+
- `presorted` parameter for `read_dbd_files()` C++ binding to skip internal lexicographic sort when files are pre-sorted by Python
|
|
45
|
+
- `sensor_size` attribute on variables from `open_multi_dbd_dataset()`, matching single-file behavior
|
|
46
|
+
- `--skip-first` flag for `mkone` as consistent alias for the inverse `--keep-first`
|
|
47
|
+
- Duplicate file detection and deduplication with warning in multi-file functions
|
|
48
|
+
- Output directory auto-creation in `write_multi_dbd_netcdf()`
|
|
49
|
+
- "Choosing an API" and "Slocum File Types" sections in README
|
|
50
|
+
- Fill value and CF-compliance guidance in README Known Limitations
|
|
51
|
+
|
|
52
|
+
### Changed
|
|
53
|
+
|
|
54
|
+
- `skip_first_record` in `read_dbd_files()` now skips the first record of **all** files (including the first), matching Lucas Merckelbach's dbdreader behavior
|
|
55
|
+
- Streaming NetCDF writer keeps a single file handle open instead of reopening per batch
|
|
56
|
+
|
|
57
|
+
### Fixed
|
|
58
|
+
|
|
59
|
+
- File ordering for TWR-style filenames (e.g. `ce_1137-2026-085-1-10.dbd` incorrectly sorting before `-2.dbd` under lexicographic sort)
|
|
60
|
+
- `_parse_fileopen_time()` now logs a warning instead of silently sorting unparseable files to end
|
|
61
|
+
- `DBD.get_fileopen_time()` no longer raises on unparseable header values
|
|
62
|
+
- Thread-safe random number generator in C++ cache file creation
|
|
63
|
+
- Integer overflow guard in C++ column capacity doubling
|
|
64
|
+
|
|
65
|
+
## [0.2.3] - 2026-02-23
|
|
66
|
+
|
|
67
|
+
### Added
|
|
68
|
+
|
|
69
|
+
- `include_source` support in `MultiDBD.get()` — returns per-record source DBD references, matching dbdreader's API
|
|
70
|
+
- `continue_on_reading_error` parameter for `MultiDBD.get()` — skip corrupted files instead of raising, matching dbdreader v0.5.9
|
|
71
|
+
- `DBD_ERROR_READ_ERROR` error code (14) for compatibility with dbdreader
|
|
72
|
+
- Python 3.14 pre-built wheels for all platforms (Linux, macOS, Windows)
|
|
73
|
+
- Attribution to Lucas Merckelbach's [dbdreader](https://github.com/smerckel/dbdreader) in README
|
|
74
|
+
|
|
75
|
+
## [0.2.2] - 2026-02-23
|
|
76
|
+
|
|
77
|
+
### Added
|
|
78
|
+
|
|
79
|
+
- `preload` parameter for `DBD` and `MultiDBD` constructors
|
|
80
|
+
- Changelog configuration and tag/version validation in publish workflow
|
|
81
|
+
|
|
82
|
+
### Fixed
|
|
83
|
+
|
|
84
|
+
- mypy errors: `datetime.UTC`, tuple assignments, type annotations
|
|
85
|
+
- ruff formatting compliance
|
|
86
|
+
|
|
87
|
+
## [0.2.1] - 2026-02-22
|
|
88
|
+
|
|
89
|
+
### Added
|
|
90
|
+
|
|
91
|
+
- Streaming NetCDF writer (`write_multi_dbd_netcdf`) for low-memory batch conversion
|
|
92
|
+
- dbdreader-compatible API layer (`DBD` and `MultiDBD` classes in `xarray_dbd.dbdreader2`)
|
|
93
|
+
- Unified CLI under `xdbd` command with subcommands (`2nc`, `mkone`, `2csv`, `missions`, `cache`)
|
|
94
|
+
- Monotonicity check in `get_sync()` to prevent silent wrong results from `np.interp`
|
|
95
|
+
|
|
96
|
+
### Changed
|
|
97
|
+
|
|
98
|
+
- CLI restructured: standalone `dbd2nc` and `mkone` commands replaced by `xdbd 2nc` and `xdbd mkone`
|
|
99
|
+
- Streaming mode is now the default for non-append `2nc` and `mkone` (requires netCDF4)
|
|
100
|
+
- Fill values corrected: -127 for int8, -32768 for int16 (matching C++ dbd2netCDF standalone)
|
|
101
|
+
- Multi-file reader uses read-copy-discard strategy to reduce peak memory ~53%
|
|
102
|
+
- Replaced inf with NaN in float reads to match C++ dbd2netCDF behavior
|
|
103
|
+
|
|
104
|
+
### Fixed
|
|
105
|
+
|
|
106
|
+
- Multi-file parse dropping records from unfactored DBD files
|
|
107
|
+
- Corrupted file recovery: discard partial record on I/O error
|
|
108
|
+
|
|
109
|
+
## [0.1.0] - 2026-02-20
|
|
110
|
+
|
|
111
|
+
### Added
|
|
112
|
+
|
|
113
|
+
- C++ backend via pybind11 wrapping [dbd2netCDF](https://github.com/mousebrains/dbd2netcdf) parser
|
|
114
|
+
- Native xarray engine integration (`xr.open_dataset(f, engine="dbd")`)
|
|
115
|
+
- Multi-file reading with `open_multi_dbd_dataset()` using C++ SensorsMap two-pass approach
|
|
116
|
+
- CLI tools: `dbd2nc` for single/multi-file conversion, `mkone` for batch directory processing
|
|
117
|
+
- Native dtype support: int8, int16, float32, float64 columns (no double-conversion overhead)
|
|
118
|
+
- LZ4 decompression for compressed `.?cd` files
|
|
119
|
+
- Sensor filtering (`to_keep`), mission filtering (`skip_missions`/`keep_missions`)
|
|
120
|
+
- Corrupted file recovery with `repair=True`
|
|
121
|
+
- Python 3.10+ and free-threaded Python (PEP 703) support
|
|
122
|
+
|
|
123
|
+
### Changed
|
|
124
|
+
|
|
125
|
+
- Replaced pure-Python parser with C++ pybind11 extension for ~5x performance improvement
|
|
126
|
+
- Fill values: NaN for float32/float64, -127 for int8, -32768 for int16 (matching C++ dbd2netCDF)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xarray-dbd
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Efficient xarray backend for reading glider DBD files
|
|
5
5
|
Keywords: glider,oceanography,dbd,slocum,xarray,netcdf
|
|
6
6
|
Author-Email: Pat Welch <pat@mousebrains.com>
|
|
@@ -41,7 +41,7 @@ Description-Content-Type: text/markdown
|
|
|
41
41
|
[](License.txt)
|
|
42
42
|
[](https://github.com/mousebrains/dbd2netcdf-python/actions/workflows/ci.yml)
|
|
43
43
|
[](https://github.com/mousebrains/dbd2netcdf-python/actions/workflows/codeql.yml)
|
|
44
|
-
[](https://codecov.io/gh/mousebrains/dbd2netcdf-python)
|
|
45
45
|
[](https://github.com/astral-sh/ruff)
|
|
46
46
|
|
|
47
47
|
An efficient xarray backend for reading Dinkum Binary Data (DBD) files from
|
|
@@ -74,7 +74,7 @@ pip install xarray-dbd
|
|
|
74
74
|
For the CLI tools only:
|
|
75
75
|
|
|
76
76
|
```bash
|
|
77
|
-
pipx install xarray-dbd # installs
|
|
77
|
+
pipx install xarray-dbd # installs xdbd command (xdbd 2nc, xdbd mkone, etc.)
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
Or install from source (requires a C++ compiler and CMake):
|
|
@@ -147,6 +147,30 @@ ds = xdbd.open_multi_dbd_dataset(
|
|
|
147
147
|
)
|
|
148
148
|
```
|
|
149
149
|
|
|
150
|
+
### File sort order
|
|
151
|
+
|
|
152
|
+
By default, files are sorted by the `fileopen_time` timestamp in each file's
|
|
153
|
+
header, which is correct regardless of filename convention. Alternative sort
|
|
154
|
+
modes are available:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# Default: sort by header timestamp (universally correct)
|
|
158
|
+
ds = xdbd.open_multi_dbd_dataset(files)
|
|
159
|
+
|
|
160
|
+
# Sort by filename (lexicographic)
|
|
161
|
+
ds = xdbd.open_multi_dbd_dataset(files, sort="lexicographic")
|
|
162
|
+
|
|
163
|
+
# Preserve the caller's order (no sorting)
|
|
164
|
+
ds = xdbd.open_multi_dbd_dataset(files, sort="none")
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
The `--sort` flag is also available on all CLI commands:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
dbd2nc --sort lexicographic -C cache -o output.nc *.dbd
|
|
171
|
+
mkone --sort none --output-prefix /path/to/output/ /path/to/raw/
|
|
172
|
+
```
|
|
173
|
+
|
|
150
174
|
### Advanced options
|
|
151
175
|
|
|
152
176
|
```python
|
|
@@ -154,7 +178,7 @@ ds = xdbd.open_dbd_dataset(
|
|
|
154
178
|
'test.sbd',
|
|
155
179
|
skip_first_record=True, # Skip first record (default)
|
|
156
180
|
repair=True, # Attempt to repair corrupted data
|
|
157
|
-
to_keep=['
|
|
181
|
+
to_keep=['m_depth', 'm_lat'], # Keep only these sensors
|
|
158
182
|
criteria=['m_present_time'], # Sensors for record selection
|
|
159
183
|
)
|
|
160
184
|
```
|
|
@@ -189,6 +213,7 @@ Open a single DBD file as an xarray Dataset.
|
|
|
189
213
|
- `to_keep` (list of str): Sensor names to keep (default: all)
|
|
190
214
|
- `criteria` (list of str): Sensor names for selection criteria
|
|
191
215
|
- `drop_variables` (list of str): Variables to exclude
|
|
216
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
192
217
|
|
|
193
218
|
**Returns:** `xarray.Dataset`
|
|
194
219
|
|
|
@@ -204,9 +229,32 @@ Open multiple DBD files as a single concatenated xarray Dataset.
|
|
|
204
229
|
- `criteria` (list of str): Sensor names for selection criteria
|
|
205
230
|
- `skip_missions` (list of str): Mission names to skip
|
|
206
231
|
- `keep_missions` (list of str): Mission names to keep
|
|
232
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
233
|
+
- `sort` (str): File sort order — `"header_time"` (default, sort by `fileopen_time` from each file's header), `"lexicographic"`, or `"none"` (preserve caller's order).
|
|
207
234
|
|
|
208
235
|
**Returns:** `xarray.Dataset`
|
|
209
236
|
|
|
237
|
+
### `write_multi_dbd_netcdf(filenames, output, **kwargs)`
|
|
238
|
+
|
|
239
|
+
Stream multiple DBD files directly to a NetCDF file without loading all data
|
|
240
|
+
into memory. Preferred for large datasets (100+ files).
|
|
241
|
+
|
|
242
|
+
**Parameters:**
|
|
243
|
+
- `filenames` (iterable): Paths to DBD files (duplicates removed automatically)
|
|
244
|
+
- `output` (str or Path): Output NetCDF file path (parent directory created if needed)
|
|
245
|
+
- `skip_first_record` (bool): Skip first record in each file (default: True)
|
|
246
|
+
- `repair` (bool): Attempt to repair corrupted records (default: False)
|
|
247
|
+
- `to_keep` (list of str): Sensor names to keep (default: all)
|
|
248
|
+
- `criteria` (list of str): Sensor names for selection criteria
|
|
249
|
+
- `skip_missions` (list of str): Mission names to skip
|
|
250
|
+
- `keep_missions` (list of str): Mission names to keep
|
|
251
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
252
|
+
- `compression` (int): Zlib compression level 0-9 (default: 5, 0 disables)
|
|
253
|
+
- `sort` (str): File sort order (default: `"header_time"`)
|
|
254
|
+
- `batch_size` (int): Files per batch (default: 100; smaller reduces peak memory)
|
|
255
|
+
|
|
256
|
+
**Returns:** `tuple[int, int]` — (n_records, n_files)
|
|
257
|
+
|
|
210
258
|
## Migration from dbdreader
|
|
211
259
|
|
|
212
260
|
The dbdreader2 API is derived from Lucas Merckelbach's
|
|
@@ -353,9 +401,8 @@ mdbd = dbdreader.MultiDBD(
|
|
|
353
401
|
to batch additional sensors into the first `get()` call.
|
|
354
402
|
|
|
355
403
|
- **`skip_initial_line` semantics.** When reading multiple files, the
|
|
356
|
-
first
|
|
357
|
-
|
|
358
|
-
Multi-file record counts may therefore differ by up to N-1.
|
|
404
|
+
first record of every file is skipped (matching dbdreader). Multi-file
|
|
405
|
+
record counts should match dbdreader exactly.
|
|
359
406
|
|
|
360
407
|
- **Float64 output.** `get()` always returns float64 arrays, matching
|
|
361
408
|
dbdreader's behavior. Integer fill values (-127 for int8, -32768 for
|
|
@@ -472,6 +519,7 @@ print(f"Depth units: {ds['m_depth'].attrs['units']}")
|
|
|
472
519
|
### Working with trajectories
|
|
473
520
|
|
|
474
521
|
```python
|
|
522
|
+
from pathlib import Path
|
|
475
523
|
import xarray_dbd as xdbd
|
|
476
524
|
import matplotlib.pyplot as plt
|
|
477
525
|
|
|
@@ -492,6 +540,7 @@ plt.show()
|
|
|
492
540
|
### Extracting science data
|
|
493
541
|
|
|
494
542
|
```python
|
|
543
|
+
from pathlib import Path
|
|
495
544
|
# Read full resolution science data
|
|
496
545
|
files = sorted(Path('.').glob('*.ebd'))
|
|
497
546
|
ds = xdbd.open_multi_dbd_dataset(
|
|
@@ -504,6 +553,74 @@ df = ds.to_dataframe()
|
|
|
504
553
|
print(df.describe())
|
|
505
554
|
```
|
|
506
555
|
|
|
556
|
+
## Choosing an API
|
|
557
|
+
|
|
558
|
+
| Scenario | Recommended API |
|
|
559
|
+
|----------|----------------|
|
|
560
|
+
| Single file, quick look | `xr.open_dataset(f, engine="dbd")` |
|
|
561
|
+
| Multiple files, < 1 GB | `xdbd.open_multi_dbd_dataset(files, to_keep=[...])` |
|
|
562
|
+
| Multiple files, large dataset | `xdbd.write_multi_dbd_netcdf(files, "out.nc")` |
|
|
563
|
+
| Interactive / Jupyter | `xdbd.MultiDBD(filenames=files)` with `.get()` (lazy) |
|
|
564
|
+
| Batch processing 1000+ files | `mkone` CLI (multiprocessing) |
|
|
565
|
+
| Drop-in dbdreader replacement | `import xarray_dbd.dbdreader2 as dbdreader` |
|
|
566
|
+
|
|
567
|
+
## Slocum File Types
|
|
568
|
+
|
|
569
|
+
| Extension | Name | Contents |
|
|
570
|
+
|-----------|------|----------|
|
|
571
|
+
| `.dbd` / `.dcd` | Flight | Vehicle sensors: depth, attitude, speed, GPS |
|
|
572
|
+
| `.ebd` / `.ecd` | Science | Payload sensors: CTD, optics, oxygen |
|
|
573
|
+
| `.sbd` / `.scd` | Short burst | Surface telemetry summary records |
|
|
574
|
+
| `.tbd` / `.tcd` | Technical | Detailed engineering telemetry |
|
|
575
|
+
| `.mbd` / `.mcd` | Mini | Compact engineering subset |
|
|
576
|
+
| `.nbd` / `.ncd` | Narrow | Compact science subset |
|
|
577
|
+
|
|
578
|
+
Compressed variants (`.?cd`) use LZ4 framing and are handled transparently.
|
|
579
|
+
|
|
580
|
+
## Working with Glider Data
|
|
581
|
+
|
|
582
|
+
### Discovering available sensors
|
|
583
|
+
|
|
584
|
+
```python
|
|
585
|
+
import xarray_dbd as xdbd
|
|
586
|
+
|
|
587
|
+
# xarray API
|
|
588
|
+
ds = xdbd.open_dbd_dataset("file.dbd", cache_dir="cache")
|
|
589
|
+
for var in sorted(ds.data_vars):
|
|
590
|
+
print(f" {var:30s} {ds[var].attrs.get('units', '')}")
|
|
591
|
+
|
|
592
|
+
# dbdreader2 API
|
|
593
|
+
dbd = xdbd.MultiDBD(pattern="*.dbd", cacheDir="cache")
|
|
594
|
+
for name in sorted(dbd.parameterNames["eng"]):
|
|
595
|
+
print(f" {name:30s} {dbd.parameterUnits.get(name, '')}")
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
Sensor naming conventions are documented in
|
|
599
|
+
[TWR's masterdata files](https://gliderfs2.ceoas.oregonstate.edu/gliderweb/masterdata/).
|
|
600
|
+
|
|
601
|
+
### Time conversion
|
|
602
|
+
|
|
603
|
+
`m_present_time` contains UTC seconds since 1970-01-01 (Unix epoch, float64):
|
|
604
|
+
|
|
605
|
+
```python
|
|
606
|
+
import pandas as pd
|
|
607
|
+
|
|
608
|
+
time = pd.to_datetime(ds["m_present_time"].values, unit="s", utc=True)
|
|
609
|
+
```
|
|
610
|
+
|
|
611
|
+
### Handling fill values
|
|
612
|
+
|
|
613
|
+
Float sensors use NaN for missing data. Integer sensors use sentinel fill
|
|
614
|
+
values (-127 for int8, -32768 for int16). Filter them out:
|
|
615
|
+
|
|
616
|
+
```python
|
|
617
|
+
# xarray — replace sentinels with NaN
|
|
618
|
+
ds = ds.where(ds != -32768)
|
|
619
|
+
|
|
620
|
+
# dbdreader2 — automatic filtering (default)
|
|
621
|
+
t, v = dbd.get("m_depth") # return_nans=False by default
|
|
622
|
+
```
|
|
623
|
+
|
|
507
624
|
## Known Limitations
|
|
508
625
|
|
|
509
626
|
- **Python 3.10+ required** — uses `from __future__ import annotations` for modern type-hint syntax.
|
|
@@ -514,6 +631,18 @@ print(df.describe())
|
|
|
514
631
|
- **No lazy loading for xarray API** — `open_dataset()` reads all sensor data
|
|
515
632
|
into memory. For very large deployments, use `to_keep` to select only needed
|
|
516
633
|
sensors. The dbdreader2 API (`DBD`/`MultiDBD`) uses lazy incremental loading.
|
|
634
|
+
- **Fill values in xarray output** — Integer sensors use sentinel fill values
|
|
635
|
+
(-127 for int8, -32768 for int16) rather than NaN. Between dives, science
|
|
636
|
+
sensors may contain these sentinels or NaN. Filter with
|
|
637
|
+
`ds.where(ds != -32768)` or use the dbdreader2 `get(return_nans=False)` API
|
|
638
|
+
which filters automatically.
|
|
639
|
+
- **Not CF-compliant** — NetCDF output preserves sensor `units` but does not
|
|
640
|
+
add CF attributes (`standard_name`, `axis`, `calendar`). Add metadata
|
|
641
|
+
post-hoc for publication, e.g.:
|
|
642
|
+
```python
|
|
643
|
+
ds["m_present_time"].attrs["axis"] = "T"
|
|
644
|
+
ds["m_present_time"].attrs["units"] = "seconds since 1970-01-01"
|
|
645
|
+
```
|
|
517
646
|
|
|
518
647
|
## Troubleshooting
|
|
519
648
|
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](License.txt)
|
|
6
6
|
[](https://github.com/mousebrains/dbd2netcdf-python/actions/workflows/ci.yml)
|
|
7
7
|
[](https://github.com/mousebrains/dbd2netcdf-python/actions/workflows/codeql.yml)
|
|
8
|
-
[](https://codecov.io/gh/mousebrains/dbd2netcdf-python)
|
|
9
9
|
[](https://github.com/astral-sh/ruff)
|
|
10
10
|
|
|
11
11
|
An efficient xarray backend for reading Dinkum Binary Data (DBD) files from
|
|
@@ -38,7 +38,7 @@ pip install xarray-dbd
|
|
|
38
38
|
For the CLI tools only:
|
|
39
39
|
|
|
40
40
|
```bash
|
|
41
|
-
pipx install xarray-dbd # installs
|
|
41
|
+
pipx install xarray-dbd # installs xdbd command (xdbd 2nc, xdbd mkone, etc.)
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
Or install from source (requires a C++ compiler and CMake):
|
|
@@ -111,6 +111,30 @@ ds = xdbd.open_multi_dbd_dataset(
|
|
|
111
111
|
)
|
|
112
112
|
```
|
|
113
113
|
|
|
114
|
+
### File sort order
|
|
115
|
+
|
|
116
|
+
By default, files are sorted by the `fileopen_time` timestamp in each file's
|
|
117
|
+
header, which is correct regardless of filename convention. Alternative sort
|
|
118
|
+
modes are available:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
# Default: sort by header timestamp (universally correct)
|
|
122
|
+
ds = xdbd.open_multi_dbd_dataset(files)
|
|
123
|
+
|
|
124
|
+
# Sort by filename (lexicographic)
|
|
125
|
+
ds = xdbd.open_multi_dbd_dataset(files, sort="lexicographic")
|
|
126
|
+
|
|
127
|
+
# Preserve the caller's order (no sorting)
|
|
128
|
+
ds = xdbd.open_multi_dbd_dataset(files, sort="none")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
The `--sort` flag is also available on all CLI commands:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
dbd2nc --sort lexicographic -C cache -o output.nc *.dbd
|
|
135
|
+
mkone --sort none --output-prefix /path/to/output/ /path/to/raw/
|
|
136
|
+
```
|
|
137
|
+
|
|
114
138
|
### Advanced options
|
|
115
139
|
|
|
116
140
|
```python
|
|
@@ -118,7 +142,7 @@ ds = xdbd.open_dbd_dataset(
|
|
|
118
142
|
'test.sbd',
|
|
119
143
|
skip_first_record=True, # Skip first record (default)
|
|
120
144
|
repair=True, # Attempt to repair corrupted data
|
|
121
|
-
to_keep=['
|
|
145
|
+
to_keep=['m_depth', 'm_lat'], # Keep only these sensors
|
|
122
146
|
criteria=['m_present_time'], # Sensors for record selection
|
|
123
147
|
)
|
|
124
148
|
```
|
|
@@ -153,6 +177,7 @@ Open a single DBD file as an xarray Dataset.
|
|
|
153
177
|
- `to_keep` (list of str): Sensor names to keep (default: all)
|
|
154
178
|
- `criteria` (list of str): Sensor names for selection criteria
|
|
155
179
|
- `drop_variables` (list of str): Variables to exclude
|
|
180
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
156
181
|
|
|
157
182
|
**Returns:** `xarray.Dataset`
|
|
158
183
|
|
|
@@ -168,9 +193,32 @@ Open multiple DBD files as a single concatenated xarray Dataset.
|
|
|
168
193
|
- `criteria` (list of str): Sensor names for selection criteria
|
|
169
194
|
- `skip_missions` (list of str): Mission names to skip
|
|
170
195
|
- `keep_missions` (list of str): Mission names to keep
|
|
196
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
197
|
+
- `sort` (str): File sort order — `"header_time"` (default, sort by `fileopen_time` from each file's header), `"lexicographic"`, or `"none"` (preserve caller's order).
|
|
171
198
|
|
|
172
199
|
**Returns:** `xarray.Dataset`
|
|
173
200
|
|
|
201
|
+
### `write_multi_dbd_netcdf(filenames, output, **kwargs)`
|
|
202
|
+
|
|
203
|
+
Stream multiple DBD files directly to a NetCDF file without loading all data
|
|
204
|
+
into memory. Preferred for large datasets (100+ files).
|
|
205
|
+
|
|
206
|
+
**Parameters:**
|
|
207
|
+
- `filenames` (iterable): Paths to DBD files (duplicates removed automatically)
|
|
208
|
+
- `output` (str or Path): Output NetCDF file path (parent directory created if needed)
|
|
209
|
+
- `skip_first_record` (bool): Skip first record in each file (default: True)
|
|
210
|
+
- `repair` (bool): Attempt to repair corrupted records (default: False)
|
|
211
|
+
- `to_keep` (list of str): Sensor names to keep (default: all)
|
|
212
|
+
- `criteria` (list of str): Sensor names for selection criteria
|
|
213
|
+
- `skip_missions` (list of str): Mission names to skip
|
|
214
|
+
- `keep_missions` (list of str): Mission names to keep
|
|
215
|
+
- `cache_dir` (str, Path, or None): Directory for sensor cache files
|
|
216
|
+
- `compression` (int): Zlib compression level 0-9 (default: 5, 0 disables)
|
|
217
|
+
- `sort` (str): File sort order (default: `"header_time"`)
|
|
218
|
+
- `batch_size` (int): Files per batch (default: 100; smaller reduces peak memory)
|
|
219
|
+
|
|
220
|
+
**Returns:** `tuple[int, int]` — (n_records, n_files)
|
|
221
|
+
|
|
174
222
|
## Migration from dbdreader
|
|
175
223
|
|
|
176
224
|
The dbdreader2 API is derived from Lucas Merckelbach's
|
|
@@ -317,9 +365,8 @@ mdbd = dbdreader.MultiDBD(
|
|
|
317
365
|
to batch additional sensors into the first `get()` call.
|
|
318
366
|
|
|
319
367
|
- **`skip_initial_line` semantics.** When reading multiple files, the
|
|
320
|
-
first
|
|
321
|
-
|
|
322
|
-
Multi-file record counts may therefore differ by up to N-1.
|
|
368
|
+
first record of every file is skipped (matching dbdreader). Multi-file
|
|
369
|
+
record counts should match dbdreader exactly.
|
|
323
370
|
|
|
324
371
|
- **Float64 output.** `get()` always returns float64 arrays, matching
|
|
325
372
|
dbdreader's behavior. Integer fill values (-127 for int8, -32768 for
|
|
@@ -436,6 +483,7 @@ print(f"Depth units: {ds['m_depth'].attrs['units']}")
|
|
|
436
483
|
### Working with trajectories
|
|
437
484
|
|
|
438
485
|
```python
|
|
486
|
+
from pathlib import Path
|
|
439
487
|
import xarray_dbd as xdbd
|
|
440
488
|
import matplotlib.pyplot as plt
|
|
441
489
|
|
|
@@ -456,6 +504,7 @@ plt.show()
|
|
|
456
504
|
### Extracting science data
|
|
457
505
|
|
|
458
506
|
```python
|
|
507
|
+
from pathlib import Path
|
|
459
508
|
# Read full resolution science data
|
|
460
509
|
files = sorted(Path('.').glob('*.ebd'))
|
|
461
510
|
ds = xdbd.open_multi_dbd_dataset(
|
|
@@ -468,6 +517,74 @@ df = ds.to_dataframe()
|
|
|
468
517
|
print(df.describe())
|
|
469
518
|
```
|
|
470
519
|
|
|
520
|
+
## Choosing an API
|
|
521
|
+
|
|
522
|
+
| Scenario | Recommended API |
|
|
523
|
+
|----------|----------------|
|
|
524
|
+
| Single file, quick look | `xr.open_dataset(f, engine="dbd")` |
|
|
525
|
+
| Multiple files, < 1 GB | `xdbd.open_multi_dbd_dataset(files, to_keep=[...])` |
|
|
526
|
+
| Multiple files, large dataset | `xdbd.write_multi_dbd_netcdf(files, "out.nc")` |
|
|
527
|
+
| Interactive / Jupyter | `xdbd.MultiDBD(filenames=files)` with `.get()` (lazy) |
|
|
528
|
+
| Batch processing 1000+ files | `mkone` CLI (multiprocessing) |
|
|
529
|
+
| Drop-in dbdreader replacement | `import xarray_dbd.dbdreader2 as dbdreader` |
|
|
530
|
+
|
|
531
|
+
## Slocum File Types
|
|
532
|
+
|
|
533
|
+
| Extension | Name | Contents |
|
|
534
|
+
|-----------|------|----------|
|
|
535
|
+
| `.dbd` / `.dcd` | Flight | Vehicle sensors: depth, attitude, speed, GPS |
|
|
536
|
+
| `.ebd` / `.ecd` | Science | Payload sensors: CTD, optics, oxygen |
|
|
537
|
+
| `.sbd` / `.scd` | Short burst | Surface telemetry summary records |
|
|
538
|
+
| `.tbd` / `.tcd` | Technical | Detailed engineering telemetry |
|
|
539
|
+
| `.mbd` / `.mcd` | Mini | Compact engineering subset |
|
|
540
|
+
| `.nbd` / `.ncd` | Narrow | Compact science subset |
|
|
541
|
+
|
|
542
|
+
Compressed variants (`.?cd`) use LZ4 framing and are handled transparently.
|
|
543
|
+
|
|
544
|
+
## Working with Glider Data
|
|
545
|
+
|
|
546
|
+
### Discovering available sensors
|
|
547
|
+
|
|
548
|
+
```python
|
|
549
|
+
import xarray_dbd as xdbd
|
|
550
|
+
|
|
551
|
+
# xarray API
|
|
552
|
+
ds = xdbd.open_dbd_dataset("file.dbd", cache_dir="cache")
|
|
553
|
+
for var in sorted(ds.data_vars):
|
|
554
|
+
print(f" {var:30s} {ds[var].attrs.get('units', '')}")
|
|
555
|
+
|
|
556
|
+
# dbdreader2 API
|
|
557
|
+
dbd = xdbd.MultiDBD(pattern="*.dbd", cacheDir="cache")
|
|
558
|
+
for name in sorted(dbd.parameterNames["eng"]):
|
|
559
|
+
print(f" {name:30s} {dbd.parameterUnits.get(name, '')}")
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
Sensor naming conventions are documented in
|
|
563
|
+
[TWR's masterdata files](https://gliderfs2.ceoas.oregonstate.edu/gliderweb/masterdata/).
|
|
564
|
+
|
|
565
|
+
### Time conversion
|
|
566
|
+
|
|
567
|
+
`m_present_time` contains UTC seconds since 1970-01-01 (Unix epoch, float64):
|
|
568
|
+
|
|
569
|
+
```python
|
|
570
|
+
import pandas as pd
|
|
571
|
+
|
|
572
|
+
time = pd.to_datetime(ds["m_present_time"].values, unit="s", utc=True)
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
### Handling fill values
|
|
576
|
+
|
|
577
|
+
Float sensors use NaN for missing data. Integer sensors use sentinel fill
|
|
578
|
+
values (-127 for int8, -32768 for int16). Filter them out:
|
|
579
|
+
|
|
580
|
+
```python
|
|
581
|
+
# xarray — replace sentinels with NaN
|
|
582
|
+
ds = ds.where(ds != -32768)
|
|
583
|
+
|
|
584
|
+
# dbdreader2 — automatic filtering (default)
|
|
585
|
+
t, v = dbd.get("m_depth") # return_nans=False by default
|
|
586
|
+
```
|
|
587
|
+
|
|
471
588
|
## Known Limitations
|
|
472
589
|
|
|
473
590
|
- **Python 3.10+ required** — uses `from __future__ import annotations` for modern type-hint syntax.
|
|
@@ -478,6 +595,18 @@ print(df.describe())
|
|
|
478
595
|
- **No lazy loading for xarray API** — `open_dataset()` reads all sensor data
|
|
479
596
|
into memory. For very large deployments, use `to_keep` to select only needed
|
|
480
597
|
sensors. The dbdreader2 API (`DBD`/`MultiDBD`) uses lazy incremental loading.
|
|
598
|
+
- **Fill values in xarray output** — Integer sensors use sentinel fill values
|
|
599
|
+
(-127 for int8, -32768 for int16) rather than NaN. Between dives, science
|
|
600
|
+
sensors may contain these sentinels or NaN. Filter with
|
|
601
|
+
`ds.where(ds != -32768)` or use the dbdreader2 `get(return_nans=False)` API
|
|
602
|
+
which filters automatically.
|
|
603
|
+
- **Not CF-compliant** — NetCDF output preserves sensor `units` but does not
|
|
604
|
+
add CF attributes (`standard_name`, `axis`, `calendar`). Add metadata
|
|
605
|
+
post-hoc for publication, e.g.:
|
|
606
|
+
```python
|
|
607
|
+
ds["m_present_time"].attrs["axis"] = "T"
|
|
608
|
+
ds["m_present_time"].attrs["units"] = "seconds since 1970-01-01"
|
|
609
|
+
```
|
|
481
610
|
|
|
482
611
|
## Troubleshooting
|
|
483
612
|
|
|
@@ -123,7 +123,7 @@ ColumnDataResult read_columns(std::istream& is,
|
|
|
123
123
|
qKeep |= sensor.qCriteria();
|
|
124
124
|
const int oi = outIndex[i];
|
|
125
125
|
if (oi >= 0) {
|
|
126
|
-
// Copy previous value into current row
|
|
126
|
+
// Copy previous value into current row, converting inf to NaN
|
|
127
127
|
std::visit([nRows, oi](auto& col_vec, const auto& prev_vec) {
|
|
128
128
|
using T = typename std::decay_t<decltype(col_vec)>::value_type;
|
|
129
129
|
using PT = typename std::decay_t<decltype(prev_vec)>::value_type;
|
|
@@ -136,7 +136,11 @@ ColumnDataResult read_columns(std::istream& is,
|
|
|
136
136
|
else
|
|
137
137
|
col_vec.resize(col_vec.size() * 2, NAN);
|
|
138
138
|
}
|
|
139
|
-
|
|
139
|
+
T val = prev_vec[0];
|
|
140
|
+
if constexpr (std::is_floating_point_v<T>) {
|
|
141
|
+
if (std::isinf(val)) val = NAN;
|
|
142
|
+
}
|
|
143
|
+
col_vec[nRows] = val;
|
|
140
144
|
}
|
|
141
145
|
}, columns[oi], prevValues[oi]);
|
|
142
146
|
}
|
|
@@ -38,17 +38,20 @@ int DecompressTWRBuf::underflow() {
|
|
|
38
38
|
if (!this->mIS.read(frame.data(), n)) { // EOF
|
|
39
39
|
return std::char_traits<char>::eof();
|
|
40
40
|
}
|
|
41
|
-
const int j
|
|
41
|
+
const int j(LZ4_decompress_safe(frame.data(), this->mBuffer, static_cast<int>(n), sizeof(this->mBuffer)));
|
|
42
42
|
if (j < 0) { // LZ4 decompression error
|
|
43
|
+
LOG_ERROR("LZ4 decompression failed (error {}) in {} (block size {})",
|
|
44
|
+
j, this->mFilename, n);
|
|
43
45
|
return std::char_traits<char>::eof();
|
|
44
46
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
this->setg(this->mBuffer, this->mBuffer, this->mBuffer + j);
|
|
47
|
+
const size_t decompressedSize(static_cast<size_t>(j));
|
|
48
|
+
this->setg(this->mBuffer, this->mBuffer, this->mBuffer + decompressedSize);
|
|
49
|
+
this->mPos += decompressedSize;
|
|
49
50
|
} else { // Not compressed
|
|
50
51
|
if (this->mIS.read(this->mBuffer, sizeof(this->mBuffer)) || this->mIS.gcount()) {
|
|
51
|
-
|
|
52
|
+
const auto n = this->mIS.gcount();
|
|
53
|
+
this->setg(this->mBuffer, this->mBuffer, this->mBuffer + n);
|
|
54
|
+
this->mPos += static_cast<size_t>(n);
|
|
52
55
|
} else {
|
|
53
56
|
return std::char_traits<char>::eof();
|
|
54
57
|
}
|
|
@@ -57,6 +60,18 @@ int DecompressTWRBuf::underflow() {
|
|
|
57
60
|
return std::char_traits<char>::to_int_type(*this->gptr());
|
|
58
61
|
}
|
|
59
62
|
|
|
63
|
+
DecompressTWRBuf::pos_type
|
|
64
|
+
DecompressTWRBuf::seekoff(off_type off, std::ios_base::seekdir dir,
|
|
65
|
+
std::ios_base::openmode /*which*/) {
|
|
66
|
+
// Only support tellg(): seekoff(0, cur)
|
|
67
|
+
if (dir == std::ios_base::cur && off == 0) {
|
|
68
|
+
// mPos is total bytes loaded; subtract unread bytes remaining in buffer
|
|
69
|
+
const auto remaining = this->egptr() - this->gptr();
|
|
70
|
+
return static_cast<pos_type>(this->mPos - static_cast<size_t>(remaining));
|
|
71
|
+
}
|
|
72
|
+
return pos_type(off_type(-1)); // Seeking not supported
|
|
73
|
+
}
|
|
74
|
+
|
|
60
75
|
bool qCompressed(const std::string& fn) {
|
|
61
76
|
const std::string suffix(fs::path(fn).extension().string());
|
|
62
77
|
const bool q((suffix.size() == 4) && (std::tolower(static_cast<unsigned char>(suffix[2])) == 'c'));
|
|
@@ -11,6 +11,7 @@ class DecompressTWRBuf: public std::streambuf {
|
|
|
11
11
|
const bool mqCompressed;
|
|
12
12
|
char mBuffer[65536];
|
|
13
13
|
const std::string mFilename;
|
|
14
|
+
size_t mPos = 0; // Total decompressed bytes loaded into buffer
|
|
14
15
|
public:
|
|
15
16
|
DecompressTWRBuf(const std::string& fn, const bool qCompressed)
|
|
16
17
|
: mIS(fn.c_str(), std::ios::binary)
|
|
@@ -23,7 +24,11 @@ public:
|
|
|
23
24
|
|
|
24
25
|
void close() {mIS.close();}
|
|
25
26
|
|
|
26
|
-
int underflow();
|
|
27
|
+
int underflow() override;
|
|
28
|
+
|
|
29
|
+
protected:
|
|
30
|
+
pos_type seekoff(off_type off, std::ios_base::seekdir dir,
|
|
31
|
+
std::ios_base::openmode which = std::ios_base::in) override;
|
|
27
32
|
};
|
|
28
33
|
|
|
29
34
|
class DecompressTWR: public std::istream {
|