xarray-dbd 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. xarray_dbd-0.2.0/.clang-tidy +41 -0
  2. xarray_dbd-0.2.0/.gitignore +159 -0
  3. xarray_dbd-0.2.0/.pre-commit-config.yaml +15 -0
  4. xarray_dbd-0.2.0/CHANGELOG.md +25 -0
  5. xarray_dbd-0.2.0/CMakeLists.txt +75 -0
  6. xarray_dbd-0.2.0/CONTRIBUTING.md +82 -0
  7. xarray_dbd-0.2.0/License.txt +674 -0
  8. xarray_dbd-0.2.0/PKG-INFO +406 -0
  9. xarray_dbd-0.2.0/README.md +370 -0
  10. xarray_dbd-0.2.0/benchmark_performance.py +171 -0
  11. xarray_dbd-0.2.0/conda/recipe.yaml +60 -0
  12. xarray_dbd-0.2.0/csrc/ColumnData.C +222 -0
  13. xarray_dbd-0.2.0/csrc/ColumnData.H +47 -0
  14. xarray_dbd-0.2.0/csrc/Data.C +173 -0
  15. xarray_dbd-0.2.0/csrc/Data.H +67 -0
  16. xarray_dbd-0.2.0/csrc/Decompress.C +64 -0
  17. xarray_dbd-0.2.0/csrc/Decompress.H +59 -0
  18. xarray_dbd-0.2.0/csrc/FileInfo.H +25 -0
  19. xarray_dbd-0.2.0/csrc/Header.C +138 -0
  20. xarray_dbd-0.2.0/csrc/Header.H +53 -0
  21. xarray_dbd-0.2.0/csrc/KnownBytes.C +160 -0
  22. xarray_dbd-0.2.0/csrc/KnownBytes.H +41 -0
  23. xarray_dbd-0.2.0/csrc/Logger.H +14 -0
  24. xarray_dbd-0.2.0/csrc/MyException.H +34 -0
  25. xarray_dbd-0.2.0/csrc/Sensor.C +126 -0
  26. xarray_dbd-0.2.0/csrc/Sensor.H +63 -0
  27. xarray_dbd-0.2.0/csrc/Sensors.C +279 -0
  28. xarray_dbd-0.2.0/csrc/Sensors.H +85 -0
  29. xarray_dbd-0.2.0/csrc/SensorsMap.C +146 -0
  30. xarray_dbd-0.2.0/csrc/SensorsMap.H +54 -0
  31. xarray_dbd-0.2.0/csrc/config.h +11 -0
  32. xarray_dbd-0.2.0/csrc/dbd_python.cpp +729 -0
  33. xarray_dbd-0.2.0/csrc/lz4.c +2831 -0
  34. xarray_dbd-0.2.0/csrc/lz4.h +886 -0
  35. xarray_dbd-0.2.0/examples/README.md +227 -0
  36. xarray_dbd-0.2.0/pyproject.toml +148 -0
  37. xarray_dbd-0.2.0/scripts/README.md +12 -0
  38. xarray_dbd-0.2.0/tests/conftest.py +37 -0
  39. xarray_dbd-0.2.0/tests/test_backend.py +285 -0
  40. xarray_dbd-0.2.0/tests/test_cli.py +1172 -0
  41. xarray_dbd-0.2.0/tests/test_compat.py +333 -0
  42. xarray_dbd-0.2.0/tests/test_cpp_backend.py +330 -0
  43. xarray_dbd-0.2.0/xarray_dbd/__init__.py +38 -0
  44. xarray_dbd-0.2.0/xarray_dbd/_dbd_cpp.pyi +33 -0
  45. xarray_dbd-0.2.0/xarray_dbd/backend.py +547 -0
  46. xarray_dbd-0.2.0/xarray_dbd/cli/__init__.py +0 -0
  47. xarray_dbd-0.2.0/xarray_dbd/cli/cache.py +124 -0
  48. xarray_dbd-0.2.0/xarray_dbd/cli/csv.py +237 -0
  49. xarray_dbd-0.2.0/xarray_dbd/cli/dbd2nc.py +258 -0
  50. xarray_dbd-0.2.0/xarray_dbd/cli/logger.py +102 -0
  51. xarray_dbd-0.2.0/xarray_dbd/cli/main.py +46 -0
  52. xarray_dbd-0.2.0/xarray_dbd/cli/missions.py +79 -0
  53. xarray_dbd-0.2.0/xarray_dbd/cli/mkone.py +319 -0
  54. xarray_dbd-0.2.0/xarray_dbd/cli/sensors.py +120 -0
  55. xarray_dbd-0.2.0/xarray_dbd/compat.py +340 -0
  56. xarray_dbd-0.2.0/xarray_dbd/py.typed +0 -0
@@ -0,0 +1,41 @@
1
+ ---
2
+ # clang-tidy configuration for xarray-dbd C++ extension
3
+ #
4
+ # Focused checks for C++17 codebase with pybind11 bindings.
5
+ # Run via: cmake -B build -DENABLE_CLANG_TIDY=ON && cmake --build build
6
+
7
+ Checks: >
8
+ -*,
9
+ bugprone-*,
10
+ cppcoreguidelines-*,
11
+ modernize-*,
12
+ performance-*,
13
+ readability-braces-around-statements,
14
+ readability-container-size-empty,
15
+ readability-else-after-return,
16
+ readability-implicit-bool-conversion,
17
+ readability-redundant-smartptr-get,
18
+ readability-simplify-boolean-expr,
19
+ -bugprone-easily-swappable-parameters,
20
+ -cppcoreguidelines-avoid-magic-numbers,
21
+ -cppcoreguidelines-pro-type-reinterpret-cast,
22
+ -cppcoreguidelines-pro-type-union-access,
23
+ -cppcoreguidelines-pro-bounds-pointer-arithmetic,
24
+ -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
25
+ -cppcoreguidelines-pro-bounds-constant-array-index,
26
+ -cppcoreguidelines-avoid-c-arrays,
27
+ -cppcoreguidelines-avoid-non-const-global-variables,
28
+ -cppcoreguidelines-owning-memory,
29
+ -cppcoreguidelines-special-member-functions,
30
+ -cppcoreguidelines-non-private-member-variables-in-classes,
31
+ -modernize-use-trailing-return-type,
32
+ -modernize-avoid-c-arrays,
33
+ -modernize-use-nodiscard,
34
+
35
+ WarningsAsErrors: ''
36
+
37
+ HeaderFilterRegex: 'csrc/.*\.H$'
38
+
39
+ CheckOptions:
40
+ - key: readability-braces-around-statements.ShortStatementLines
41
+ value: '1'
@@ -0,0 +1,159 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+ db.sqlite3
60
+ db.sqlite3-journal
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # pipenv
86
+ Pipfile.lock
87
+
88
+ # PEP 582
89
+ __pypackages__/
90
+
91
+ # Celery stuff
92
+ celerybeat-schedule
93
+ celerybeat.pid
94
+
95
+ # SageMath parsed files
96
+ *.sage.py
97
+
98
+ # Environments
99
+ .env
100
+ .venv
101
+ env/
102
+ venv/
103
+ ENV/
104
+ env.bak/
105
+ venv.bak/
106
+
107
+ # Spyder project settings
108
+ .spyderproject
109
+ .spyproject
110
+
111
+ # Rope project settings
112
+ .ropeproject
113
+
114
+ # mkdocs documentation
115
+ /site
116
+
117
+ # ruff
118
+ .ruff_cache/
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # IDEs
129
+ .vscode/
130
+ .idea/
131
+ *.swp
132
+ *.swo
133
+ *~
134
+
135
+ # macOS
136
+ .DS_Store
137
+
138
+ # CMake / scikit-build-core build artifacts
139
+ _skbuild/
140
+ CMakeCache.txt
141
+ CMakeFiles/
142
+ cmake_install.cmake
143
+
144
+ # Project specific
145
+ dbd2netcdf/
146
+ cache/
147
+ *.nc
148
+ *.dbd
149
+ *.dcd
150
+ *.ebd
151
+ *.ecd
152
+ *.sbd
153
+ *.scd
154
+ *.tbd
155
+ *.tcd
156
+ *.mbd
157
+ *.mcd
158
+ *.nbd
159
+ *.ncd
@@ -0,0 +1,15 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+
10
+ - repo: https://github.com/astral-sh/ruff-pre-commit
11
+ rev: v0.9.10
12
+ hooks:
13
+ - id: ruff
14
+ args: [--fix, --exit-non-zero-on-fix]
15
+ - id: ruff-format
@@ -0,0 +1,25 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-02-20
9
+
10
+ ### Added
11
+
12
+ - C++ backend via pybind11 wrapping [dbd2netCDF](https://github.com/mousebrains/dbd2netcdf) parser
13
+ - Native xarray engine integration (`xr.open_dataset(f, engine="dbd")`)
14
+ - Multi-file reading with `open_multi_dbd_dataset()` using C++ SensorsMap two-pass approach
15
+ - CLI tools: `dbd2nc` for single/multi-file conversion, `mkone` for batch directory processing
16
+ - Native dtype support: int8, int16, float32, float64 columns (no double-conversion overhead)
17
+ - LZ4 decompression for compressed `.?cd` files
18
+ - Sensor filtering (`to_keep`), mission filtering (`skip_missions`/`keep_missions`)
19
+ - Corrupted file recovery with `repair=True`
20
+ - Python 3.10+ and free-threaded Python (PEP 703) support
21
+
22
+ ### Changed
23
+
24
+ - Replaced pure-Python parser with C++ pybind11 extension for ~5x performance improvement
25
+ - Fill values: NaN for float32/float64, 0 for int8/int16 (matching C++ double-NaN semantics)
@@ -0,0 +1,75 @@
1
+ cmake_minimum_required(VERSION 3.17)
2
+ project(xarray_dbd_cpp LANGUAGES C CXX)
3
+
4
+ set(CMAKE_CXX_STANDARD 17)
5
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6
+ set(CMAKE_CXX_EXTENSIONS OFF)
7
+
8
+ # Compiler warning flags
9
+ if(MSVC)
10
+ add_compile_options(/W4)
11
+ else()
12
+ add_compile_options(-Wall -Wextra -Wpedantic
13
+ -Wshadow -Wconversion -Wsign-conversion -Wnon-virtual-dtor)
14
+ endif()
15
+
16
+ # Optional clang-tidy integration
17
+ option(ENABLE_CLANG_TIDY "Enable clang-tidy static analysis" OFF)
18
+ if(ENABLE_CLANG_TIDY)
19
+ find_program(CLANG_TIDY_EXE NAMES clang-tidy)
20
+ if(CLANG_TIDY_EXE)
21
+ set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_EXE}")
22
+ else()
23
+ message(WARNING "clang-tidy requested but not found")
24
+ endif()
25
+ endif()
26
+
27
+ find_package(pybind11 2.11...4 CONFIG REQUIRED)
28
+
29
+ pybind11_add_module(_dbd_cpp
30
+ csrc/dbd_python.cpp
31
+ csrc/ColumnData.C
32
+ csrc/Header.C
33
+ csrc/Sensor.C
34
+ csrc/Sensors.C
35
+ csrc/SensorsMap.C
36
+ csrc/KnownBytes.C
37
+ csrc/Decompress.C
38
+ csrc/Data.C
39
+ csrc/lz4.c
40
+ )
41
+
42
+ target_include_directories(_dbd_cpp PRIVATE csrc)
43
+
44
+ # Suppress warnings for vendored lz4.c
45
+ if(MSVC)
46
+ set_source_files_properties(csrc/lz4.c PROPERTIES COMPILE_FLAGS /w)
47
+ else()
48
+ set_source_files_properties(csrc/lz4.c PROPERTIES COMPILE_FLAGS -w)
49
+ endif()
50
+
51
+ # std::filesystem needs explicit linking against stdc++fs on GCC < 9
52
+ include(CheckCXXSourceCompiles)
53
+ set(CMAKE_REQUIRED_FLAGS "-std=c++17")
54
+ check_cxx_source_compiles("
55
+ #include <filesystem>
56
+ int main() { std::filesystem::path p(\"/tmp\"); return p.empty(); }
57
+ " STDFS_NO_LIB)
58
+ if(NOT STDFS_NO_LIB)
59
+ set(CMAKE_REQUIRED_LIBRARIES stdc++fs)
60
+ check_cxx_source_compiles("
61
+ #include <filesystem>
62
+ int main() { std::filesystem::path p(\"/tmp\"); return p.empty(); }
63
+ " STDFS_NEEDS_STDC_FS)
64
+ unset(CMAKE_REQUIRED_LIBRARIES)
65
+ if(STDFS_NEEDS_STDC_FS)
66
+ target_link_libraries(_dbd_cpp PRIVATE stdc++fs)
67
+ endif()
68
+ endif()
69
+
70
+ # Platform-specific: Windows needs winsock2 for ntohs/ntohl
71
+ if(WIN32)
72
+ target_link_libraries(_dbd_cpp PRIVATE ws2_32)
73
+ endif()
74
+
75
+ install(TARGETS _dbd_cpp LIBRARY DESTINATION xarray_dbd)
@@ -0,0 +1,82 @@
1
+ # Contributing to xarray-dbd
2
+
3
+ Thank you for your interest in contributing! This guide covers the development
4
+ workflow for xarray-dbd.
5
+
6
+ ## Development Setup
7
+
8
+ **Prerequisites:** Python 3.13+, a C++17 compiler, and CMake ≥ 3.17.
9
+
10
+ ```bash
11
+ git clone https://github.com/mousebrains/dbd2netcdf-python
12
+ cd dbd2netcdf-python
13
+ pip install -e ".[dev]"
14
+ ```
15
+
16
+ The editable install compiles the C++ extension (`_dbd_cpp`) in-place.
17
+ Re-run `pip install -e .` after changing any C++ source in `csrc/`.
18
+
19
+ ## Running Tests
20
+
21
+ ```bash
22
+ pytest # all tests
23
+ pytest tests/test_backend.py # backend integration tests only
24
+ pytest -v --tb=short # verbose with short tracebacks
25
+ ```
26
+
27
+ Some tests require sample `.dbd`/`.dcd` files in `dbd_files/`. Tests that need
28
+ data are skipped automatically when the directory is absent.
29
+
30
+ ## Linting and Formatting
31
+
32
+ ```bash
33
+ ruff check xarray_dbd/ tests/ *.py # lint
34
+ ruff format xarray_dbd/ tests/ *.py # auto-format
35
+ mypy xarray_dbd/ # type checking
36
+ ```
37
+
38
+ Configuration for all tools lives in `pyproject.toml`. The project uses a
39
+ 100-character line length.
40
+
41
+ ## Syncing C++ Changes from Upstream
42
+
43
+ The C++ parser in `csrc/` is a copy of
44
+ [dbd2netCDF](https://github.com/mousebrains/dbd2netcdf) (`src/` directory).
45
+ To incorporate upstream changes:
46
+
47
+ 1. Copy updated `.C` / `.H` files from `dbd2netcdf/src/` into `csrc/`.
48
+ 2. Keep `csrc/Logger.H` (our no-op stub replacing spdlog) and
49
+ `csrc/dbd_python.cpp` (pybind11 bindings) — these are local files.
50
+ 3. Rebuild: `pip install -e .`
51
+ 4. Run the full test suite and compare output against the C++ reference.
52
+
53
+ See `docs/SYNC.md` for the detailed file mapping.
54
+
55
+ ## Project Layout
56
+
57
+ ```
58
+ csrc/ C++ source (pybind11 extension)
59
+ xarray_dbd/ Python package (backend, CLI, public API)
60
+ tests/ pytest test suite
61
+ scripts/ Utility and debug scripts
62
+ docs/ Project documentation
63
+ conda/ Conda build recipe
64
+ ```
65
+
66
+ ## Pull Request Guidelines
67
+
68
+ - Keep changes focused — one logical change per PR.
69
+ - Add or update tests for any behavioral changes.
70
+ - Run `ruff check`, `ruff format`, and `mypy` before submitting.
71
+ - Include a clear description of *what* and *why* in the PR body.
72
+
73
+ ## Commit Messages
74
+
75
+ Use a short imperative summary line, followed by a blank line and a body explaining what changed and why:
76
+
77
+ ```
78
+ Fix sensor cache lookup for compressed .ccc files
79
+
80
+ The cache reader was only checking .cac files. Now tries .ccc
81
+ (LZ4-compressed) as a fallback, matching dbd2netCDF behavior.
82
+ ```