widpath 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ pull_request:
7
+ branches: ["main"]
8
+
9
+ jobs:
10
+ test:
11
+ name: Test (Python ${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Set up Python ${{ matrix.python-version }}
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+
26
+ - name: Install dependencies
27
+ run: pip install -e ".[dev]"
28
+
29
+ - name: Lint (ruff)
30
+ run: ruff check src tests
31
+
32
+ - name: Type check (mypy)
33
+ if: matrix.python-version != '3.9'
34
+ run: mypy src
35
+
36
+ - name: Run tests
37
+ run: pytest -m "not perf"
38
+
39
+ - name: Upload coverage
40
+ if: matrix.python-version == '3.13'
41
+ uses: actions/upload-artifact@v4
42
+ with:
43
+ name: coverage-report
44
+ path: .coverage
@@ -0,0 +1,61 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ test:
9
+ name: Run tests before publish
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: actions/setup-python@v5
14
+ with:
15
+ python-version: "3.12"
16
+ - run: pip install -e ".[dev]"
17
+ - run: pytest -m "not perf"
18
+
19
+ build:
20
+ name: Build distribution
21
+ needs: test
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+
26
+ - uses: actions/setup-python@v5
27
+ with:
28
+ python-version: "3.12"
29
+
30
+ - name: Install build
31
+ run: pip install build
32
+
33
+ - name: Build wheel and distribution
34
+ run: python -m build
35
+
36
+ - name: Upload dist artifacts
37
+ uses: actions/upload-artifact@v4
38
+ with:
39
+ name: dist
40
+ path: dist/
41
+
42
+ publish:
43
+ name: Publish to PyPI
44
+ needs: build
45
+ runs-on: ubuntu-latest
46
+ environment:
47
+ name: pypi
48
+ url: https://pypi.org/project/widpath
49
+ permissions:
50
+ id-token: write # Required for OIDC Trusted Publishing
51
+
52
+ steps:
53
+ - name: Download dist artifacts
54
+ uses: actions/download-artifact@v4
55
+ with:
56
+ name: dist
57
+ path: dist/
58
+
59
+ - name: Publish to PyPI
60
+ uses: pypa/gh-action-pypi-publish@release/v1
61
+ # No api_token needed - uses OIDC Trusted Publishing configured on PyPI
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ htmlcov/
8
+ .coverage
9
+ .coverage.*
10
+ *.log
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .pytest_cache/
@@ -0,0 +1,70 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ---
9
+
10
+ ## [Unreleased]
11
+
12
+ ### Changed
13
+ - **Breaking:** Renamed `WidPathResolver` methods to be more Pythonic and precise:
14
+ - `get_file_path(wid, base_dir)` -> `resolve(wid, base_dir)` - aligns with the class name *Resolver*.
15
+ - `get_hierarchical_json(wid, level)` -> `path_at_level(wid, level)` - describes what the method returns rather than the output format.
16
+ - `get_max_level(wid)` -> `max_level(wid)` - drops the un-Pythonic `get_` prefix from a pure-computation method.
17
+ - `get_candidate_paths(wid, base_dir)` -> `candidate_paths(wid, base_dir)` - same rationale as above.
18
+
19
+ ---
20
+
21
+ ## [0.2.0] - 2026-06-03
22
+
23
+ ### Added
24
+ - `locate(base_dir, wid, size=2)` - module-level functional interface implementing
25
+ the canonical O(depth) linear-scan algorithm from the widpath specification.
26
+ - `WidPathResolver.get_candidate_paths(wid, base_dir)` - returns all candidate paths
27
+ from shallowest to deepest; useful for debugging and tooling.
28
+ - `widpath/__init__.py` now exports `__version__ = "0.2.0"`.
29
+ - Full test suite: `test_split`, `test_levels`, `test_locate`, `test_edge_cases`,
30
+ `test_perf` - coverage ≥ 95 %.
31
+ - GitHub Actions CI workflow (Python 3.9-3.13 matrix, ruff, mypy).
32
+ - GitHub Actions publish workflow (OIDC Trusted Publishing -> PyPI on Release).
33
+ - Bilingual README (EN + CN).
34
+ - `pyproject.toml` replaces legacy `setup.py`.
35
+
36
+ ### Changed
37
+ - **Breaking:** `WidPathResolver.get_file_path(wid)` now requires a mandatory
38
+ `base_dir: Path` argument. Previously the method used the process CWD
39
+ implicitly, which was unsafe in library code.
40
+ - `WidPathResolver.get_hierarchical_json` now uses `pathlib` path composition
41
+ (`Path(*parts).with_suffix(".json")`) instead of string join with a
42
+ configurable `separator`.
43
+ - `WidPathResolver.__init__` no longer accepts a `separator` parameter
44
+ (removed - `pathlib` handles OS-native separators automatically).
45
+
46
+ ### Fixed
47
+ - `get_file_path` now raises `FileNotFoundError` when `base_dir` does not exist,
48
+ instead of silently returning an invalid path.
49
+ - `get_max_level == 0` edge case (WID length equals `size`) is now handled
50
+ explicitly with an early return.
51
+
52
+ ---
53
+
54
+ ## [0.1.1] - 2025-10-08
55
+
56
+ ### Fixed
57
+ - Minor metadata corrections in package distribution.
58
+
59
+ ---
60
+
61
+ ## [0.1.0] - 2025-09-21
62
+
63
+ ### Added
64
+ - Initial release.
65
+ - `WidPathResolver` with `get_file_path`, `get_hierarchical_json`, `get_max_level`.
66
+
67
+ [Unreleased]: https://github.com/junsxu/widpath/compare/v0.2.0...HEAD
68
+ [0.2.0]: https://github.com/junsxu/widpath/compare/v0.1.1...v0.2.0
69
+ [0.1.1]: https://github.com/junsxu/widpath/compare/v0.1.0...v0.1.1
70
+ [0.1.0]: https://github.com/junsxu/widpath/releases/tag/v0.1.0
@@ -0,0 +1,68 @@
1
+ # Contributing to widpath
2
+
3
+ Thank you for considering a contribution!
4
+
5
+ ## Local development setup
6
+
7
+ ```bash
8
+ git clone https://github.com/junsxu/widpath
9
+ cd widpath
10
+ pip install -e ".[dev]"
11
+ ```
12
+
13
+ ## Running tests
14
+
15
+ ```bash
16
+ pytest -m "not perf" # all tests except perf benchmarks
17
+ pytest -m perf # performance benchmarks only
18
+ pytest --cov-report=html # generate HTML coverage report in htmlcov/
19
+ ```
20
+
21
+ The CI gate requires **≥ 95% coverage**.
22
+
23
+ ## Linting and type checking
24
+
25
+ ```bash
26
+ ruff check widpath tests # lint
27
+ ruff format widpath tests # auto-format
28
+ mypy widpath # strict type checking
29
+ ```
30
+
31
+ All checks must pass before a PR can be merged.
32
+
33
+ ## Branch naming
34
+
35
+ | Type | Pattern | Example |
36
+ |------|---------|---------|
37
+ | Feature | `feat/<short-desc>` | `feat/locate-function` |
38
+ | Bug fix | `fix/<short-desc>` | `fix/base-dir-missing` |
39
+ | Docs | `docs/<short-desc>` | `docs/readme-cn` |
40
+ | Refactor | `refactor/<short-desc>` | `refactor/pathlib-join` |
41
+
42
+ ## Commit style
43
+
44
+ Use [Conventional Commits](https://www.conventionalcommits.org/):
45
+
46
+ ```
47
+ feat: add locate() functional interface
48
+ fix: raise FileNotFoundError when base_dir missing
49
+ docs: add Chinese README section
50
+ test: add edge cases for single-segment WID
51
+ ```
52
+
53
+ ## Pull request checklist
54
+
55
+ - [ ] Tests added or updated
56
+ - [ ] `pytest` passes locally (coverage ≥ 95 %)
57
+ - [ ] `ruff check` and `mypy` pass
58
+ - [ ] `CHANGELOG.md` updated under `[Unreleased]`
59
+ - [ ] PR description explains *why* the change is needed
60
+
61
+ ## Release process (maintainers only)
62
+
63
+ 1. Update `version` in `pyproject.toml` and `widpath/__init__.py`.
64
+ 2. Move `[Unreleased]` entries in `CHANGELOG.md` to a new versioned section.
65
+ 3. Commit: `chore: bump version to v0.x.y`.
66
+ 4. Push to `main`, then create a **GitHub Release** with tag `v0.x.y`.
67
+ 5. The `publish.yml` workflow triggers automatically and publishes to PyPI via
68
+ OIDC Trusted Publishing (no API token required).
widpath-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 junsxu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
widpath-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,180 @@
1
+ Metadata-Version: 2.4
2
+ Name: widpath
3
+ Version: 0.2.0
4
+ Summary: Hierarchical file-path resolver for WID-based storage
5
+ Project-URL: Homepage, https://github.com/junsxu/widpath
6
+ Project-URL: Repository, https://github.com/junsxu/widpath
7
+ Project-URL: Bug Tracker, https://github.com/junsxu/widpath/issues
8
+ Project-URL: Changelog, https://github.com/junsxu/widpath/blob/main/CHANGELOG.md
9
+ Author-email: "sheng.SMLH" <smlh.sheng@gmail.com>, junsxu <sheng@silmoony.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: file storage,graph,hierarchical path,uuid,wid
13
+ Requires-Python: >=3.9
14
+ Provides-Extra: dev
15
+ Requires-Dist: mypy>=1.0; extra == 'dev'
16
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
17
+ Requires-Dist: pytest>=7.0; extra == 'dev'
18
+ Requires-Dist: ruff>=0.4; extra == 'dev'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # widpath
22
+
23
+ [![CI](https://github.com/junsxu/widpath/actions/workflows/ci.yml/badge.svg)](https://github.com/junsxu/widpath/actions/workflows/ci.yml)
24
+ [![PyPI version](https://badge.fury.io/py/widpath.svg)](https://pypi.org/project/widpath/)
25
+ [![Python](https://img.shields.io/pypi/pyversions/widpath)](https://pypi.org/project/widpath/)
26
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
27
+
28
+ **widpath** maps WID strings (UUID4 or any fixed-length hex ID) to a hierarchical file-system path tree, keeping directory entry counts bounded while supporting O(1) point lookup - no database required.
29
+
30
+ ---
31
+
32
+ ## What problem does it solve?
33
+
34
+ Storing millions of UUID-keyed JSON files in a flat directory causes performance problems on every major OS (HFS+, ext4, NTFS all degrade beyond ~100 k entries per directory).
35
+
36
+ widpath borrows the idea from Git's object store (`.git/objects/ab/cdef...`) and generalises it to **adaptive depth**: a single JSON file at a shallow level holds all WIDs that share the same prefix. When that file grows too large, the caller splits it into deeper sub-files - and widpath's `locate` / `resolve` find the right file in at most **16 stat calls** for a 32-char UUID.
37
+
38
+ ```
39
+ data/nodes/
40
+ ├── 8b.json ← all WIDs starting with "8b" (few entries, stays shallow)
41
+ ├── 4a/
42
+ | ├── 3f.json ← split: "4a3f..." WIDs moved here
43
+ | └── b7.json ← split: "4ab7..." WIDs moved here
44
+ └── ...
45
+ ```
46
+
47
+ ---
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install widpath
53
+ ```
54
+
55
+ Requires Python ≥ 3.9, no third-party dependencies.
56
+
57
+ ---
58
+
59
+ ## Quick start
60
+
61
+ ```python
62
+ from pathlib import Path
63
+ from widpath import locate, WidPathResolver
64
+
65
+ base = Path("data/nodes")
66
+ wid = "4a3f9c2b1e0d5678abcd1234567890ab" # UUID4 with dashes stripped
67
+
68
+ # ── Functional interface (canonical, O(depth) linear scan) ─────────────────
69
+ path = locate(base, wid)
70
+ # -> PosixPath('data/nodes/4a.json') when base/ is empty
71
+
72
+ # ── OOP interface (binary-search variant, O(log depth)) ────────────────────
73
+ resolver = WidPathResolver()
74
+ path = resolver.resolve(wid, base)
75
+ # same result
76
+ ```
77
+
78
+ > **Note:** Strip UUID dashes before passing to widpath:
79
+ > `wid = uuid_str.replace("-", "")`
80
+
81
+ ---
82
+
83
+ ## API reference
84
+
85
+ ### `locate(base_dir, wid, size=2) -> Path`
86
+
87
+ Canonical O(depth) algorithm. Greedily descends into existing subdirectories
88
+ named by successive WID segments, stopping at the first missing directory and
89
+ returning `<current>/<segment>.json`.
90
+
91
+ | Parameter | Type | Default | Description |
92
+ |-----------|------|---------|-------------|
93
+ | `base_dir` | `Path` | - | Root storage directory |
94
+ | `wid` | `str` | - | Hex string, dashes removed |
95
+ | `size` | `int` | `2` | Chars per path segment |
96
+
97
+ ---
98
+
99
+ ### `WidPathResolver(size=2)`
100
+
101
+ OOP interface with a binary-search implementation of path location.
102
+
103
+ | Method | Description |
104
+ |--------|-------------|
105
+ | `resolve(wid, base_dir)` | Locate file via binary search. Raises `FileNotFoundError` if `base_dir` missing. |
106
+ | `path_at_level(wid, level)` | Build the **relative** path for `wid` at depth `level`. |
107
+ | `max_level(wid)` | Maximum depth level = `len(wid) // size - 1`. |
108
+ | `candidate_paths(wid, base_dir)` | All candidate paths from shallowest to deepest. |
109
+
110
+ ---
111
+
112
+ ## Comparison with Git object store
113
+
114
+ | Feature | Git object store | widpath |
115
+ |---------|------------------|---------|
116
+ | Hash algorithm | SHA1 / SHA256 | Any hex string (UUID, SHA, etc.) |
117
+ | Directory depth | Fixed 2 levels | Adaptive 1-16 levels |
118
+ | File format | Binary blobs | Caller-defined (JSON, etc.) |
119
+ | Multiple objects per file | No (1 object = 1 file) | Yes (bucket file holds many) |
120
+ | Split strategy | `git gc` packs loose objects | Caller splits bucket files on overflow |
121
+
122
+
123
+ ## Comparison with Existing Solutions
124
+
125
+ ### Several path manipulation libraries are commonly available on PyPI:
126
+ | Package / Type | Key Features | Difference from `widpath` |
127
+ | -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------ |
128
+ | **widpath** (this package) | WID-based slicing, hierarchical path generation, and binary search | Specifically designed for WID management, enabling fast storage path discovery |
129
+ | `wildpath` | Wildcard-based access to data structures | Unrelated to hierarchical filesystem path organization |
130
+ | `path` / `path.py` | More user-friendly path manipulation APIs | Focuses on path operations rather than WID-based hierarchical storage strategies |
131
+ | Standard Library `pathlib` | Object-oriented, cross-platform path handling | Provides general path operations only, without hierarchical partitioning or binary search capabilities |
132
+
133
+ ### Conclusion
134
+
135
+ widpath introduces a dedicated hierarchical file organization and lookup mechanism tailored for WIDs. It complements existing general-purpose path libraries by providing efficient storage path management and fast lookup capabilities for large-scale WID-based datasets.
136
+
137
+ ---
138
+
139
+ ## 中文说明
140
+
141
+ **widpath** 将WID字符串(UUID4或任意等长十六进制ID)映射到分层文件路径,
142
+ 避免单目录下文件过多,同时支持 O(1)级别的点查询,无需数据库。
143
+
144
+ ### 核心原理
145
+
146
+ UUID4 去掉 `-` 后共32个十六进制字符,按每 2 字符分段得到 16 级路径:
147
+
148
+ ```
149
+ 4a3f9c2b... -> 4a / 3f / 9c / 2b / ...
150
+ ```
151
+
152
+ 同一前缀的 WID 共存于同一个 JSON 文件。 文件过大时,调用方将其拆分为更深的子目录,
153
+ widpath 的 `locate` / `resolve` 自动找到正确的文件。
154
+
155
+ ### 两种接口
156
+
157
+ - **`locate(base_dir, wid)`**: 顺序遍历,沿着已存在的子目录下探,遇到缺失则返回当前层文件路径。
158
+ - **`WidPathResolver.resolve(wid, base_dir)`**: 二分查找版本,在稀疏目录树上减少 stat 调用次数。
159
+
160
+ 两者在相同文件系统状态下返回相同结果(见 `tests/test_locate.py::TestAlgorithmConsistency`)。
161
+
162
+ ---
163
+
164
+ ## Development
165
+
166
+ ```bash
167
+ git clone https://github.com/junsxu/widpath
168
+ cd widpath
169
+ pip install -e ".[dev]"
170
+ pytest -m "not perf" # run all tests (except perf)
171
+ pytest -m perf # run performance benchmarks
172
+ ruff check widpath tests # lint
173
+ mypy widpath # type check
174
+ ```
175
+
176
+ ---
177
+
178
+ ## License
179
+
180
+ MIT © junsxu / silmoony.com
@@ -0,0 +1,160 @@
1
+ # widpath
2
+
3
+ [![CI](https://github.com/junsxu/widpath/actions/workflows/ci.yml/badge.svg)](https://github.com/junsxu/widpath/actions/workflows/ci.yml)
4
+ [![PyPI version](https://badge.fury.io/py/widpath.svg)](https://pypi.org/project/widpath/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/widpath)](https://pypi.org/project/widpath/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
7
+
8
+ **widpath** maps WID strings (UUID4 or any fixed-length hex ID) to a hierarchical file-system path tree, keeping directory entry counts bounded while supporting O(1) point lookup - no database required.
9
+
10
+ ---
11
+
12
+ ## What problem does it solve?
13
+
14
+ Storing millions of UUID-keyed JSON files in a flat directory causes performance problems on every major OS (HFS+, ext4, NTFS all degrade beyond ~100 k entries per directory).
15
+
16
+ widpath borrows the idea from Git's object store (`.git/objects/ab/cdef...`) and generalises it to **adaptive depth**: a single JSON file at a shallow level holds all WIDs that share the same prefix. When that file grows too large, the caller splits it into deeper sub-files - and widpath's `locate` / `resolve` find the right file in at most **16 stat calls** for a 32-char UUID.
17
+
18
+ ```
19
+ data/nodes/
20
+ ├── 8b.json ← all WIDs starting with "8b" (few entries, stays shallow)
21
+ ├── 4a/
22
+ | ├── 3f.json ← split: "4a3f..." WIDs moved here
23
+ | └── b7.json ← split: "4ab7..." WIDs moved here
24
+ └── ...
25
+ ```
26
+
27
+ ---
28
+
29
+ ## Install
30
+
31
+ ```bash
32
+ pip install widpath
33
+ ```
34
+
35
+ Requires Python ≥ 3.9, no third-party dependencies.
36
+
37
+ ---
38
+
39
+ ## Quick start
40
+
41
+ ```python
42
+ from pathlib import Path
43
+ from widpath import locate, WidPathResolver
44
+
45
+ base = Path("data/nodes")
46
+ wid = "4a3f9c2b1e0d5678abcd1234567890ab" # UUID4 with dashes stripped
47
+
48
+ # ── Functional interface (canonical, O(depth) linear scan) ─────────────────
49
+ path = locate(base, wid)
50
+ # -> PosixPath('data/nodes/4a.json') when base/ is empty
51
+
52
+ # ── OOP interface (binary-search variant, O(log depth)) ────────────────────
53
+ resolver = WidPathResolver()
54
+ path = resolver.resolve(wid, base)
55
+ # same result
56
+ ```
57
+
58
+ > **Note:** Strip UUID dashes before passing to widpath:
59
+ > `wid = uuid_str.replace("-", "")`
60
+
61
+ ---
62
+
63
+ ## API reference
64
+
65
+ ### `locate(base_dir, wid, size=2) -> Path`
66
+
67
+ Canonical O(depth) algorithm. Greedily descends into existing subdirectories
68
+ named by successive WID segments, stopping at the first missing directory and
69
+ returning `<current>/<segment>.json`.
70
+
71
+ | Parameter | Type | Default | Description |
72
+ |-----------|------|---------|-------------|
73
+ | `base_dir` | `Path` | - | Root storage directory |
74
+ | `wid` | `str` | - | Hex string, dashes removed |
75
+ | `size` | `int` | `2` | Chars per path segment |
76
+
77
+ ---
78
+
79
+ ### `WidPathResolver(size=2)`
80
+
81
+ OOP interface with a binary-search implementation of path location.
82
+
83
+ | Method | Description |
84
+ |--------|-------------|
85
+ | `resolve(wid, base_dir)` | Locate file via binary search. Raises `FileNotFoundError` if `base_dir` missing. |
86
+ | `path_at_level(wid, level)` | Build the **relative** path for `wid` at depth `level`. |
87
+ | `max_level(wid)` | Maximum depth level = `len(wid) // size - 1`. |
88
+ | `candidate_paths(wid, base_dir)` | All candidate paths from shallowest to deepest. |
89
+
90
+ ---
91
+
92
+ ## Comparison with Git object store
93
+
94
+ | Feature | Git object store | widpath |
95
+ |---------|------------------|---------|
96
+ | Hash algorithm | SHA1 / SHA256 | Any hex string (UUID, SHA, etc.) |
97
+ | Directory depth | Fixed 2 levels | Adaptive 1-16 levels |
98
+ | File format | Binary blobs | Caller-defined (JSON, etc.) |
99
+ | Multiple objects per file | No (1 object = 1 file) | Yes (bucket file holds many) |
100
+ | Split strategy | `git gc` packs loose objects | Caller splits bucket files on overflow |
101
+
102
+
103
+ ## Comparison with Existing Solutions
104
+
105
+ ### Several path manipulation libraries are commonly available on PyPI:
106
+ | Package / Type | Key Features | Difference from `widpath` |
107
+ | -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------ |
108
+ | **widpath** (this package) | WID-based slicing, hierarchical path generation, and binary search | Specifically designed for WID management, enabling fast storage path discovery |
109
+ | `wildpath` | Wildcard-based access to data structures | Unrelated to hierarchical filesystem path organization |
110
+ | `path` / `path.py` | More user-friendly path manipulation APIs | Focuses on path operations rather than WID-based hierarchical storage strategies |
111
+ | Standard Library `pathlib` | Object-oriented, cross-platform path handling | Provides general path operations only, without hierarchical partitioning or binary search capabilities |
112
+
113
+ ### Conclusion
114
+
115
+ widpath introduces a dedicated hierarchical file organization and lookup mechanism tailored for WIDs. It complements existing general-purpose path libraries by providing efficient storage path management and fast lookup capabilities for large-scale WID-based datasets.
116
+
117
+ ---
118
+
119
+ ## 中文说明
120
+
121
+ **widpath** 将WID字符串(UUID4或任意等长十六进制ID)映射到分层文件路径,
122
+ 避免单目录下文件过多,同时支持 O(1)级别的点查询,无需数据库。
123
+
124
+ ### 核心原理
125
+
126
+ UUID4 去掉 `-` 后共32个十六进制字符,按每 2 字符分段得到 16 级路径:
127
+
128
+ ```
129
+ 4a3f9c2b... -> 4a / 3f / 9c / 2b / ...
130
+ ```
131
+
132
+ 同一前缀的 WID 共存于同一个 JSON 文件。 文件过大时,调用方将其拆分为更深的子目录,
133
+ widpath 的 `locate` / `resolve` 自动找到正确的文件。
134
+
135
+ ### 两种接口
136
+
137
+ - **`locate(base_dir, wid)`**: 顺序遍历,沿着已存在的子目录下探,遇到缺失则返回当前层文件路径。
138
+ - **`WidPathResolver.resolve(wid, base_dir)`**: 二分查找版本,在稀疏目录树上减少 stat 调用次数。
139
+
140
+ 两者在相同文件系统状态下返回相同结果(见 `tests/test_locate.py::TestAlgorithmConsistency`)。
141
+
142
+ ---
143
+
144
+ ## Development
145
+
146
+ ```bash
147
+ git clone https://github.com/junsxu/widpath
148
+ cd widpath
149
+ pip install -e ".[dev]"
150
+ pytest -m "not perf" # run all tests (except perf)
151
+ pytest -m perf # run performance benchmarks
152
+ ruff check widpath tests # lint
153
+ mypy widpath # type check
154
+ ```
155
+
156
+ ---
157
+
158
+ ## License
159
+
160
+ MIT © junsxu / silmoony.com
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "widpath"
7
+ version = "0.2.0"
8
+ description = "Hierarchical file-path resolver for WID-based storage"
9
+ authors = [
10
+ { name = "sheng.SMLH", email = "smlh.sheng@gmail.com" },
11
+ { name = "junsxu", email = "sheng@silmoony.com"}
12
+ ]
13
+ readme = "README.md"
14
+ license = { text = "MIT" }
15
+ requires-python = ">=3.9"
16
+ keywords = ["wid", "file storage", "hierarchical path", "uuid", "graph"]
17
+ classifiers = [
18
+ "Development Status :: 4 - Beta",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: Software Development :: Libraries",
28
+ "Topic :: System :: Filesystems",
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/junsxu/widpath"
33
+ Repository = "https://github.com/junsxu/widpath"
34
+ "Bug Tracker" = "https://github.com/junsxu/widpath/issues"
35
+ Changelog = "https://github.com/junsxu/widpath/blob/main/CHANGELOG.md"
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "pytest>=7.0",
40
+ "pytest-cov>=4.0",
41
+ "ruff>=0.4",
42
+ "mypy>=1.0",
43
+ ]
44
+
45
+ [tool.pytest.ini_options]
46
+ testpaths = ["tests"]
47
+ addopts = "--cov=widpath --cov-report=term-missing --cov-fail-under=95"
48
+ markers = [
49
+ "perf: performance benchmarks (deselect with '-m not perf')",
50
+ ]
51
+
52
+ [tool.coverage.run]
53
+ source = ["widpath"]
54
+
55
+ [tool.ruff]
56
+ line-length = 100
57
+ target-version = "py39"
58
+
59
+ [tool.ruff.lint]
60
+ select = ["E", "F", "I", "UP"]
61
+
62
+ [tool.mypy]
63
+ python_version = "3.9"
64
+ strict = true
65
+ exclude = ["tests/"]