widpath 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- widpath-0.2.0/.github/workflows/ci.yml +44 -0
- widpath-0.2.0/.github/workflows/publish.yml +61 -0
- widpath-0.2.0/.gitignore +13 -0
- widpath-0.2.0/CHANGELOG.md +70 -0
- widpath-0.2.0/CONTRIBUTING.md +68 -0
- widpath-0.2.0/LICENSE +21 -0
- widpath-0.2.0/PKG-INFO +180 -0
- widpath-0.2.0/README.md +160 -0
- widpath-0.2.0/pyproject.toml +65 -0
- widpath-0.2.0/src/widpath/__init__.py +21 -0
- widpath-0.2.0/src/widpath/resolver.py +192 -0
- widpath-0.2.0/tests/conftest.py +19 -0
- widpath-0.2.0/tests/test_edge_cases.py +96 -0
- widpath-0.2.0/tests/test_levels.py +78 -0
- widpath-0.2.0/tests/test_locate.py +114 -0
- widpath-0.2.0/tests/test_perf.py +51 -0
- widpath-0.2.0/tests/test_split.py +42 -0
- widpath-0.1.1/LICENSE +0 -0
- widpath-0.1.1/PKG-INFO +0 -69
- widpath-0.1.1/README.md +0 -57
- widpath-0.1.1/pyproject.toml +0 -18
- widpath-0.1.1/setup.cfg +0 -4
- widpath-0.1.1/src/widpath/__init__.py +0 -3
- widpath-0.1.1/src/widpath/resolver.py +0 -45
- widpath-0.1.1/src/widpath.egg-info/PKG-INFO +0 -69
- widpath-0.1.1/src/widpath.egg-info/SOURCES.txt +0 -10
- widpath-0.1.1/src/widpath.egg-info/dependency_links.txt +0 -1
- widpath-0.1.1/src/widpath.egg-info/top_level.txt +0 -1
- widpath-0.1.1/tests/test_resolver.py +0 -53
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main"]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: ["main"]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
22
|
+
uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: ${{ matrix.python-version }}
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Lint (ruff)
|
|
30
|
+
run: ruff check src tests
|
|
31
|
+
|
|
32
|
+
- name: Type check (mypy)
|
|
33
|
+
if: matrix.python-version != '3.9'
|
|
34
|
+
run: mypy src
|
|
35
|
+
|
|
36
|
+
- name: Run tests
|
|
37
|
+
run: pytest -m "not perf"
|
|
38
|
+
|
|
39
|
+
- name: Upload coverage
|
|
40
|
+
if: matrix.python-version == '3.13'
|
|
41
|
+
uses: actions/upload-artifact@v4
|
|
42
|
+
with:
|
|
43
|
+
name: coverage-report
|
|
44
|
+
path: .coverage
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
name: Run tests before publish
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.12"
|
|
16
|
+
- run: pip install -e ".[dev]"
|
|
17
|
+
- run: pytest -m "not perf"
|
|
18
|
+
|
|
19
|
+
build:
|
|
20
|
+
name: Build distribution
|
|
21
|
+
needs: test
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: "3.12"
|
|
29
|
+
|
|
30
|
+
- name: Install build
|
|
31
|
+
run: pip install build
|
|
32
|
+
|
|
33
|
+
- name: Build wheel and distribution
|
|
34
|
+
run: python -m build
|
|
35
|
+
|
|
36
|
+
- name: Upload dist artifacts
|
|
37
|
+
uses: actions/upload-artifact@v4
|
|
38
|
+
with:
|
|
39
|
+
name: dist
|
|
40
|
+
path: dist/
|
|
41
|
+
|
|
42
|
+
publish:
|
|
43
|
+
name: Publish to PyPI
|
|
44
|
+
needs: build
|
|
45
|
+
runs-on: ubuntu-latest
|
|
46
|
+
environment:
|
|
47
|
+
name: pypi
|
|
48
|
+
url: https://pypi.org/project/widpath
|
|
49
|
+
permissions:
|
|
50
|
+
id-token: write # Required for OIDC Trusted Publishing
|
|
51
|
+
|
|
52
|
+
steps:
|
|
53
|
+
- name: Download dist artifacts
|
|
54
|
+
uses: actions/download-artifact@v4
|
|
55
|
+
with:
|
|
56
|
+
name: dist
|
|
57
|
+
path: dist/
|
|
58
|
+
|
|
59
|
+
- name: Publish to PyPI
|
|
60
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
61
|
+
# No api_token needed - uses OIDC Trusted Publishing configured on PyPI
|
widpath-0.2.0/.gitignore
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## [Unreleased]
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
- **Breaking:** Renamed `WidPathResolver` methods to be more Pythonic and precise:
|
|
14
|
+
- `get_file_path(wid, base_dir)` -> `resolve(wid, base_dir)` - aligns with the class name *Resolver*.
|
|
15
|
+
- `get_hierarchical_json(wid, level)` -> `path_at_level(wid, level)` - describes what the method returns rather than the output format.
|
|
16
|
+
- `get_max_level(wid)` -> `max_level(wid)` - drops the un-Pythonic `get_` prefix from a pure-computation method.
|
|
17
|
+
- `get_candidate_paths(wid, base_dir)` -> `candidate_paths(wid, base_dir)` - same rationale as above.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## [0.2.0] - 2026-06-03
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- `locate(base_dir, wid, size=2)` - module-level functional interface implementing
|
|
25
|
+
the canonical O(depth) linear-scan algorithm from the widpath specification.
|
|
26
|
+
- `WidPathResolver.get_candidate_paths(wid, base_dir)` - returns all candidate paths
|
|
27
|
+
from shallowest to deepest; useful for debugging and tooling.
|
|
28
|
+
- `widpath/__init__.py` now exports `__version__ = "0.2.0"`.
|
|
29
|
+
- Full test suite: `test_split`, `test_levels`, `test_locate`, `test_edge_cases`,
|
|
30
|
+
`test_perf` - coverage ≥ 95 %.
|
|
31
|
+
- GitHub Actions CI workflow (Python 3.9-3.13 matrix, ruff, mypy).
|
|
32
|
+
- GitHub Actions publish workflow (OIDC Trusted Publishing -> PyPI on Release).
|
|
33
|
+
- Bilingual README (EN + CN).
|
|
34
|
+
- `pyproject.toml` replaces legacy `setup.py`.
|
|
35
|
+
|
|
36
|
+
### Changed
|
|
37
|
+
- **Breaking:** `WidPathResolver.get_file_path(wid)` now requires a mandatory
|
|
38
|
+
`base_dir: Path` argument. Previously the method used the process CWD
|
|
39
|
+
implicitly, which was unsafe in library code.
|
|
40
|
+
- `WidPathResolver.get_hierarchical_json` now uses `pathlib` path composition
|
|
41
|
+
(`Path(*parts).with_suffix(".json")`) instead of string join with a
|
|
42
|
+
configurable `separator`.
|
|
43
|
+
- `WidPathResolver.__init__` no longer accepts a `separator` parameter
|
|
44
|
+
(removed - `pathlib` handles OS-native separators automatically).
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
- `get_file_path` now raises `FileNotFoundError` when `base_dir` does not exist,
|
|
48
|
+
instead of silently returning an invalid path.
|
|
49
|
+
- `get_max_level == 0` edge case (WID length equals `size`) is now handled
|
|
50
|
+
explicitly with an early return.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## [0.1.1] - 2025-10-08
|
|
55
|
+
|
|
56
|
+
### Fixed
|
|
57
|
+
- Minor metadata corrections in package distribution.
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## [0.1.0] - 2025-09-21
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
- Initial release.
|
|
65
|
+
- `WidPathResolver` with `get_file_path`, `get_hierarchical_json`, `get_max_level`.
|
|
66
|
+
|
|
67
|
+
[Unreleased]: https://github.com/junsxu/widpath/compare/v0.2.0...HEAD
|
|
68
|
+
[0.2.0]: https://github.com/junsxu/widpath/compare/v0.1.1...v0.2.0
|
|
69
|
+
[0.1.1]: https://github.com/junsxu/widpath/compare/v0.1.0...v0.1.1
|
|
70
|
+
[0.1.0]: https://github.com/junsxu/widpath/releases/tag/v0.1.0
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Contributing to widpath
|
|
2
|
+
|
|
3
|
+
Thank you for considering a contribution!
|
|
4
|
+
|
|
5
|
+
## Local development setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/junsxu/widpath
|
|
9
|
+
cd widpath
|
|
10
|
+
pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Running tests
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pytest # all tests except perf benchmarks
|
|
17
|
+
pytest -m perf # performance benchmarks only
|
|
18
|
+
pytest --cov-report=html # generate HTML coverage report in htmlcov/
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
The CI gate requires **≥ 95% coverage**.
|
|
22
|
+
|
|
23
|
+
## Linting and type checking
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
ruff check widpath tests # lint
|
|
27
|
+
ruff format widpath tests # auto-format
|
|
28
|
+
mypy widpath # strict type checking
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
All checks must pass before a PR can be merged.
|
|
32
|
+
|
|
33
|
+
## Branch naming
|
|
34
|
+
|
|
35
|
+
| Type | Pattern | Example |
|
|
36
|
+
|------|---------|---------|
|
|
37
|
+
| Feature | `feat/<short-desc>` | `feat/locate-function` |
|
|
38
|
+
| Bug fix | `fix/<short-desc>` | `fix/base-dir-missing` |
|
|
39
|
+
| Docs | `docs/<short-desc>` | `docs/readme-cn` |
|
|
40
|
+
| Refactor | `refactor/<short-desc>` | `refactor/pathlib-join` |
|
|
41
|
+
|
|
42
|
+
## Commit style
|
|
43
|
+
|
|
44
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/):
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
feat: add locate() functional interface
|
|
48
|
+
fix: raise FileNotFoundError when base_dir missing
|
|
49
|
+
docs: add Chinese README section
|
|
50
|
+
test: add edge cases for single-segment WID
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Pull request checklist
|
|
54
|
+
|
|
55
|
+
- [ ] Tests added or updated
|
|
56
|
+
- [ ] `pytest` passes locally (coverage ≥ 95 %)
|
|
57
|
+
- [ ] `ruff check` and `mypy` pass
|
|
58
|
+
- [ ] `CHANGELOG.md` updated under `[Unreleased]`
|
|
59
|
+
- [ ] PR description explains *why* the change is needed
|
|
60
|
+
|
|
61
|
+
## Release process (maintainers only)
|
|
62
|
+
|
|
63
|
+
1. Update `version` in `pyproject.toml` and `widpath/__init__.py`.
|
|
64
|
+
2. Move `[Unreleased]` entries in `CHANGELOG.md` to a new versioned section.
|
|
65
|
+
3. Commit: `chore: bump version to v0.x.y`.
|
|
66
|
+
4. Push to `main`, then create a **GitHub Release** with tag `v0.x.y`.
|
|
67
|
+
5. The `publish.yml` workflow triggers automatically and publishes to PyPI via
|
|
68
|
+
OIDC Trusted Publishing (no API token required).
|
widpath-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 junsxu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
widpath-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: widpath
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Hierarchical file-path resolver for WID-based storage
|
|
5
|
+
Project-URL: Homepage, https://github.com/junsxu/widpath
|
|
6
|
+
Project-URL: Repository, https://github.com/junsxu/widpath
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/junsxu/widpath/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/junsxu/widpath/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: "sheng.SMLH" <smlh.sheng@gmail.com>, junsxu <sheng@silmoony.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: file storage,graph,hierarchical path,uuid,wid
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# widpath
|
|
22
|
+
|
|
23
|
+
[](https://github.com/junsxu/widpath/actions/workflows/ci.yml)
|
|
24
|
+
[](https://pypi.org/project/widpath/)
|
|
25
|
+
[](https://pypi.org/project/widpath/)
|
|
26
|
+
[](LICENSE)
|
|
27
|
+
|
|
28
|
+
**widpath** maps WID strings (UUID4 or any fixed-length hex ID) to a hierarchical file-system path tree, keeping directory entry counts bounded while supporting O(1) point lookup - no database required.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## What problem does it solve?
|
|
33
|
+
|
|
34
|
+
Storing millions of UUID-keyed JSON files in a flat directory causes performance problems on every major OS (HFS+, ext4, NTFS all degrade beyond ~100 k entries per directory).
|
|
35
|
+
|
|
36
|
+
widpath borrows the idea from Git's object store (`.git/objects/ab/cdef...`) and generalises it to **adaptive depth**: a single JSON file at a shallow level holds all WIDs that share the same prefix. When that file grows too large, the caller splits it into deeper sub-files - and widpath's `locate` / `resolve` find the right file in at most **16 stat calls** for a 32-char UUID.
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
data/nodes/
|
|
40
|
+
├── 8b.json ← all WIDs starting with "8b" (few entries, stays shallow)
|
|
41
|
+
├── 4a/
|
|
42
|
+
| ├── 3f.json ← split: "4a3f..." WIDs moved here
|
|
43
|
+
| └── b7.json ← split: "4ab7..." WIDs moved here
|
|
44
|
+
└── ...
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install widpath
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Requires Python ≥ 3.9, no third-party dependencies.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Quick start
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from pathlib import Path
|
|
63
|
+
from widpath import locate, WidPathResolver
|
|
64
|
+
|
|
65
|
+
base = Path("data/nodes")
|
|
66
|
+
wid = "4a3f9c2b1e0d5678abcd1234567890ab" # UUID4 with dashes stripped
|
|
67
|
+
|
|
68
|
+
# ── Functional interface (canonical, O(depth) linear scan) ─────────────────
|
|
69
|
+
path = locate(base, wid)
|
|
70
|
+
# -> PosixPath('data/nodes/4a.json') when base/ is empty
|
|
71
|
+
|
|
72
|
+
# ── OOP interface (binary-search variant, O(log depth)) ────────────────────
|
|
73
|
+
resolver = WidPathResolver()
|
|
74
|
+
path = resolver.resolve(wid, base)
|
|
75
|
+
# same result
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
> **Note:** Strip UUID dashes before passing to widpath:
|
|
79
|
+
> `wid = uuid_str.replace("-", "")`
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## API reference
|
|
84
|
+
|
|
85
|
+
### `locate(base_dir, wid, size=2) -> Path`
|
|
86
|
+
|
|
87
|
+
Canonical O(depth) algorithm. Greedily descends into existing subdirectories
|
|
88
|
+
named by successive WID segments, stopping at the first missing directory and
|
|
89
|
+
returning `<current>/<segment>.json`.
|
|
90
|
+
|
|
91
|
+
| Parameter | Type | Default | Description |
|
|
92
|
+
|-----------|------|---------|-------------|
|
|
93
|
+
| `base_dir` | `Path` | - | Root storage directory |
|
|
94
|
+
| `wid` | `str` | - | Hex string, dashes removed |
|
|
95
|
+
| `size` | `int` | `2` | Chars per path segment |
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### `WidPathResolver(size=2)`
|
|
100
|
+
|
|
101
|
+
OOP interface with a binary-search implementation of path location.
|
|
102
|
+
|
|
103
|
+
| Method | Description |
|
|
104
|
+
|--------|-------------|
|
|
105
|
+
| `resolve(wid, base_dir)` | Locate file via binary search. Raises `FileNotFoundError` if `base_dir` missing. |
|
|
106
|
+
| `path_at_level(wid, level)` | Build the **relative** path for `wid` at depth `level`. |
|
|
107
|
+
| `max_level(wid)` | Maximum depth level = `len(wid) // size - 1`. |
|
|
108
|
+
| `candidate_paths(wid, base_dir)` | All candidate paths from shallowest to deepest. |
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Comparison with Git object store
|
|
113
|
+
|
|
114
|
+
| Feature | Git object store | widpath |
|
|
115
|
+
|---------|------------------|---------|
|
|
116
|
+
| Hash algorithm | SHA1 / SHA256 | Any hex string (UUID, SHA, etc.) |
|
|
117
|
+
| Directory depth | Fixed 2 levels | Adaptive 1-16 levels |
|
|
118
|
+
| File format | Binary blobs | Caller-defined (JSON, etc.) |
|
|
119
|
+
| Multiple objects per file | No (1 object = 1 file) | Yes (bucket file holds many) |
|
|
120
|
+
| Split strategy | `git gc` packs loose objects | Caller splits bucket files on overflow |
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
## Comparison with Existing Solutions
|
|
124
|
+
|
|
125
|
+
### Several path manipulation libraries are commonly available on PyPI:
|
|
126
|
+
| Package / Type | Key Features | Difference from `widpath` |
|
|
127
|
+
| -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------ |
|
|
128
|
+
| **widpath** (this package) | WID-based slicing, hierarchical path generation, and binary search | Specifically designed for WID management, enabling fast storage path discovery |
|
|
129
|
+
| `wildpath` | Wildcard-based access to data structures | Unrelated to hierarchical filesystem path organization |
|
|
130
|
+
| `path` / `path.py` | More user-friendly path manipulation APIs | Focuses on path operations rather than WID-based hierarchical storage strategies |
|
|
131
|
+
| Standard Library `pathlib` | Object-oriented, cross-platform path handling | Provides general path operations only, without hierarchical partitioning or binary search capabilities |
|
|
132
|
+
|
|
133
|
+
### Conclusion
|
|
134
|
+
|
|
135
|
+
widpath introduces a dedicated hierarchical file organization and lookup mechanism tailored for WIDs. It complements existing general-purpose path libraries by providing efficient storage path management and fast lookup capabilities for large-scale WID-based datasets.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## 中文说明
|
|
140
|
+
|
|
141
|
+
**widpath** 将WID字符串(UUID4或任意等长十六进制ID)映射到分层文件路径,
|
|
142
|
+
避免单目录下文件过多,同时支持 O(1)级别的点查询,无需数据库。
|
|
143
|
+
|
|
144
|
+
### 核心原理
|
|
145
|
+
|
|
146
|
+
UUID4 去掉 `-` 后共32个十六进制字符,按每 2 字符分段得到 16 级路径:
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
4a3f9c2b... -> 4a / 3f / 9c / 2b / ...
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
同一前缀的 WID 共存于同一个 JSON 文件。 文件过大时,调用方将其拆分为更深的子目录,
|
|
153
|
+
widpath 的 `locate` / `resolve` 自动找到正确的文件。
|
|
154
|
+
|
|
155
|
+
### 两种接口
|
|
156
|
+
|
|
157
|
+
- **`locate(base_dir, wid)`**: 顺序遍历,沿着已存在的子目录下探,遇到缺失则返回当前层文件路径。
|
|
158
|
+
- **`WidPathResolver.resolve(wid, base_dir)`**: 二分查找版本,在稀疏目录树上减少 stat 调用次数。
|
|
159
|
+
|
|
160
|
+
两者在相同文件系统状态下返回相同结果(见 `tests/test_locate.py::TestAlgorithmConsistency`)。
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Development
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
git clone https://github.com/junsxu/widpath
|
|
168
|
+
cd widpath
|
|
169
|
+
pip install -e ".[dev]"
|
|
170
|
+
pytest # run all tests (except perf)
|
|
171
|
+
pytest -m perf # run performance benchmarks
|
|
172
|
+
ruff check widpath tests # lint
|
|
173
|
+
mypy widpath # type check
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT @ junsxu / silmoony.com
|
widpath-0.2.0/README.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# widpath
|
|
2
|
+
|
|
3
|
+
[](https://github.com/junsxu/widpath/actions/workflows/ci.yml)
|
|
4
|
+
[](https://pypi.org/project/widpath/)
|
|
5
|
+
[](https://pypi.org/project/widpath/)
|
|
6
|
+
[](LICENSE)
|
|
7
|
+
|
|
8
|
+
**widpath** maps WID strings (UUID4 or any fixed-length hex ID) to a hierarchical file-system path tree, keeping directory entry counts bounded while supporting O(1) point lookup - no database required.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## What problem does it solve?
|
|
13
|
+
|
|
14
|
+
Storing millions of UUID-keyed JSON files in a flat directory causes performance problems on every major OS (HFS+, ext4, NTFS all degrade beyond ~100 k entries per directory).
|
|
15
|
+
|
|
16
|
+
widpath borrows the idea from Git's object store (`.git/objects/ab/cdef...`) and generalises it to **adaptive depth**: a single JSON file at a shallow level holds all WIDs that share the same prefix. When that file grows too large, the caller splits it into deeper sub-files - and widpath's `locate` / `resolve` find the right file in at most **16 stat calls** for a 32-char UUID.
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
data/nodes/
|
|
20
|
+
├── 8b.json ← all WIDs starting with "8b" (few entries, stays shallow)
|
|
21
|
+
├── 4a/
|
|
22
|
+
| ├── 3f.json ← split: "4a3f..." WIDs moved here
|
|
23
|
+
| └── b7.json ← split: "4ab7..." WIDs moved here
|
|
24
|
+
└── ...
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install widpath
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Requires Python ≥ 3.9, no third-party dependencies.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
from widpath import locate, WidPathResolver
|
|
44
|
+
|
|
45
|
+
base = Path("data/nodes")
|
|
46
|
+
wid = "4a3f9c2b1e0d5678abcd1234567890ab" # UUID4 with dashes stripped
|
|
47
|
+
|
|
48
|
+
# ── Functional interface (canonical, O(depth) linear scan) ─────────────────
|
|
49
|
+
path = locate(base, wid)
|
|
50
|
+
# -> PosixPath('data/nodes/4a.json') when base/ is empty
|
|
51
|
+
|
|
52
|
+
# ── OOP interface (binary-search variant, O(log depth)) ────────────────────
|
|
53
|
+
resolver = WidPathResolver()
|
|
54
|
+
path = resolver.resolve(wid, base)
|
|
55
|
+
# same result
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
> **Note:** Strip UUID dashes before passing to widpath:
|
|
59
|
+
> `wid = uuid_str.replace("-", "")`
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## API reference
|
|
64
|
+
|
|
65
|
+
### `locate(base_dir, wid, size=2) -> Path`
|
|
66
|
+
|
|
67
|
+
Canonical O(depth) algorithm. Greedily descends into existing subdirectories
|
|
68
|
+
named by successive WID segments, stopping at the first missing directory and
|
|
69
|
+
returning `<current>/<segment>.json`.
|
|
70
|
+
|
|
71
|
+
| Parameter | Type | Default | Description |
|
|
72
|
+
|-----------|------|---------|-------------|
|
|
73
|
+
| `base_dir` | `Path` | - | Root storage directory |
|
|
74
|
+
| `wid` | `str` | - | Hex string, dashes removed |
|
|
75
|
+
| `size` | `int` | `2` | Chars per path segment |
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### `WidPathResolver(size=2)`
|
|
80
|
+
|
|
81
|
+
OOP interface with a binary-search implementation of path location.
|
|
82
|
+
|
|
83
|
+
| Method | Description |
|
|
84
|
+
|--------|-------------|
|
|
85
|
+
| `resolve(wid, base_dir)` | Locate file via binary search. Raises `FileNotFoundError` if `base_dir` missing. |
|
|
86
|
+
| `path_at_level(wid, level)` | Build the **relative** path for `wid` at depth `level`. |
|
|
87
|
+
| `max_level(wid)` | Maximum depth level = `len(wid) // size - 1`. |
|
|
88
|
+
| `candidate_paths(wid, base_dir)` | All candidate paths from shallowest to deepest. |
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Comparison with Git object store
|
|
93
|
+
|
|
94
|
+
| Feature | Git object store | widpath |
|
|
95
|
+
|---------|------------------|---------|
|
|
96
|
+
| Hash algorithm | SHA1 / SHA256 | Any hex string (UUID, SHA, etc.) |
|
|
97
|
+
| Directory depth | Fixed 2 levels | Adaptive 1-16 levels |
|
|
98
|
+
| File format | Binary blobs | Caller-defined (JSON, etc.) |
|
|
99
|
+
| Multiple objects per file | No (1 object = 1 file) | Yes (bucket file holds many) |
|
|
100
|
+
| Split strategy | `git gc` packs loose objects | Caller splits bucket files on overflow |
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
## Comparison with Existing Solutions
|
|
104
|
+
|
|
105
|
+
### Several path manipulation libraries are commonly available on PyPI:
|
|
106
|
+
| Package / Type | Key Features | Difference from `widpath` |
|
|
107
|
+
| -------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------ |
|
|
108
|
+
| **widpath** (this package) | WID-based slicing, hierarchical path generation, and binary search | Specifically designed for WID management, enabling fast storage path discovery |
|
|
109
|
+
| `wildpath` | Wildcard-based access to data structures | Unrelated to hierarchical filesystem path organization |
|
|
110
|
+
| `path` / `path.py` | More user-friendly path manipulation APIs | Focuses on path operations rather than WID-based hierarchical storage strategies |
|
|
111
|
+
| Standard Library `pathlib` | Object-oriented, cross-platform path handling | Provides general path operations only, without hierarchical partitioning or binary search capabilities |
|
|
112
|
+
|
|
113
|
+
### Conclusion
|
|
114
|
+
|
|
115
|
+
widpath introduces a dedicated hierarchical file organization and lookup mechanism tailored for WIDs. It complements existing general-purpose path libraries by providing efficient storage path management and fast lookup capabilities for large-scale WID-based datasets.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## 中文说明
|
|
120
|
+
|
|
121
|
+
**widpath** 将WID字符串(UUID4或任意等长十六进制ID)映射到分层文件路径,
|
|
122
|
+
避免单目录下文件过多,同时支持 O(1)级别的点查询,无需数据库。
|
|
123
|
+
|
|
124
|
+
### 核心原理
|
|
125
|
+
|
|
126
|
+
UUID4 去掉 `-` 后共32个十六进制字符,按每 2 字符分段得到 16 级路径:
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
4a3f9c2b... -> 4a / 3f / 9c / 2b / ...
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
同一前缀的 WID 共存于同一个 JSON 文件。 文件过大时,调用方将其拆分为更深的子目录,
|
|
133
|
+
widpath 的 `locate` / `resolve` 自动找到正确的文件。
|
|
134
|
+
|
|
135
|
+
### 两种接口
|
|
136
|
+
|
|
137
|
+
- **`locate(base_dir, wid)`**: 顺序遍历,沿着已存在的子目录下探,遇到缺失则返回当前层文件路径。
|
|
138
|
+
- **`WidPathResolver.resolve(wid, base_dir)`**: 二分查找版本,在稀疏目录树上减少 stat 调用次数。
|
|
139
|
+
|
|
140
|
+
两者在相同文件系统状态下返回相同结果(见 `tests/test_locate.py::TestAlgorithmConsistency`)。
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Development
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
git clone https://github.com/junsxu/widpath
|
|
148
|
+
cd widpath
|
|
149
|
+
pip install -e ".[dev]"
|
|
150
|
+
pytest # run all tests (except perf)
|
|
151
|
+
pytest -m perf # run performance benchmarks
|
|
152
|
+
ruff check widpath tests # lint
|
|
153
|
+
mypy widpath # type check
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## License
|
|
159
|
+
|
|
160
|
+
MIT @ junsxu / silmoony.com
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "widpath"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Hierarchical file-path resolver for WID-based storage"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "sheng.SMLH", email = "smlh.sheng@gmail.com" },
|
|
11
|
+
{ name = "junsxu", email = "sheng@silmoony.com"}
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
license = { text = "MIT" }
|
|
15
|
+
requires-python = ">=3.9"
|
|
16
|
+
keywords = ["wid", "file storage", "hierarchical path", "uuid", "graph"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Software Development :: Libraries",
|
|
28
|
+
"Topic :: System :: Filesystems",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/junsxu/widpath"
|
|
33
|
+
Repository = "https://github.com/junsxu/widpath"
|
|
34
|
+
"Bug Tracker" = "https://github.com/junsxu/widpath/issues"
|
|
35
|
+
Changelog = "https://github.com/junsxu/widpath/blob/main/CHANGELOG.md"
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest>=7.0",
|
|
40
|
+
"pytest-cov>=4.0",
|
|
41
|
+
"ruff>=0.4",
|
|
42
|
+
"mypy>=1.0",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[tool.pytest.ini_options]
|
|
46
|
+
testpaths = ["tests"]
|
|
47
|
+
addopts = "--cov=widpath --cov-report=term-missing --cov-fail-under=95"
|
|
48
|
+
markers = [
|
|
49
|
+
"perf: performance benchmarks (deselect with '-m not perf')",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.coverage.run]
|
|
53
|
+
source = ["widpath"]
|
|
54
|
+
|
|
55
|
+
[tool.ruff]
|
|
56
|
+
line-length = 100
|
|
57
|
+
target-version = "py39"
|
|
58
|
+
|
|
59
|
+
[tool.ruff.lint]
|
|
60
|
+
select = ["E", "F", "I", "UP"]
|
|
61
|
+
|
|
62
|
+
[tool.mypy]
|
|
63
|
+
python_version = "3.9"
|
|
64
|
+
strict = true
|
|
65
|
+
exclude = ["tests/"]
|