vextor 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. vextor-0.1.0/.clang-format +5 -0
  2. vextor-0.1.0/.clangd +3 -0
  3. vextor-0.1.0/.github/workflows/ci.yml +101 -0
  4. vextor-0.1.0/.github/workflows/publish.yml +49 -0
  5. vextor-0.1.0/.gitignore +16 -0
  6. vextor-0.1.0/.pre-commit-config.yaml +6 -0
  7. vextor-0.1.0/CMakeLists.txt +75 -0
  8. vextor-0.1.0/CMakePresets.json +52 -0
  9. vextor-0.1.0/LICENSE +21 -0
  10. vextor-0.1.0/PKG-INFO +207 -0
  11. vextor-0.1.0/README.md +170 -0
  12. vextor-0.1.0/benchmarks/CMakeLists.txt +30 -0
  13. vextor-0.1.0/benchmarks/core_distance_bench.cpp +81 -0
  14. vextor-0.1.0/benchmarks/core_sq8_bench.cpp +100 -0
  15. vextor-0.1.0/benchmarks/index_flat_index_bench.cpp +52 -0
  16. vextor-0.1.0/benchmarks/index_hnsw_index_bench.cpp +150 -0
  17. vextor-0.1.0/benchmarks/perf_test.cpp +48 -0
  18. vextor-0.1.0/benchmarks/sift1m/CMakeLists.txt +2 -0
  19. vextor-0.1.0/benchmarks/sift1m/download.sh +42 -0
  20. vextor-0.1.0/benchmarks/sift1m/results.md +11 -0
  21. vextor-0.1.0/benchmarks/sift1m/sift1m_bench.cpp +307 -0
  22. vextor-0.1.0/docs/PRD.md +197 -0
  23. vextor-0.1.0/pyproject.toml +34 -0
  24. vextor-0.1.0/python/CMakeLists.txt +48 -0
  25. vextor-0.1.0/python/bindings.cpp +66 -0
  26. vextor-0.1.0/python/test_vextor.py +79 -0
  27. vextor-0.1.0/src/core/distance.cpp +48 -0
  28. vextor-0.1.0/src/core/distance.h +10 -0
  29. vextor-0.1.0/src/core/query_result.h +12 -0
  30. vextor-0.1.0/src/core/search_result.h +23 -0
  31. vextor-0.1.0/src/core/sq8.cpp +127 -0
  32. vextor-0.1.0/src/core/sq8.h +34 -0
  33. vextor-0.1.0/src/core/types.h +11 -0
  34. vextor-0.1.0/src/index/.gitkeep +0 -0
  35. vextor-0.1.0/src/index/flat_index.h +53 -0
  36. vextor-0.1.0/src/index/hnsw_index.h +387 -0
  37. vextor-0.1.0/src/persistence/.gitkeep +0 -0
  38. vextor-0.1.0/src/persistence/format.h +47 -0
  39. vextor-0.1.0/src/persistence/loader.cpp +136 -0
  40. vextor-0.1.0/src/persistence/loader.h +15 -0
  41. vextor-0.1.0/src/persistence/serializer.cpp +87 -0
  42. vextor-0.1.0/src/persistence/serializer.h +12 -0
  43. vextor-0.1.0/src/segment/.gitkeep +0 -0
  44. vextor-0.1.0/src/segment/active_segment.cpp +55 -0
  45. vextor-0.1.0/src/segment/active_segment.h +44 -0
  46. vextor-0.1.0/src/segment/id_mapping.cpp +33 -0
  47. vextor-0.1.0/src/segment/id_mapping.h +27 -0
  48. vextor-0.1.0/src/segment/sealed_segment.cpp +52 -0
  49. vextor-0.1.0/src/segment/sealed_segment.h +60 -0
  50. vextor-0.1.0/src/segment/segment_manager.cpp +172 -0
  51. vextor-0.1.0/src/segment/segment_manager.h +50 -0
  52. vextor-0.1.0/src/store/.gitkeep +0 -0
  53. vextor-0.1.0/src/store/concept.h +17 -0
  54. vextor-0.1.0/src/store/in_memory_store.cpp +35 -0
  55. vextor-0.1.0/src/store/in_memory_store.h +33 -0
  56. vextor-0.1.0/src/store/mmap_store.cpp +149 -0
  57. vextor-0.1.0/src/store/mmap_store.h +40 -0
  58. vextor-0.1.0/src/vextor/vextor.h +19 -0
  59. vextor-0.1.0/tests/CMakeLists.txt +29 -0
  60. vextor-0.1.0/tests/core_distance_test.cpp +68 -0
  61. vextor-0.1.0/tests/core_sq8_test.cpp +141 -0
  62. vextor-0.1.0/tests/core_types_test.cpp +79 -0
  63. vextor-0.1.0/tests/index_flat_index_test.cpp +140 -0
  64. vextor-0.1.0/tests/index_hnsw_index_test.cpp +254 -0
  65. vextor-0.1.0/tests/persistence_test.cpp +150 -0
  66. vextor-0.1.0/tests/segment_active_segment_test.cpp +122 -0
  67. vextor-0.1.0/tests/segment_id_mapping_test.cpp +67 -0
  68. vextor-0.1.0/tests/segment_sealed_segment_test.cpp +108 -0
  69. vextor-0.1.0/tests/segment_segment_manager_test.cpp +206 -0
  70. vextor-0.1.0/tests/store_in_memory_store_test.cpp +82 -0
  71. vextor-0.1.0/tests/store_mmap_store_test.cpp +144 -0
  72. vextor-0.1.0/tests/vextor_public_api_test.cpp +27 -0
@@ -0,0 +1,5 @@
1
+ BasedOnStyle: Google
2
+ ColumnLimit: 100
3
+ IndentWidth: 4
4
+ PointerAlignment: Left
5
+ AllowShortFunctionsOnASingleLine: Inline
vextor-0.1.0/.clangd ADDED
@@ -0,0 +1,3 @@
1
+ CompileFlags:
2
+ CompilationDatabase: build
3
+ Add: [-Isrc, -std=c++20]
@@ -0,0 +1,101 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+ push:
7
+ branches: [main]
8
+ workflow_dispatch:
9
+
10
+ permissions:
11
+ contents: read
12
+ actions: write
13
+
14
+ concurrency:
15
+ group: ${{ github.workflow }}-${{ github.ref }}
16
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
17
+
18
+ jobs:
19
+ format-check:
20
+ name: clang-format check
21
+ runs-on: ubuntu-24.04
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - name: Run clang-format check
26
+ uses: jidicula/clang-format-action@v4.14.0
27
+ with:
28
+ clang-format-version: '18'
29
+ check-path: 'src'
30
+
31
+ - name: Check test formatting
32
+ uses: jidicula/clang-format-action@v4.14.0
33
+ with:
34
+ clang-format-version: '18'
35
+ check-path: 'tests'
36
+
37
+ - name: Check benchmark formatting
38
+ uses: jidicula/clang-format-action@v4.14.0
39
+ with:
40
+ clang-format-version: '18'
41
+ check-path: 'benchmarks'
42
+
43
+ build-and-test:
44
+ name: ${{ matrix.compiler }} (${{ matrix.build_type }})
45
+ runs-on: ubuntu-24.04
46
+ strategy:
47
+ fail-fast: false
48
+ matrix:
49
+ include:
50
+ - compiler: gcc-14
51
+ cc: gcc-14
52
+ cxx: g++-14
53
+ build_type: Release
54
+ sanitizers: false
55
+
56
+ - compiler: gcc-14
57
+ cc: gcc-14
58
+ cxx: g++-14
59
+ build_type: Debug
60
+ sanitizers: true
61
+
62
+ - compiler: clang-18
63
+ cc: clang-18
64
+ cxx: clang++-18
65
+ build_type: Release
66
+ sanitizers: false
67
+
68
+ - compiler: clang-18
69
+ cc: clang-18
70
+ cxx: clang++-18
71
+ build_type: Debug
72
+ sanitizers: true
73
+
74
+ steps:
75
+ - uses: actions/checkout@v4
76
+
77
+ - name: Cache FetchContent dependencies
78
+ uses: actions/cache@v4
79
+ with:
80
+ path: build/_deps
81
+ key: fetchcontent-${{ matrix.compiler }}-${{ hashFiles('CMakeLists.txt', 'tests/CMakeLists.txt', 'benchmarks/CMakeLists.txt') }}
82
+
83
+ - name: Configure
84
+ env:
85
+ CC: ${{ matrix.cc }}
86
+ CXX: ${{ matrix.cxx }}
87
+ # -O1 for Debug: sanitizers don't need -O0, and the HNSW construction
88
+ # tests are unusably slow unoptimized (ASan docs recommend -O1).
89
+ # Asserts stay active (still a Debug build, NDEBUG undefined).
90
+ run: |
91
+ cmake -B build \
92
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
93
+ -DCMAKE_CXX_FLAGS_DEBUG="-O1 -g" \
94
+ -DVEXTOR_ENABLE_ASAN=${{ matrix.sanitizers && 'ON' || 'OFF' }} \
95
+ -DVEXTOR_ENABLE_UBSAN=${{ matrix.sanitizers && 'ON' || 'OFF' }}
96
+
97
+ - name: Build
98
+ run: cmake --build build -j$(nproc)
99
+
100
+ - name: Test
101
+ run: ctest --test-dir build --output-on-failure --parallel $(nproc)
@@ -0,0 +1,49 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ build-sdist:
13
+ name: Build and verify sdist
14
+ runs-on: ubuntu-24.04
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Build sdist
19
+ run: pipx run build --sdist
20
+
21
+ - name: Verify sdist installs and passes smoke tests
22
+ env:
23
+ CC: gcc-14
24
+ CXX: g++-14
25
+ run: |
26
+ python3 -m pip install dist/*.tar.gz
27
+ python3 python/test_vextor.py
28
+
29
+ - uses: actions/upload-artifact@v4
30
+ with:
31
+ name: sdist
32
+ path: dist/*.tar.gz
33
+
34
+ publish:
35
+ name: Publish to PyPI (trusted publishing)
36
+ needs: build-sdist
37
+ runs-on: ubuntu-24.04
38
+ environment:
39
+ name: pypi
40
+ url: https://pypi.org/p/vextor
41
+ permissions:
42
+ id-token: write
43
+ steps:
44
+ - uses: actions/download-artifact@v4
45
+ with:
46
+ name: sdist
47
+ path: dist
48
+
49
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ CMakeLists.txt.user
2
+ CMakeCache.txt
3
+ CMakeFiles
4
+ CMakeScripts
5
+ Testing
6
+ Makefile
7
+ cmake_install.cmake
8
+ install_manifest.txt
9
+ compile_commands.json
10
+ CTestTestfile.cmake
11
+ _deps
12
+ CMakeUserPresets.json
13
+ build*/
14
+ ai-notes/
15
+ benchmarks/sift1m/data/
16
+ .claude/
@@ -0,0 +1,6 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/mirrors-clang-format
3
+ rev: v18.1.8
4
+ hooks:
5
+ - id: clang-format
6
+ types_or: [c, c++]
@@ -0,0 +1,75 @@
1
+ cmake_minimum_required(VERSION 3.20)
2
+ project(vextor VERSION 0.1.0 LANGUAGES CXX)
3
+
4
+ set(CMAKE_CXX_STANDARD 20)
5
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
6
+ set(CMAKE_CXX_EXTENSIONS OFF)
7
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
8
+
9
+ # --- Options ---
10
+ option(VEXTOR_BUILD_TESTS "Build unit tests" ON)
11
+ option(VEXTOR_BUILD_BENCHMARKS "Build benchmarks" ON)
12
+ option(VEXTOR_ENABLE_ASAN "Enable AddressSanitizer" OFF)
13
+ option(VEXTOR_ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer" OFF)
14
+
15
+ # --- Compiler warnings ---
16
+ add_compile_options(-Wall -Wextra -Wpedantic)
17
+
18
+ # --- Sanitizers ---
19
+ if(VEXTOR_ENABLE_ASAN)
20
+ add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
21
+ add_link_options(-fsanitize=address)
22
+ endif()
23
+
24
+ if(VEXTOR_ENABLE_UBSAN)
25
+ add_compile_options(-fsanitize=undefined)
26
+ add_link_options(-fsanitize=undefined)
27
+ endif()
28
+
29
+ # --- AVX2 detection ---
30
+ include(CheckCXXCompilerFlag)
31
+ check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
32
+ check_cxx_compiler_flag("-mfma" COMPILER_SUPPORTS_FMA)
33
+
34
+ # --- Library target ---
35
+ add_library(vextor STATIC
36
+ src/core/distance.cpp
37
+ src/core/sq8.cpp
38
+ src/store/in_memory_store.cpp
39
+ src/store/mmap_store.cpp
40
+ src/segment/id_mapping.cpp
41
+ src/segment/active_segment.cpp
42
+ src/segment/sealed_segment.cpp
43
+ src/segment/segment_manager.cpp
44
+ src/persistence/serializer.cpp
45
+ src/persistence/loader.cpp
46
+ )
47
+
48
+ target_include_directories(vextor PUBLIC
49
+ ${CMAKE_CURRENT_SOURCE_DIR}/src
50
+ )
51
+
52
+ target_compile_features(vextor PUBLIC cxx_std_20)
53
+ set_target_properties(vextor PROPERTIES POSITION_INDEPENDENT_CODE ON)
54
+
55
+ if(COMPILER_SUPPORTS_AVX2 AND COMPILER_SUPPORTS_FMA)
56
+ target_compile_options(vextor PRIVATE -mavx2 -mfma)
57
+ target_compile_definitions(vextor PUBLIC VEXTOR_AVX2=1)
58
+ endif()
59
+
60
+ # --- Tests ---
61
+ if(VEXTOR_BUILD_TESTS)
62
+ enable_testing()
63
+ add_subdirectory(tests)
64
+ endif()
65
+
66
+ # --- Benchmarks ---
67
+ if(VEXTOR_BUILD_BENCHMARKS)
68
+ add_subdirectory(benchmarks)
69
+ endif()
70
+
71
+ # --- Python bindings ---
72
+ option(VEXTOR_BUILD_PYTHON "Build Python bindings" OFF)
73
+ if(VEXTOR_BUILD_PYTHON)
74
+ add_subdirectory(python)
75
+ endif()
@@ -0,0 +1,52 @@
1
+ {
2
+ "version": 6,
3
+ "cmakeMinimumRequired": {
4
+ "major": 3,
5
+ "minor": 20,
6
+ "patch": 0
7
+ },
8
+ "configurePresets": [
9
+ {
10
+ "name": "dev",
11
+ "displayName": "Debug + Sanitizers",
12
+ "generator": "Ninja",
13
+ "binaryDir": "${sourceDir}/build-dev",
14
+ "cacheVariables": {
15
+ "CMAKE_BUILD_TYPE": "Debug",
16
+ "VEXTOR_BUILD_TESTS": "ON",
17
+ "VEXTOR_BUILD_BENCHMARKS": "ON",
18
+ "VEXTOR_ENABLE_ASAN": "ON",
19
+ "VEXTOR_ENABLE_UBSAN": "ON",
20
+ "VEXTOR_BUILD_PYTHON": "OFF"
21
+ }
22
+ },
23
+ {
24
+ "name": "release",
25
+ "displayName": "Release",
26
+ "generator": "Ninja",
27
+ "binaryDir": "${sourceDir}/build-release",
28
+ "cacheVariables": {
29
+ "CMAKE_BUILD_TYPE": "Release",
30
+ "VEXTOR_BUILD_TESTS": "ON",
31
+ "VEXTOR_BUILD_BENCHMARKS": "ON",
32
+ "VEXTOR_ENABLE_ASAN": "OFF",
33
+ "VEXTOR_ENABLE_UBSAN": "OFF",
34
+ "VEXTOR_BUILD_PYTHON": "OFF"
35
+ }
36
+ },
37
+ {
38
+ "name": "release-python",
39
+ "displayName": "Release + Python bindings",
40
+ "generator": "Ninja",
41
+ "binaryDir": "${sourceDir}/build-release-python",
42
+ "cacheVariables": {
43
+ "CMAKE_BUILD_TYPE": "Release",
44
+ "VEXTOR_BUILD_TESTS": "ON",
45
+ "VEXTOR_BUILD_BENCHMARKS": "ON",
46
+ "VEXTOR_ENABLE_ASAN": "OFF",
47
+ "VEXTOR_ENABLE_UBSAN": "OFF",
48
+ "VEXTOR_BUILD_PYTHON": "ON"
49
+ }
50
+ }
51
+ ]
52
+ }
vextor-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 mariorch22
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
vextor-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,207 @@
1
+ Metadata-Version: 2.1
2
+ Name: vextor
3
+ Version: 0.1.0
4
+ Summary: Segmented vector database for approximate nearest neighbor search (HNSW, AVX2, mmap)
5
+ Author: Mario Raach
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 mariorch22
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Development Status :: 3 - Alpha
29
+ Classifier: Programming Language :: C++
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: Operating System :: POSIX :: Linux
32
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
33
+ Project-URL: Repository, https://github.com/mariorch22/vextor
34
+ Requires-Python: >=3.8
35
+ Requires-Dist: numpy
36
+ Description-Content-Type: text/markdown
37
+
38
+ # vextor
39
+
40
+ A segmented vector database for Approximate Nearest Neighbor search, written in C++20. Uses AVX2 SIMD distance kernels, HNSW graph indexing, and memory-mapped storage.
41
+
42
+ Vectors are written to an active in-memory segment, sealed to disk when full, and served as read-only mmap-backed segments. Search fans out across all segments and merges results.
43
+
44
+ ## Architecture
45
+
46
+ ```mermaid
47
+ graph LR
48
+ A[core] --> B[store] --> C[index] --> D[segment] --> E[persistence]
49
+ ```
50
+
51
+ | Layer | Contents |
52
+ |---|---|
53
+ | **core** | types, L2 distance (AVX2 + scalar, compile-time dispatch), SQ8 quantization |
54
+ | **store** | VectorStore concept, InMemoryStore, MmapStore |
55
+ | **index** | HnswIndex\<Store\>, FlatIndex\<Store\> |
56
+ | **segment** | ActiveSegment, SealedSegment, SegmentManager |
57
+ | **persistence** | Serializer, Loader, VEX0/HNSW/IDS binary formats |
58
+
59
+ Templates live in `store/` and `index/`. Everything from `segment/` up exposes only concrete types.
60
+
61
+ See [docs/PRD.md](docs/PRD.md) for the full design rationale.
62
+
63
+ ## Code style
64
+
65
+ Naming convention: types use `PascalCase`; functions and variables use STL-style `snake_case`; the `vextor` namespace is lowercase.
66
+
67
+ ## Build
68
+
69
+ Requires CMake 3.20+ (3.25+ to use the presets, which are written against presets schema version 6), Ninja, and a C++20 compiler (GCC 14+ or Clang 18+). Three presets are available:
70
+
71
+ | Preset | Description |
72
+ |---|---|
73
+ | `dev` | Debug build with ASan + UBSan |
74
+ | `release` | Optimized build |
75
+ | `release-python` | Optimized build + Python bindings |
76
+
77
+ ```bash
78
+ cmake --preset release
79
+ cmake --build build-release
80
+ ```
81
+
82
+ Run tests and benchmarks:
83
+
84
+ ```bash
85
+ ctest --test-dir build-release --output-on-failure
86
+ ./build-release/benchmarks/vextor_bench
87
+ ```
88
+
89
+ ### SIFT1M benchmark (optional)
90
+
91
+ Requires the SIFT1M dataset (~160 MB download).
92
+
93
+ ```bash
94
+ ./benchmarks/sift1m/download.sh
95
+ cmake --preset release -DVEXTOR_BUILD_SIFT1M=ON
96
+ cmake --build build-release
97
+ ./build-release/benchmarks/sift1m/vextor_sift1m
98
+ ```
99
+
100
+ Results are written to `benchmarks/sift1m/results.md`.
101
+
102
+ ### Python bindings (optional)
103
+
104
+ Requires Python 3.8+ and NumPy.
105
+
106
+ Via pip (builds a wheel using scikit-build-core):
107
+
108
+ ```bash
109
+ pip install .
110
+ python3 -c "import vextor; print('ok')"
111
+ ```
112
+
113
+ On CPython ≥ 3.12 this produces an abi3 wheel that works across Python versions. Note: AVX2 is enabled at build time whenever the compiler accepts `-mavx2` and `-mfma` (a compiler check, not a check of the build machine's CPU), so a wheel built this way requires an AVX2- and FMA-capable CPU at runtime.
114
+
115
+ Alternatively, as part of a CMake build:
116
+
117
+ ```bash
118
+ cmake --preset release-python
119
+ cmake --build build-release-python
120
+ PYTHONPATH=build-release-python/python python3 -c "import vextor; print('ok')"
121
+ ```
122
+
123
+ ## Usage
124
+
125
+ ### C++
126
+
127
+ ```cpp
128
+ #include <vector>
129
+ #include <vextor/vextor.h>
130
+
131
+ // In-memory only
132
+ vextor::Database db(/*dim=*/768, /*segment_capacity=*/1000000);
133
+
134
+ // Insert
135
+ std::vector<float> vec(768, 0.0f);
136
+ db.insert(/*user_id=*/42, vec);
137
+
138
+ // Search
139
+ std::vector<float> query(768, 1.0f);
140
+ auto results = db.search(query, /*k=*/10);
141
+ for (const auto& r : results) {
142
+ // r.user_id, r.distance
143
+ }
144
+
145
+ // With persistence
146
+ vextor::Database db2(768, 1000000, "path/to/db");
147
+ db2.insert(42, vec);
148
+ db2.save();
149
+ auto loaded = vextor::Database::load("path/to/db");
150
+ ```
151
+
152
+ ### Python
153
+
154
+ ```python
155
+ import numpy as np
156
+ import vextor
157
+
158
+ db = vextor.Database(dimensions=768, segment_capacity=1_000_000, path="path/to/db")
159
+
160
+ db.insert(user_id=42, vector=np.random.randn(768).astype(np.float32))
161
+
162
+ results = db.search(query=np.random.randn(768).astype(np.float32), k=10)
163
+ for user_id, distance in results:
164
+ print(f" {user_id}: {distance:.4f}")
165
+
166
+ db.save()
167
+ db2 = vextor.Database.load("path/to/db")
168
+ ```
169
+
170
+ ## Benchmarks
171
+
172
+ Release build, single-threaded. Selected results from local runs:
173
+
174
+ | | Time |
175
+ |---|---|
176
+ | L2 distance (scalar, 128d) | 43 ns |
177
+ | L2 distance (AVX2, 128d) | 9 ns |
178
+ | L2 distance (AVX2, 768d) | 80 ns |
179
+ | FlatIndex search (10K, 128d) | 163 μs |
180
+ | HNSW search (10K, 128d) | 39 μs |
181
+ | HNSW search (100K, 128d) | 145 μs |
182
+
183
+ HNSW is 4.2x faster than brute-force search at 10K vectors. Going from 10K to 100K vectors, HNSW search time grows sub-linearly (39 μs → 145 μs for 10x more vectors).
184
+
185
+ ### SIFT1M results
186
+
187
+ 1M vectors, 128d float32, single-threaded, via `SegmentManager` (capacity 1.1M, no seal during build).
188
+
189
+ **Machine:** 12th Gen Intel(R) Core(TM) i7-1260P | 12 GB RAM | Linux 5.15.153.1-microsoft-standard-WSL2
190
+
191
+ | M | ef_construction | ef_search | Recall@1 | Recall@10 | Recall@100 | QPS | Build (s) |
192
+ |---|---|---|---|---|---|---|---|
193
+ | 16 | 200 | 64 | 0.9902 | 0.9903 | 0.9478 | 3503 | 603.7 |
194
+ | 16 | 200 | 128 | 0.9919 | 0.9941 | 0.9664 | 2810 | 603.7 |
195
+ | 16 | 200 | 256 | 0.9939 | 0.9986 | 0.9923 | 1523 | 603.7 |
196
+ | 32 | 400 | 128 | 0.9937 | 0.9985 | 0.9911 | 1605 | 1868.3 |
197
+ | 32 | 400 | 256 | 0.9940 | 0.9993 | 0.9986 | 944 | 1868.3 |
198
+
199
+ v0.1 gate (Recall@10 > 0.90): **PASSED** — all 5 configurations meet the criterion.
200
+
201
+ ## Project status
202
+
203
+ v0.1 — MVP. Single-node, single-threaded. See [milestones](https://github.com/mariorch22/vextor/milestones) for the roadmap.
204
+
205
+ ## License
206
+
207
+ MIT