vextor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vextor-0.1.0/.clang-format +5 -0
- vextor-0.1.0/.clangd +3 -0
- vextor-0.1.0/.github/workflows/ci.yml +101 -0
- vextor-0.1.0/.github/workflows/publish.yml +49 -0
- vextor-0.1.0/.gitignore +16 -0
- vextor-0.1.0/.pre-commit-config.yaml +6 -0
- vextor-0.1.0/CMakeLists.txt +75 -0
- vextor-0.1.0/CMakePresets.json +52 -0
- vextor-0.1.0/LICENSE +21 -0
- vextor-0.1.0/PKG-INFO +207 -0
- vextor-0.1.0/README.md +170 -0
- vextor-0.1.0/benchmarks/CMakeLists.txt +30 -0
- vextor-0.1.0/benchmarks/core_distance_bench.cpp +81 -0
- vextor-0.1.0/benchmarks/core_sq8_bench.cpp +100 -0
- vextor-0.1.0/benchmarks/index_flat_index_bench.cpp +52 -0
- vextor-0.1.0/benchmarks/index_hnsw_index_bench.cpp +150 -0
- vextor-0.1.0/benchmarks/perf_test.cpp +48 -0
- vextor-0.1.0/benchmarks/sift1m/CMakeLists.txt +2 -0
- vextor-0.1.0/benchmarks/sift1m/download.sh +42 -0
- vextor-0.1.0/benchmarks/sift1m/results.md +11 -0
- vextor-0.1.0/benchmarks/sift1m/sift1m_bench.cpp +307 -0
- vextor-0.1.0/docs/PRD.md +197 -0
- vextor-0.1.0/pyproject.toml +34 -0
- vextor-0.1.0/python/CMakeLists.txt +48 -0
- vextor-0.1.0/python/bindings.cpp +66 -0
- vextor-0.1.0/python/test_vextor.py +79 -0
- vextor-0.1.0/src/core/distance.cpp +48 -0
- vextor-0.1.0/src/core/distance.h +10 -0
- vextor-0.1.0/src/core/query_result.h +12 -0
- vextor-0.1.0/src/core/search_result.h +23 -0
- vextor-0.1.0/src/core/sq8.cpp +127 -0
- vextor-0.1.0/src/core/sq8.h +34 -0
- vextor-0.1.0/src/core/types.h +11 -0
- vextor-0.1.0/src/index/.gitkeep +0 -0
- vextor-0.1.0/src/index/flat_index.h +53 -0
- vextor-0.1.0/src/index/hnsw_index.h +387 -0
- vextor-0.1.0/src/persistence/.gitkeep +0 -0
- vextor-0.1.0/src/persistence/format.h +47 -0
- vextor-0.1.0/src/persistence/loader.cpp +136 -0
- vextor-0.1.0/src/persistence/loader.h +15 -0
- vextor-0.1.0/src/persistence/serializer.cpp +87 -0
- vextor-0.1.0/src/persistence/serializer.h +12 -0
- vextor-0.1.0/src/segment/.gitkeep +0 -0
- vextor-0.1.0/src/segment/active_segment.cpp +55 -0
- vextor-0.1.0/src/segment/active_segment.h +44 -0
- vextor-0.1.0/src/segment/id_mapping.cpp +33 -0
- vextor-0.1.0/src/segment/id_mapping.h +27 -0
- vextor-0.1.0/src/segment/sealed_segment.cpp +52 -0
- vextor-0.1.0/src/segment/sealed_segment.h +60 -0
- vextor-0.1.0/src/segment/segment_manager.cpp +172 -0
- vextor-0.1.0/src/segment/segment_manager.h +50 -0
- vextor-0.1.0/src/store/.gitkeep +0 -0
- vextor-0.1.0/src/store/concept.h +17 -0
- vextor-0.1.0/src/store/in_memory_store.cpp +35 -0
- vextor-0.1.0/src/store/in_memory_store.h +33 -0
- vextor-0.1.0/src/store/mmap_store.cpp +149 -0
- vextor-0.1.0/src/store/mmap_store.h +40 -0
- vextor-0.1.0/src/vextor/vextor.h +19 -0
- vextor-0.1.0/tests/CMakeLists.txt +29 -0
- vextor-0.1.0/tests/core_distance_test.cpp +68 -0
- vextor-0.1.0/tests/core_sq8_test.cpp +141 -0
- vextor-0.1.0/tests/core_types_test.cpp +79 -0
- vextor-0.1.0/tests/index_flat_index_test.cpp +140 -0
- vextor-0.1.0/tests/index_hnsw_index_test.cpp +254 -0
- vextor-0.1.0/tests/persistence_test.cpp +150 -0
- vextor-0.1.0/tests/segment_active_segment_test.cpp +122 -0
- vextor-0.1.0/tests/segment_id_mapping_test.cpp +67 -0
- vextor-0.1.0/tests/segment_sealed_segment_test.cpp +108 -0
- vextor-0.1.0/tests/segment_segment_manager_test.cpp +206 -0
- vextor-0.1.0/tests/store_in_memory_store_test.cpp +82 -0
- vextor-0.1.0/tests/store_mmap_store_test.cpp +144 -0
- vextor-0.1.0/tests/vextor_public_api_test.cpp +27 -0
vextor-0.1.0/.clangd
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [main]
|
|
6
|
+
push:
|
|
7
|
+
branches: [main]
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
actions: write
|
|
13
|
+
|
|
14
|
+
concurrency:
|
|
15
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
16
|
+
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
format-check:
|
|
20
|
+
name: clang-format check
|
|
21
|
+
runs-on: ubuntu-24.04
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Run clang-format check
|
|
26
|
+
uses: jidicula/clang-format-action@v4.14.0
|
|
27
|
+
with:
|
|
28
|
+
clang-format-version: '18'
|
|
29
|
+
check-path: 'src'
|
|
30
|
+
|
|
31
|
+
- name: Check test formatting
|
|
32
|
+
uses: jidicula/clang-format-action@v4.14.0
|
|
33
|
+
with:
|
|
34
|
+
clang-format-version: '18'
|
|
35
|
+
check-path: 'tests'
|
|
36
|
+
|
|
37
|
+
- name: Check benchmark formatting
|
|
38
|
+
uses: jidicula/clang-format-action@v4.14.0
|
|
39
|
+
with:
|
|
40
|
+
clang-format-version: '18'
|
|
41
|
+
check-path: 'benchmarks'
|
|
42
|
+
|
|
43
|
+
build-and-test:
|
|
44
|
+
name: ${{ matrix.compiler }} (${{ matrix.build_type }})
|
|
45
|
+
runs-on: ubuntu-24.04
|
|
46
|
+
strategy:
|
|
47
|
+
fail-fast: false
|
|
48
|
+
matrix:
|
|
49
|
+
include:
|
|
50
|
+
- compiler: gcc-14
|
|
51
|
+
cc: gcc-14
|
|
52
|
+
cxx: g++-14
|
|
53
|
+
build_type: Release
|
|
54
|
+
sanitizers: false
|
|
55
|
+
|
|
56
|
+
- compiler: gcc-14
|
|
57
|
+
cc: gcc-14
|
|
58
|
+
cxx: g++-14
|
|
59
|
+
build_type: Debug
|
|
60
|
+
sanitizers: true
|
|
61
|
+
|
|
62
|
+
- compiler: clang-18
|
|
63
|
+
cc: clang-18
|
|
64
|
+
cxx: clang++-18
|
|
65
|
+
build_type: Release
|
|
66
|
+
sanitizers: false
|
|
67
|
+
|
|
68
|
+
- compiler: clang-18
|
|
69
|
+
cc: clang-18
|
|
70
|
+
cxx: clang++-18
|
|
71
|
+
build_type: Debug
|
|
72
|
+
sanitizers: true
|
|
73
|
+
|
|
74
|
+
steps:
|
|
75
|
+
- uses: actions/checkout@v4
|
|
76
|
+
|
|
77
|
+
- name: Cache FetchContent dependencies
|
|
78
|
+
uses: actions/cache@v4
|
|
79
|
+
with:
|
|
80
|
+
path: build/_deps
|
|
81
|
+
key: fetchcontent-${{ matrix.compiler }}-${{ hashFiles('CMakeLists.txt', 'tests/CMakeLists.txt', 'benchmarks/CMakeLists.txt') }}
|
|
82
|
+
|
|
83
|
+
- name: Configure
|
|
84
|
+
env:
|
|
85
|
+
CC: ${{ matrix.cc }}
|
|
86
|
+
CXX: ${{ matrix.cxx }}
|
|
87
|
+
# -O1 for Debug: sanitizers don't need -O0, and the HNSW construction
|
|
88
|
+
# tests are unusably slow unoptimized (ASan docs recommend -O1).
|
|
89
|
+
# Asserts stay active (still a Debug build, NDEBUG undefined).
|
|
90
|
+
run: |
|
|
91
|
+
cmake -B build \
|
|
92
|
+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
|
93
|
+
-DCMAKE_CXX_FLAGS_DEBUG="-O1 -g" \
|
|
94
|
+
-DVEXTOR_ENABLE_ASAN=${{ matrix.sanitizers && 'ON' || 'OFF' }} \
|
|
95
|
+
-DVEXTOR_ENABLE_UBSAN=${{ matrix.sanitizers && 'ON' || 'OFF' }}
|
|
96
|
+
|
|
97
|
+
- name: Build
|
|
98
|
+
run: cmake --build build -j$(nproc)
|
|
99
|
+
|
|
100
|
+
- name: Test
|
|
101
|
+
run: ctest --test-dir build --output-on-failure --parallel $(nproc)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build-sdist:
|
|
13
|
+
name: Build and verify sdist
|
|
14
|
+
runs-on: ubuntu-24.04
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Build sdist
|
|
19
|
+
run: pipx run build --sdist
|
|
20
|
+
|
|
21
|
+
- name: Verify sdist installs and passes smoke tests
|
|
22
|
+
env:
|
|
23
|
+
CC: gcc-14
|
|
24
|
+
CXX: g++-14
|
|
25
|
+
run: |
|
|
26
|
+
python3 -m pip install dist/*.tar.gz
|
|
27
|
+
python3 python/test_vextor.py
|
|
28
|
+
|
|
29
|
+
- uses: actions/upload-artifact@v4
|
|
30
|
+
with:
|
|
31
|
+
name: sdist
|
|
32
|
+
path: dist/*.tar.gz
|
|
33
|
+
|
|
34
|
+
publish:
|
|
35
|
+
name: Publish to PyPI (trusted publishing)
|
|
36
|
+
needs: build-sdist
|
|
37
|
+
runs-on: ubuntu-24.04
|
|
38
|
+
environment:
|
|
39
|
+
name: pypi
|
|
40
|
+
url: https://pypi.org/p/vextor
|
|
41
|
+
permissions:
|
|
42
|
+
id-token: write
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: sdist
|
|
47
|
+
path: dist
|
|
48
|
+
|
|
49
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
vextor-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
CMakeLists.txt.user
|
|
2
|
+
CMakeCache.txt
|
|
3
|
+
CMakeFiles
|
|
4
|
+
CMakeScripts
|
|
5
|
+
Testing
|
|
6
|
+
Makefile
|
|
7
|
+
cmake_install.cmake
|
|
8
|
+
install_manifest.txt
|
|
9
|
+
compile_commands.json
|
|
10
|
+
CTestTestfile.cmake
|
|
11
|
+
_deps
|
|
12
|
+
CMakeUserPresets.json
|
|
13
|
+
build*/
|
|
14
|
+
ai-notes/
|
|
15
|
+
benchmarks/sift1m/data/
|
|
16
|
+
.claude/
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.20)
|
|
2
|
+
project(vextor VERSION 0.1.0 LANGUAGES CXX)
|
|
3
|
+
|
|
4
|
+
set(CMAKE_CXX_STANDARD 20)
|
|
5
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
6
|
+
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
7
|
+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
8
|
+
|
|
9
|
+
# --- Options ---
|
|
10
|
+
option(VEXTOR_BUILD_TESTS "Build unit tests" ON)
|
|
11
|
+
option(VEXTOR_BUILD_BENCHMARKS "Build benchmarks" ON)
|
|
12
|
+
option(VEXTOR_ENABLE_ASAN "Enable AddressSanitizer" OFF)
|
|
13
|
+
option(VEXTOR_ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer" OFF)
|
|
14
|
+
|
|
15
|
+
# --- Compiler warnings ---
|
|
16
|
+
add_compile_options(-Wall -Wextra -Wpedantic)
|
|
17
|
+
|
|
18
|
+
# --- Sanitizers ---
|
|
19
|
+
if(VEXTOR_ENABLE_ASAN)
|
|
20
|
+
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
|
21
|
+
add_link_options(-fsanitize=address)
|
|
22
|
+
endif()
|
|
23
|
+
|
|
24
|
+
if(VEXTOR_ENABLE_UBSAN)
|
|
25
|
+
add_compile_options(-fsanitize=undefined)
|
|
26
|
+
add_link_options(-fsanitize=undefined)
|
|
27
|
+
endif()
|
|
28
|
+
|
|
29
|
+
# --- AVX2 detection ---
|
|
30
|
+
include(CheckCXXCompilerFlag)
|
|
31
|
+
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
|
|
32
|
+
check_cxx_compiler_flag("-mfma" COMPILER_SUPPORTS_FMA)
|
|
33
|
+
|
|
34
|
+
# --- Library target ---
|
|
35
|
+
add_library(vextor STATIC
|
|
36
|
+
src/core/distance.cpp
|
|
37
|
+
src/core/sq8.cpp
|
|
38
|
+
src/store/in_memory_store.cpp
|
|
39
|
+
src/store/mmap_store.cpp
|
|
40
|
+
src/segment/id_mapping.cpp
|
|
41
|
+
src/segment/active_segment.cpp
|
|
42
|
+
src/segment/sealed_segment.cpp
|
|
43
|
+
src/segment/segment_manager.cpp
|
|
44
|
+
src/persistence/serializer.cpp
|
|
45
|
+
src/persistence/loader.cpp
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
target_include_directories(vextor PUBLIC
|
|
49
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
target_compile_features(vextor PUBLIC cxx_std_20)
|
|
53
|
+
set_target_properties(vextor PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
54
|
+
|
|
55
|
+
if(COMPILER_SUPPORTS_AVX2 AND COMPILER_SUPPORTS_FMA)
|
|
56
|
+
target_compile_options(vextor PRIVATE -mavx2 -mfma)
|
|
57
|
+
target_compile_definitions(vextor PUBLIC VEXTOR_AVX2=1)
|
|
58
|
+
endif()
|
|
59
|
+
|
|
60
|
+
# --- Tests ---
|
|
61
|
+
if(VEXTOR_BUILD_TESTS)
|
|
62
|
+
enable_testing()
|
|
63
|
+
add_subdirectory(tests)
|
|
64
|
+
endif()
|
|
65
|
+
|
|
66
|
+
# --- Benchmarks ---
|
|
67
|
+
if(VEXTOR_BUILD_BENCHMARKS)
|
|
68
|
+
add_subdirectory(benchmarks)
|
|
69
|
+
endif()
|
|
70
|
+
|
|
71
|
+
# --- Python bindings ---
|
|
72
|
+
option(VEXTOR_BUILD_PYTHON "Build Python bindings" OFF)
|
|
73
|
+
if(VEXTOR_BUILD_PYTHON)
|
|
74
|
+
add_subdirectory(python)
|
|
75
|
+
endif()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 6,
|
|
3
|
+
"cmakeMinimumRequired": {
|
|
4
|
+
"major": 3,
|
|
5
|
+
"minor": 20,
|
|
6
|
+
"patch": 0
|
|
7
|
+
},
|
|
8
|
+
"configurePresets": [
|
|
9
|
+
{
|
|
10
|
+
"name": "dev",
|
|
11
|
+
"displayName": "Debug + Sanitizers",
|
|
12
|
+
"generator": "Ninja",
|
|
13
|
+
"binaryDir": "${sourceDir}/build-dev",
|
|
14
|
+
"cacheVariables": {
|
|
15
|
+
"CMAKE_BUILD_TYPE": "Debug",
|
|
16
|
+
"VEXTOR_BUILD_TESTS": "ON",
|
|
17
|
+
"VEXTOR_BUILD_BENCHMARKS": "ON",
|
|
18
|
+
"VEXTOR_ENABLE_ASAN": "ON",
|
|
19
|
+
"VEXTOR_ENABLE_UBSAN": "ON",
|
|
20
|
+
"VEXTOR_BUILD_PYTHON": "OFF"
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "release",
|
|
25
|
+
"displayName": "Release",
|
|
26
|
+
"generator": "Ninja",
|
|
27
|
+
"binaryDir": "${sourceDir}/build-release",
|
|
28
|
+
"cacheVariables": {
|
|
29
|
+
"CMAKE_BUILD_TYPE": "Release",
|
|
30
|
+
"VEXTOR_BUILD_TESTS": "ON",
|
|
31
|
+
"VEXTOR_BUILD_BENCHMARKS": "ON",
|
|
32
|
+
"VEXTOR_ENABLE_ASAN": "OFF",
|
|
33
|
+
"VEXTOR_ENABLE_UBSAN": "OFF",
|
|
34
|
+
"VEXTOR_BUILD_PYTHON": "OFF"
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"name": "release-python",
|
|
39
|
+
"displayName": "Release + Python bindings",
|
|
40
|
+
"generator": "Ninja",
|
|
41
|
+
"binaryDir": "${sourceDir}/build-release-python",
|
|
42
|
+
"cacheVariables": {
|
|
43
|
+
"CMAKE_BUILD_TYPE": "Release",
|
|
44
|
+
"VEXTOR_BUILD_TESTS": "ON",
|
|
45
|
+
"VEXTOR_BUILD_BENCHMARKS": "ON",
|
|
46
|
+
"VEXTOR_ENABLE_ASAN": "OFF",
|
|
47
|
+
"VEXTOR_ENABLE_UBSAN": "OFF",
|
|
48
|
+
"VEXTOR_BUILD_PYTHON": "ON"
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
vextor-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 mariorch22
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
vextor-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: vextor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Segmented vector database for approximate nearest neighbor search (HNSW, AVX2, mmap)
|
|
5
|
+
Author: Mario Raach
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 mariorch22
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Classifier: Development Status :: 3 - Alpha
|
|
29
|
+
Classifier: Programming Language :: C++
|
|
30
|
+
Classifier: Programming Language :: Python :: 3
|
|
31
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
32
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
33
|
+
Project-URL: Repository, https://github.com/mariorch22/vextor
|
|
34
|
+
Requires-Python: >=3.8
|
|
35
|
+
Requires-Dist: numpy
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# vextor
|
|
39
|
+
|
|
40
|
+
A segmented vector database for Approximate Nearest Neighbor search, written in C++20. Uses AVX2 SIMD distance kernels, HNSW graph indexing, and memory-mapped storage.
|
|
41
|
+
|
|
42
|
+
Vectors are written to an active in-memory segment, sealed to disk when full, and served as read-only mmap-backed segments. Search fans out across all segments and merges results.
|
|
43
|
+
|
|
44
|
+
## Architecture
|
|
45
|
+
|
|
46
|
+
```mermaid
|
|
47
|
+
graph LR
|
|
48
|
+
A[core] --> B[store] --> C[index] --> D[segment] --> E[persistence]
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
| Layer | Contents |
|
|
52
|
+
|---|---|
|
|
53
|
+
| **core** | types, L2 distance (AVX2 + scalar, compile-time dispatch), SQ8 quantization |
|
|
54
|
+
| **store** | VectorStore concept, InMemoryStore, MmapStore |
|
|
55
|
+
| **index** | HnswIndex\<Store\>, FlatIndex\<Store\> |
|
|
56
|
+
| **segment** | ActiveSegment, SealedSegment, SegmentManager |
|
|
57
|
+
| **persistence** | Serializer, Loader, VEX0/HNSW/IDS binary formats |
|
|
58
|
+
|
|
59
|
+
Templates live in `store/` and `index/`. Everything from `segment/` up exposes only concrete types.
|
|
60
|
+
|
|
61
|
+
See [docs/PRD.md](docs/PRD.md) for the full design rationale.
|
|
62
|
+
|
|
63
|
+
## Code style
|
|
64
|
+
|
|
65
|
+
Naming conventions: types use `PascalCase`, functions and variables use `snake_case`, and the `vextor` namespace is lowercase.
|
|
66
|
+
|
|
67
|
+
## Build
|
|
68
|
+
|
|
69
|
+
Requires CMake 3.20+ (3.25+ to use the presets, which are written against preset schema version 6), Ninja, and a C++20 compiler (GCC 14+ or Clang 18+). Three presets are available:
|
|
70
|
+
|
|
71
|
+
| Preset | Description |
|
|
72
|
+
|---|---|
|
|
73
|
+
| `dev` | Debug build with ASan + UBSan |
|
|
74
|
+
| `release` | Optimized build |
|
|
75
|
+
| `release-python` | Optimized build + Python bindings |
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
cmake --preset release
|
|
79
|
+
cmake --build build-release
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Run tests and benchmarks:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
ctest --test-dir build-release --output-on-failure
|
|
86
|
+
./build-release/benchmarks/vextor_bench
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### SIFT1M benchmark (optional)
|
|
90
|
+
|
|
91
|
+
Requires the SIFT1M dataset (~160 MB download).
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
./benchmarks/sift1m/download.sh
|
|
95
|
+
cmake --preset release -DVEXTOR_BUILD_SIFT1M=ON
|
|
96
|
+
cmake --build build-release
|
|
97
|
+
./build-release/benchmarks/sift1m/vextor_sift1m
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Results are written to `benchmarks/sift1m/results.md`.
|
|
101
|
+
|
|
102
|
+
### Python bindings (optional)
|
|
103
|
+
|
|
104
|
+
Requires Python 3.8+ and NumPy.
|
|
105
|
+
|
|
106
|
+
Via pip (builds a wheel using scikit-build-core):
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pip install .
|
|
110
|
+
python3 -c "import vextor; print('ok')"
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
On CPython ≥ 3.12 this produces an abi3 wheel that works across Python versions. Note: the AVX2/FMA code paths are enabled at build time whenever the compiler accepts `-mavx2` and `-mfma`, regardless of the build machine's CPU — a wheel built this way requires an AVX2-capable CPU at runtime.
|
|
114
|
+
|
|
115
|
+
Alternatively, as part of a CMake build:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
cmake --preset release-python
|
|
119
|
+
cmake --build build-release-python
|
|
120
|
+
PYTHONPATH=build-release-python/python python3 -c "import vextor; print('ok')"
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Usage
|
|
124
|
+
|
|
125
|
+
### C++
|
|
126
|
+
|
|
127
|
+
```cpp
|
|
128
|
+
#include <vector>
|
|
129
|
+
#include <vextor/vextor.h>
|
|
130
|
+
|
|
131
|
+
// In-memory only
|
|
132
|
+
vextor::Database db(/*dim=*/768, /*segment_capacity=*/1000000);
|
|
133
|
+
|
|
134
|
+
// Insert
|
|
135
|
+
std::vector<float> vec(768, 0.0f);
|
|
136
|
+
db.insert(/*user_id=*/42, vec);
|
|
137
|
+
|
|
138
|
+
// Search
|
|
139
|
+
std::vector<float> query(768, 1.0f);
|
|
140
|
+
auto results = db.search(query, /*k=*/10);
|
|
141
|
+
for (const auto& r : results) {
|
|
142
|
+
// r.user_id, r.distance
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// With persistence
|
|
146
|
+
vextor::Database db2(768, 1000000, "path/to/db");
|
|
147
|
+
db2.insert(42, vec);
|
|
148
|
+
db2.save();
|
|
149
|
+
auto loaded = vextor::Database::load("path/to/db");
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Python
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
import numpy as np
|
|
156
|
+
import vextor
|
|
157
|
+
|
|
158
|
+
db = vextor.Database(dimensions=768, segment_capacity=1_000_000, path="path/to/db")
|
|
159
|
+
|
|
160
|
+
db.insert(user_id=42, vector=np.random.randn(768).astype(np.float32))
|
|
161
|
+
|
|
162
|
+
results = db.search(query=np.random.randn(768).astype(np.float32), k=10)
|
|
163
|
+
for user_id, distance in results:
|
|
164
|
+
print(f" {user_id}: {distance:.4f}")
|
|
165
|
+
|
|
166
|
+
db.save()
|
|
167
|
+
db2 = vextor.Database.load("path/to/db")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Benchmarks
|
|
171
|
+
|
|
172
|
+
Release build, single-threaded. Selected results from local runs:
|
|
173
|
+
|
|
174
|
+
| | Time |
|
|
175
|
+
|---|---|
|
|
176
|
+
| L2 distance (scalar, 128d) | 43 ns |
|
|
177
|
+
| L2 distance (AVX2, 128d) | 9 ns |
|
|
178
|
+
| L2 distance (AVX2, 768d) | 80 ns |
|
|
179
|
+
| FlatIndex search (10K, 128d) | 163 μs |
|
|
180
|
+
| HNSW search (10K, 128d) | 39 μs |
|
|
181
|
+
| HNSW search (100K, 128d) | 145 μs |
|
|
182
|
+
|
|
183
|
+
HNSW is 4.2x faster than brute-force at 10K vectors. At 100K, HNSW search time grows sub-linearly (39 μs → 145 μs for 10x more vectors).
|
|
184
|
+
|
|
185
|
+
### SIFT1M results
|
|
186
|
+
|
|
187
|
+
1M vectors, 128d float32, single-threaded, via `SegmentManager` (capacity 1.1M, no seal during build).
|
|
188
|
+
|
|
189
|
+
**Machine:** 12th Gen Intel(R) Core(TM) i7-1260P | 12 GB RAM | Linux 5.15.153.1-microsoft-standard-WSL2
|
|
190
|
+
|
|
191
|
+
| M | ef_construction | ef_search | Recall@1 | Recall@10 | Recall@100 | QPS | Build (s) |
|
|
192
|
+
|---|---|---|---|---|---|---|---|
|
|
193
|
+
| 16 | 200 | 64 | 0.9902 | 0.9903 | 0.9478 | 3503 | 603.7 |
|
|
194
|
+
| 16 | 200 | 128 | 0.9919 | 0.9941 | 0.9664 | 2810 | 603.7 |
|
|
195
|
+
| 16 | 200 | 256 | 0.9939 | 0.9986 | 0.9923 | 1523 | 603.7 |
|
|
196
|
+
| 32 | 400 | 128 | 0.9937 | 0.9985 | 0.9911 | 1605 | 1868.3 |
|
|
197
|
+
| 32 | 400 | 256 | 0.9940 | 0.9993 | 0.9986 | 944 | 1868.3 |
|
|
198
|
+
|
|
199
|
+
v0.1 gate (Recall@10 > 0.90): **PASSED** — all 5 configurations meet the criterion.
|
|
200
|
+
|
|
201
|
+
## Project status
|
|
202
|
+
|
|
203
|
+
v0.1 — MVP. Single-node, single-threaded. See [milestones](https://github.com/mariorch22/vextor/milestones) for the roadmap.
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT
|