vexor 0.21.1__py3-none-any.whl → 0.23.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vexor/__init__.py +17 -2
- vexor/api.py +851 -86
- vexor/cache.py +140 -16
- vexor/cli.py +59 -2
- vexor/config.py +197 -9
- vexor/providers/openai.py +14 -4
- vexor/search.py +16 -1
- vexor/services/config_service.py +30 -2
- vexor/services/content_extract_service.py +6 -0
- vexor/services/index_service.py +56 -4
- vexor/services/init_service.py +12 -2
- vexor/services/search_service.py +105 -30
- vexor/text.py +17 -3
- {vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/METADATA +41 -5
- vexor-0.23.0rc1.dist-info/RECORD +33 -0
- vexor-0.21.1.dist-info/RECORD +0 -33
- {vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/WHEEL +0 -0
- {vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/entry_points.txt +0 -0
- {vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/licenses/LICENSE +0 -0
vexor/services/init_service.py
CHANGED
@@ -231,6 +231,11 @@ def _collect_remote_settings() -> dict[str, object]:
     )
     _print_option(
         "C",
+        Messages.INIT_OPTION_PROVIDER_VOYAGEAI,
+        Messages.INIT_OPTION_PROVIDER_VOYAGEAI_DESC,
+    )
+    _print_option(
+        "D",
         Messages.INIT_OPTION_PROVIDER_CUSTOM,
         Messages.INIT_OPTION_PROVIDER_CUSTOM_DESC,
     )
@@ -242,11 +247,14 @@ def _collect_remote_settings() -> dict[str, object]:
             "openai": "openai",
             "b": "gemini",
             "gemini": "gemini",
-            "c": "
+            "c": "voyageai",
+            "voyageai": "voyageai",
+            "voyage": "voyageai",
+            "d": "custom",
             "custom": "custom",
         },
         default="A",
-        allowed="A/B/C",
+        allowed="A/B/C/D",
     )
     console.print()

@@ -266,6 +274,8 @@ def _collect_remote_settings() -> dict[str, object]:

     if provider == "gemini":
         api_key = _prompt_api_key(Messages.INIT_PROMPT_API_KEY_GEMINI, provider)
+    elif provider == "voyageai":
+        api_key = _prompt_api_key(Messages.INIT_PROMPT_API_KEY_VOYAGE, provider)
     else:
         api_key = _prompt_api_key(Messages.INIT_PROMPT_API_KEY_OPENAI, provider)
     updates["api_key"] = api_key
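The init flow above maps either the menu letter or the provider name to a canonical provider id before prompting for the matching API key. Below is a minimal, self-contained sketch of that mapping step; `resolve_provider` and its error message are illustrative stand-ins, not vexor's actual `_prompt_choice` helper or its `Messages` strings.

```python
# Sketch only: mirrors the A-D choice map added in _collect_remote_settings,
# but as a standalone function with hypothetical names.
PROVIDER_CHOICES = {
    "a": "openai",
    "openai": "openai",
    "b": "gemini",
    "gemini": "gemini",
    "c": "voyageai",
    "voyageai": "voyageai",
    "voyage": "voyageai",
    "d": "custom",
    "custom": "custom",
}


def resolve_provider(raw: str, default: str = "A") -> str:
    """Map a user answer (menu letter or provider name) to a canonical provider id."""
    answer = (raw.strip() or default).lower()
    try:
        return PROVIDER_CHOICES[answer]
    except KeyError:
        raise ValueError(f"Expected one of A/B/C/D, got {raw!r}") from None


if __name__ == "__main__":
    print(resolve_provider("c"))       # -> voyageai
    print(resolve_provider("Voyage"))  # -> voyageai
    print(resolve_provider(""))        # default "A" -> openai
```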
vexor/services/search_service.py
CHANGED
@@ -56,6 +56,7 @@ class SearchRequest:
     rerank: str = DEFAULT_RERANK
     flashrank_model: str | None = None
     remote_rerank: RemoteRerankConfig | None = None
+    embedding_dimensions: int | None = None


 @dataclass(slots=True)
@@ -409,6 +410,7 @@ def perform_search(request: SearchRequest) -> SearchResponse:
         exclude_patterns=request.exclude_patterns,
         extensions=request.extensions,
         no_cache=request.no_cache,
+        embedding_dimensions=request.embedding_dimensions,
     )
     if result.status == IndexStatus.EMPTY:
         return SearchResponse(
@@ -493,6 +495,7 @@ def perform_search(request: SearchRequest) -> SearchResponse:
         exclude_patterns=index_excludes,
         extensions=index_extensions,
         no_cache=request.no_cache,
+        embedding_dimensions=request.embedding_dimensions,
     )
     if result.status == IndexStatus.EMPTY:
         return SearchResponse(
@@ -570,6 +573,7 @@ def perform_search(request: SearchRequest) -> SearchResponse:
         base_url=request.base_url,
         api_key=request.api_key,
         local_cuda=request.local_cuda,
+        embedding_dimensions=request.embedding_dimensions,
     )
     query_vector = None
     query_hash = None
@@ -586,8 +590,10 @@ def perform_search(request: SearchRequest) -> SearchResponse:
             query_vector = None

     if query_vector is None and not request.no_cache:
-        query_text_hash = embedding_cache_key(request.query)
-        cached = load_embedding_cache(
+        query_text_hash = embedding_cache_key(request.query, dimension=request.embedding_dimensions)
+        cached = load_embedding_cache(
+            request.model_name, [query_text_hash], dimension=request.embedding_dimensions
+        )
         query_vector = cached.get(query_text_hash)
         if query_vector is not None and query_vector.size != file_vectors.shape[1]:
             query_vector = None
@@ -596,11 +602,12 @@ def perform_search(request: SearchRequest) -> SearchResponse:
         query_vector = searcher.embed_texts([request.query])[0]
         if not request.no_cache:
             if query_text_hash is None:
-                query_text_hash = embedding_cache_key(request.query)
+                query_text_hash = embedding_cache_key(request.query, dimension=request.embedding_dimensions)
             try:
                 store_embedding_cache(
                     model=request.model_name,
                     embeddings={query_text_hash: query_vector},
+                    dimension=request.embedding_dimensions,
                 )
             except Exception: # pragma: no cover - best-effort cache storage
                 pass
@@ -624,6 +631,18 @@ def perform_search(request: SearchRequest) -> SearchResponse:
     candidate_count = min(len(paths), candidate_limit)

     query_vector = np.asarray(query_vector, dtype=np.float32).ravel()
+
+    # Validate dimension compatibility between query and index
+    index_dimension = file_vectors.shape[1] if file_vectors.ndim == 2 else 0
+    query_dimension = query_vector.shape[0]
+    if index_dimension != query_dimension:
+        raise ValueError(
+            f"Embedding dimension mismatch: index has {index_dimension}-dim vectors, "
+            f"but query embedding is {query_dimension}-dim. "
+            f"This typically happens when embedding_dimensions was changed after building the index. "
+            f"Rebuild the index with: vexor index {request.directory}"
+        )
+
     similarities = np.asarray(file_vectors @ query_vector, dtype=np.float32)
     top_indices = _top_indices(similarities, candidate_count)
     chunk_meta_by_id: dict[int, dict] = {}
@@ -689,35 +708,22 @@ def perform_search(request: SearchRequest) -> SearchResponse:
     )


-def
-
-
-    paths
-
-
-
-
-
-        model_name=request.model_name,
-        batch_size=request.batch_size,
-        embed_concurrency=request.embed_concurrency,
-        extract_concurrency=request.extract_concurrency,
-        extract_backend=request.extract_backend,
-        provider=request.provider,
-        base_url=request.base_url,
-        api_key=request.api_key,
-        local_cuda=request.local_cuda,
-        exclude_patterns=request.exclude_patterns,
-        extensions=request.extensions,
-        no_cache=request.no_cache,
-    )
+def search_from_vectors(
+    request: SearchRequest,
+    *,
+    paths: Sequence[Path],
+    file_vectors: np.ndarray,
+    metadata: dict,
+    is_stale: bool = False,
+) -> SearchResponse:
+    """Return ranked results from an in-memory index."""

     if not len(paths):
         return SearchResponse(
             base_path=request.directory,
             backend=None,
             results=[],
-            is_stale=
+            is_stale=is_stale,
             index_empty=True,
         )

@@ -731,14 +737,17 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
         base_url=request.base_url,
         api_key=request.api_key,
         local_cuda=request.local_cuda,
+        embedding_dimensions=request.embedding_dimensions,
     )
     query_vector = None
     query_text_hash = None
     if not request.no_cache:
         from ..cache import embedding_cache_key, load_embedding_cache, store_embedding_cache

-        query_text_hash = embedding_cache_key(request.query)
-        cached = load_embedding_cache(
+        query_text_hash = embedding_cache_key(request.query, dimension=request.embedding_dimensions)
+        cached = load_embedding_cache(
+            request.model_name, [query_text_hash], dimension=request.embedding_dimensions
+        )
         query_vector = cached.get(query_text_hash)
         if query_vector is not None and query_vector.size != file_vectors.shape[1]:
             query_vector = None
@@ -749,11 +758,12 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
         if query_text_hash is None:
             from ..cache import embedding_cache_key, store_embedding_cache

-            query_text_hash = embedding_cache_key(request.query)
+            query_text_hash = embedding_cache_key(request.query, dimension=request.embedding_dimensions)
         try:
             store_embedding_cache(
                 model=request.model_name,
                 embeddings={query_text_hash: query_vector},
+                dimension=request.embedding_dimensions,
             )
         except Exception: # pragma: no cover - best-effort cache storage
             pass
@@ -767,6 +777,18 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
     candidate_count = min(len(paths), candidate_limit)

     query_vector = np.asarray(query_vector, dtype=np.float32).ravel()
+
+    # Validate dimension compatibility between query and index
+    index_dimension = file_vectors.shape[1] if file_vectors.ndim == 2 else 0
+    query_dimension = query_vector.shape[0]
+    if index_dimension != query_dimension:
+        raise ValueError(
+            f"Embedding dimension mismatch: index has {index_dimension}-dim vectors, "
+            f"but query embedding is {query_dimension}-dim. "
+            f"This typically happens when embedding_dimensions was changed after building the index. "
+            f"Rebuild the index with: vexor index {request.directory}"
+        )
+
     similarities = np.asarray(file_vectors @ query_vector, dtype=np.float32)
     top_indices = _top_indices(similarities, candidate_count)
     chunk_entries = metadata.get("chunks", [])
@@ -813,12 +835,44 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
             base_path=request.directory,
             backend=searcher.device,
             results=results,
-            is_stale=
+            is_stale=is_stale,
             index_empty=False,
             reranker=reranker,
         )


+def _perform_search_with_temporary_index(request: SearchRequest) -> SearchResponse:
+    from .index_service import build_index_in_memory  # local import
+
+    paths, file_vectors, metadata = build_index_in_memory(
+        request.directory,
+        include_hidden=request.include_hidden,
+        respect_gitignore=request.respect_gitignore,
+        mode=request.mode,
+        recursive=request.recursive,
+        model_name=request.model_name,
+        batch_size=request.batch_size,
+        embed_concurrency=request.embed_concurrency,
+        extract_concurrency=request.extract_concurrency,
+        extract_backend=request.extract_backend,
+        provider=request.provider,
+        base_url=request.base_url,
+        api_key=request.api_key,
+        local_cuda=request.local_cuda,
+        exclude_patterns=request.exclude_patterns,
+        extensions=request.extensions,
+        no_cache=request.no_cache,
+        embedding_dimensions=request.embedding_dimensions,
+    )
+    return search_from_vectors(
+        request,
+        paths=paths,
+        file_vectors=file_vectors,
+        metadata=metadata,
+        is_stale=False,
+    )
+
+
 def _load_index_vectors_for_request(
     request: SearchRequest,
     *,
@@ -845,6 +899,18 @@ def _load_index_vectors_for_request(
         request.extensions,
         respect_gitignore=request.respect_gitignore,
     )
+    # Check dimension compatibility when user explicitly requests a specific dimension
+    cached_dimension = metadata.get("dimension")
+    requested_dimension = request.embedding_dimensions
+    if (
+        cached_dimension is not None
+        and requested_dimension is not None
+        and cached_dimension != requested_dimension
+    ):
+        raise FileNotFoundError(
+            f"Cached index has dimension {cached_dimension}, "
+            f"but requested {requested_dimension}"
+        )
     return (
         paths,
         file_vectors,
@@ -915,6 +981,15 @@ def _select_cache_superset(
             continue
         if entry.get("mode") != request.mode:
             continue
+        # Check embedding dimension compatibility when user explicitly requests a specific dimension
+        cached_dimension = entry.get("dimension")
+        requested_dimension = request.embedding_dimensions
+        if (
+            cached_dimension is not None
+            and requested_dimension is not None
+            and cached_dimension != requested_dimension
+        ):
+            continue
         cached_excludes = tuple(entry.get("exclude_patterns") or ())
         cached_exclude_set = set(normalize_exclude_patterns(cached_excludes))
         if requested_exclude_set:
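Both search paths above normalize the query vector and then refuse to score it against an index whose vector width differs, since a dot product between mismatched shapes would either fail or silently rank nonsense. The sketch below isolates that guard together with the top-k selection; `rank_paths` is a hypothetical standalone function, not vexor's `perform_search`, and the error text is abbreviated.

```python
# Standalone sketch (assumed names) of the dimension guard plus top-k ranking.
import numpy as np


def rank_paths(file_vectors: np.ndarray, query_vector: np.ndarray, top_k: int = 5) -> np.ndarray:
    """Return indices of the top_k most similar rows of file_vectors."""
    query_vector = np.asarray(query_vector, dtype=np.float32).ravel()
    index_dimension = file_vectors.shape[1] if file_vectors.ndim == 2 else 0
    query_dimension = query_vector.shape[0]
    if index_dimension != query_dimension:
        raise ValueError(
            f"Embedding dimension mismatch: index has {index_dimension}-dim vectors, "
            f"but query embedding is {query_dimension}-dim. Rebuild the index."
        )
    similarities = np.asarray(file_vectors @ query_vector, dtype=np.float32)
    top_k = min(top_k, len(similarities))
    # argpartition keeps the top-k selection O(n); only the selected slice is sorted
    top = np.argpartition(-similarities, top_k - 1)[:top_k]
    return top[np.argsort(-similarities[top])]


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    index = rng.normal(size=(100, 1024)).astype(np.float32)
    query = rng.normal(size=1024).astype(np.float32)
    print(rank_paths(index, query, top_k=3))
```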
vexor/text.py
CHANGED
@@ -61,7 +61,9 @@ class Messages:
     HELP_SET_EMBED_CONCURRENCY = "Set the number of concurrent embedding requests."
     HELP_SET_EXTRACT_CONCURRENCY = "Set the number of concurrent file extraction workers."
     HELP_SET_EXTRACT_BACKEND = "Set the extraction backend (auto, thread, process)."
-    HELP_SET_PROVIDER =
+    HELP_SET_PROVIDER = (
+        "Set the default embedding provider (e.g., openai, gemini, voyageai, custom, or local)."
+    )
     HELP_SET_BASE_URL = "Override the provider's base URL (leave unset for official endpoints)."
     HELP_CLEAR_BASE_URL = "Remove the custom base URL override."
     HELP_SET_AUTO_INDEX = "Enable/disable automatic indexing before search (default: enabled)."
@@ -90,6 +92,11 @@ class Messages:
     HELP_LOCAL_CLEANUP = "Delete the local model cache stored under ~/.vexor/models."
     HELP_LOCAL_CUDA = "Enable CUDA for local embedding (requires onnxruntime-gpu)."
     HELP_LOCAL_CPU = "Disable CUDA and use CPU for local embedding."
+    HELP_SET_EMBEDDING_DIMENSIONS = (
+        "Set the embedding dimensions for providers that support it "
+        "(e.g., Voyage AI: 256, 512, 1024, 2048; OpenAI text-embedding-3: 256-3072)."
+    )
+    HELP_CLEAR_EMBEDDING_DIMENSIONS = "Clear the embedding dimensions setting (use model default)."

     ERROR_API_KEY_MISSING = (
         "API key is missing or still set to the placeholder. "
@@ -176,13 +183,16 @@ class Messages:
     INIT_OPTION_PROVIDER_OPENAI_DESC = "default"
     INIT_OPTION_PROVIDER_GEMINI = "Gemini"
     INIT_OPTION_PROVIDER_GEMINI_DESC = "Google AI"
+    INIT_OPTION_PROVIDER_VOYAGEAI = "Voyage AI"
+    INIT_OPTION_PROVIDER_VOYAGEAI_DESC = "embedding API"
     INIT_OPTION_PROVIDER_CUSTOM = "Custom"
     INIT_OPTION_PROVIDER_CUSTOM_DESC = "OpenAI-compatible"
-    INIT_PROMPT_PROVIDER = "Choose A/B/C"
+    INIT_PROMPT_PROVIDER = "Choose A/B/C/D"
     INIT_PROMPT_CUSTOM_BASE_URL = "Custom base URL"
     INIT_PROMPT_CUSTOM_MODEL = "Custom model name"
     INIT_PROMPT_API_KEY_OPENAI = "OpenAI API key (leave blank to use environment variables)"
     INIT_PROMPT_API_KEY_GEMINI = "Gemini API key (leave blank to use environment variables)"
+    INIT_PROMPT_API_KEY_VOYAGE = "Voyage API key (leave blank to use environment variables)"
     INIT_PROMPT_API_KEY_CUSTOM = "API key (leave blank to use environment variables)"
     INIT_CONFIRM_SKIP_API_KEY = "No API key found. Continue without it?"
     INIT_USING_ENV_API_KEY = "Using API key from environment."
@@ -293,6 +303,9 @@ class Messages:
     INFO_LOCAL_SETUP_DONE = "Local model ready: {model}. Provider set to local."
     INFO_LOCAL_CUDA_ENABLED = "Local embeddings will use CUDA."
     INFO_LOCAL_CUDA_DISABLED = "Local embeddings will use CPU."
+    INFO_EMBEDDING_DIMENSIONS_SET = "Embedding dimensions set to {value}."
+    INFO_EMBEDDING_DIMENSIONS_CLEARED = "Embedding dimensions cleared (using model default)."
+    ERROR_EMBEDDING_DIMENSIONS_INVALID = "Embedding dimensions must be a positive integer."
     INFO_FLASHRANK_SETUP_START = "Preparing FlashRank model..."
     INFO_FLASHRANK_SETUP_DONE = "FlashRank model ready."
     DOCTOR_LOCAL_CUDA_MISSING = "CUDA provider not available for local embeddings"
@@ -314,6 +327,7 @@ class Messages:
         "API key set: {api}\n"
         "Default provider: {provider}\n"
         "Default model: {model}\n"
+        "Embedding dimensions: {embedding_dimensions}\n"
         "Default batch size: {batch}\n"
         "Embedding concurrency: {concurrency}\n"
         "Extract concurrency: {extract_concurrency}\n"
@@ -350,7 +364,7 @@ class Messages:
     DOCTOR_API_KEY_MISSING = "API key not configured"
     DOCTOR_API_KEY_MISSING_DETAIL = (
         "Run `vexor config --set-api-key <KEY>` or set VEXOR_API_KEY / "
-        "OPENAI_API_KEY / GOOGLE_GENAI_API_KEY environment variable."
+        "OPENAI_API_KEY / GOOGLE_GENAI_API_KEY / VOYAGE_API_KEY environment variable."
     )
     DOCTOR_API_KEY_NOT_REQUIRED = "Local provider selected (no API key required)"
     DOCTOR_API_SKIPPED = "Skipped (no API key)"
{vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vexor
-Version: 0.
+Version: 0.23.0rc1
 Summary: A vector-powered CLI for semantic search over files.
 Project-URL: Repository, https://github.com/scarletkc/vexor
 Author: scarletkc
@@ -22,7 +22,7 @@ Classifier: Topic :: Text Processing :: Indexing
 Classifier: Topic :: Utilities
 Requires-Python: >=3.9
 Requires-Dist: charset-normalizer>=3.3.0
-Requires-Dist: google-genai>=
+Requires-Dist: google-genai>=1.57.0
 Requires-Dist: numpy>=1.23.0
 Requires-Dist: openai>=1.0.0
 Requires-Dist: pathspec>=0.12.1
@@ -64,6 +64,7 @@ Description-Content-Type: text/markdown
 [](https://github.com/scarletkc/vexor/actions/workflows/publish.yml)
 [](https://codecov.io/github/scarletkc/vexor)
 [](https://github.com/scarletkc/vexor/blob/main/LICENSE)
+[](https://deepwiki.com/scarletkc/vexor)

 </div>

@@ -76,6 +77,13 @@ It supports configurable embedding and reranking providers, and exposes the same
 Vexor Demo Video
 </video>

+## Featured In
+
+Vexor has been recognized and featured by the community:
+
+- **[Ruan Yifeng's Weekly (Issue #379)](https://github.com/ruanyf/weekly/blob/master/docs/issue-379.md#ai-%E7%9B%B8%E5%85%B3)** - A leading tech newsletter in the Chinese developer community.
+- **[Awesome Claude Skills](https://github.com/VoltAgent/awesome-claude-skills?tab=readme-ov-file#development-and-testing)** - Curated list of best-in-class skills for AI agents.
+
 ## Why Vexor?

 When you remember what a file *does* but forget its name or location, Vexor finds it instantly—no grep patterns or directory traversal needed.
@@ -164,12 +172,15 @@ Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/ve
 ## Configuration

 ```bash
-vexor config --set-provider openai # default; also supports gemini/custom/local
+vexor config --set-provider openai # default; also supports gemini/voyageai/custom/local
 vexor config --set-model text-embedding-3-small
+vexor config --set-provider voyageai # uses voyage defaults when model/base_url are unset
 vexor config --set-batch-size 0 # 0 = single request
 vexor config --set-embed-concurrency 4 # parallel embedding requests
 vexor config --set-extract-concurrency 4 # parallel file extraction workers
 vexor config --set-extract-backend auto # auto|thread|process (default: auto)
+vexor config --set-embedding-dimensions 1024 # optional, model/provider dependent
+vexor config --clear-embedding-dimensions # reset to model default dimension
 vexor config --set-auto-index true # auto-index before search (default)
 vexor config --rerank bm25 # optional BM25 rerank for top-k results
 vexor config --rerank flashrank # FlashRank rerank (requires optional extra)
@@ -195,7 +206,7 @@ Config stored in `~/.vexor/config.json`.
 ```bash
 vexor config --set-api-key "YOUR_KEY"
 ```
-Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, or `
+Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_GENAI_API_KEY`, or `VOYAGE_API_KEY`.

 ### Rerank

@@ -215,11 +226,30 @@ Recommended defaults:

 ### Providers: Remote vs Local

-Vexor supports both remote API providers (`openai`, `gemini`, `custom`) and a local provider (`local`):
+Vexor supports both remote API providers (`openai`, `gemini`, `voyageai`, `custom`) and a local provider (`local`):
 - Remote providers use `api_key` and optional `base_url`.
+- `voyageai` defaults to `https://api.voyageai.com/v1` when `base_url` is not set.
 - `custom` is OpenAI-compatible and requires both `model` and `base_url`.
 - Local provider ignores `api_key/base_url` and only uses `model` plus `local_cuda` (CPU/GPU switch).

+### Embedding Dimensions
+
+Embedding dimensions are optional. If unset, the provider/model default is used.
+Custom dimensions are validated for:
+- OpenAI `text-embedding-3-*`
+- Voyage `voyage-3*` and `voyage-code-3*`
+
+```bash
+vexor config --set-embedding-dimensions 1024
+vexor config --clear-embedding-dimensions
+```
+
+If you change dimensions after an index is built, rebuild the index:
+
+```bash
+vexor index --path .
+```
+
 ### Local Model (Offline)

 Install the lightweight local backend:
@@ -315,8 +345,14 @@ Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_lin

 See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.

+## Contributing
+
 Contributions, issues, and PRs welcome! Star if you find it helpful.

+## Star History
+
+[](https://www.star-history.com/#scarletkc/vexor&type=date&legend=top-left)
+
 ## License

 [MIT](http://github.com/scarletkc/vexor/blob/main/LICENSE)
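The README section above, taken together with the cache changes in `search_service.py`, implies that cached query embeddings are now keyed by the requested dimension as well as by the query text, so switching `--set-embedding-dimensions` cannot return a stale vector of the wrong width. The snippet below is an illustrative sketch of such a dimension-aware key, assuming a simple SHA-256 scheme; it is not vexor's actual `cache.embedding_cache_key` implementation.

```python
# Illustrative sketch only (assumed scheme, not vexor's real cache module):
# the same query embedded at different output dimensions must map to
# different cache entries.
import hashlib
from typing import Optional


def embedding_cache_key(text: str, *, dimension: Optional[int] = None) -> str:
    """Hash the query text together with the requested output dimension."""
    payload = text if dimension is None else f"{text}\x00dim={dimension}"
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


if __name__ == "__main__":
    query = "where is the retry logic?"
    print(embedding_cache_key(query))                  # default-dimension entry
    print(embedding_cache_key(query, dimension=1024))  # separate 1024-dim entry
```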
vexor-0.23.0rc1.dist-info/RECORD
ADDED
@@ -0,0 +1,33 @@
+vexor/__init__.py,sha256=B7BRcp5NDsgB_zUnP_wghH_Orw5RZ-VTRWam3R7It6Q,635
+vexor/__main__.py,sha256=ZFzom1wCfP6TPXe3aoDFpNcUgjbCZ7Quy_vfzNsH5Fw,426
+vexor/api.py,sha256=VKTTKbp1I82eZ3LT-J6PI9WOoaKSF1cgGIPwq5w2BYE,38137
+vexor/cache.py,sha256=cjnYUWp0sYnlviSRMTopmQlepDEk8_4tlwf7SnwX_ck,58766
+vexor/cli.py,sha256=PCUIY4LF-xiifuH9D82CuLLq9tNs9uRNfAgcSap2gLw,70895
+vexor/config.py,sha256=DiXZiUsZ1hidIQ5LI-2Fik6qpGRmK6Eh7h0DugT6oCM,24303
+vexor/modes.py,sha256=N_wAWoqbxmCfko-v520p59tpAYvUwraCSSQRtMaF4ac,11549
+vexor/output.py,sha256=iooZgLlK8dh7ajJ4XMHUNNx0qyTVtD_OAAwrBx5MeqE,864
+vexor/search.py,sha256=fWzbTcRhWGaMkOwjWMGUUSoJLTvMm6472Yhij-VWeZE,7593
+vexor/text.py,sha256=VM7CQH8f1tqn_N19l4ycjAekVMxi002Y5Qsfp3EcDD0,25655
+vexor/utils.py,sha256=GzfYW2rz1-EuJjkevqZVe8flLRtrQ60OWMmFNbMh62k,12472
+vexor/providers/__init__.py,sha256=kCEoV03TSLKcxDUYVNjXnrVoLU5NpfNXjp1w1Ak2imE,92
+vexor/providers/gemini.py,sha256=IWHHjCMJC0hUHQPhuaJ_L_97c_mnOXkPkCVdrIR6z-g,5705
+vexor/providers/local.py,sha256=5X_WYCXgyBGIVvvVLgMnDjTkPR4GBF0ksNPyviBlB7w,4838
+vexor/providers/openai.py,sha256=uGI2qYc8BixxL58s6yQHr7OuocBhpLG4m93C4Y2JYYg,5994
+vexor/services/__init__.py,sha256=dA_i2N03vlYmbZbEK2knzJLWviunkNWbzN2LWPNvMk0,160
+vexor/services/cache_service.py,sha256=ywt6AgupCJ7_wC3je4znCMw5_VBouw3skbDTAt8xw6o,1639
+vexor/services/config_service.py,sha256=uU-jd-H18GW6R0-AJSoCXFYJ1vRqf28YzwZRjn1-S9E,6159
+vexor/services/content_extract_service.py,sha256=oO7Hbadwp3uiyqCbr_4MRXQsUeMix2D98i-Yp94PwFk,26495
+vexor/services/index_service.py,sha256=iyIP8ZcqjwjPbQOcGiJLYTFMIsX1CxswhHC9v6c-W00,34940
+vexor/services/init_service.py,sha256=9foGfQqLK-iomEjH820yMue3AQeveLJzQtaQA4nvYRo,27217
+vexor/services/js_parser.py,sha256=eRtW6KlK4JBYDGbyoecHVqLZ0hcx-Cc0kx6bOujHPAQ,16254
+vexor/services/keyword_service.py,sha256=vmke8tII9kTwRDdBaLHBc6Hpy_B3p98L65iGkCQgtMU,2211
+vexor/services/search_service.py,sha256=HsN0QpGc7yWIj-xbuQjwUZp8PbQKGgR4_0aJTI_tsR0,41948
+vexor/services/skill_service.py,sha256=Rrgt3OMsKPPiXOiRhSNAWjBM9UNz9qmSWQe3uYGzq4M,4863
+vexor/services/system_service.py,sha256=KPlv83v3rTvBiNiH7vrp6tDmt_AqHxuUd-5RI0TfvWs,24638
+vexor/_bundled_skills/vexor-cli/SKILL.md,sha256=m3FlyqgHBdRwyGPEp8PrUS21K0G2jEl88tRvhSPta08,2798
+vexor/_bundled_skills/vexor-cli/references/install-vexor.md,sha256=IUBShLI1mAxugwUIMAJQ5_j6KcaPWfobe0gSd6MWU7w,1245
+vexor-0.23.0rc1.dist-info/METADATA,sha256=HRGdeudEBPih9In7KEm-kWCJNDOUcq_cmZOZlXsnTGc,15048
+vexor-0.23.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+vexor-0.23.0rc1.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
+vexor-0.23.0rc1.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
+vexor-0.23.0rc1.dist-info/RECORD,,
vexor-0.21.1.dist-info/RECORD
DELETED
@@ -1,33 +0,0 @@
-vexor/__init__.py,sha256=Ab63nROf2nbDW-xY4wuNU_DS0K8hsqfPa1KjvCaKJzA,441
-vexor/__main__.py,sha256=ZFzom1wCfP6TPXe3aoDFpNcUgjbCZ7Quy_vfzNsH5Fw,426
-vexor/api.py,sha256=YCHpiydbPbRJUqdQYrpwe1JrRI-w_7LRuyZDGBP1_d4,11506
-vexor/cache.py,sha256=20SaiBKkPJIDXHtflX6uHiQXI4DtD6wx7RtWbz2l6LU,54339
-vexor/cli.py,sha256=M9GKdD_mJ068Zpm62znTp0KhhKp1dkh_WHmfJHR9hwU,68094
-vexor/config.py,sha256=CiPfEH7Ilt6XepEx4p02qfW5HfkpNDBjhEMyckbSWaA,17413
-vexor/modes.py,sha256=N_wAWoqbxmCfko-v520p59tpAYvUwraCSSQRtMaF4ac,11549
-vexor/output.py,sha256=iooZgLlK8dh7ajJ4XMHUNNx0qyTVtD_OAAwrBx5MeqE,864
-vexor/search.py,sha256=MSU4RmH6waFYOofkIdo8_ElTiz1oNaKuvr-3umif7Bs,6826
-vexor/text.py,sha256=2aK5nJHkosmbmyzp9o_Tzb3YlmVnju_IX8BcEPUdhTA,24794
-vexor/utils.py,sha256=GzfYW2rz1-EuJjkevqZVe8flLRtrQ60OWMmFNbMh62k,12472
-vexor/providers/__init__.py,sha256=kCEoV03TSLKcxDUYVNjXnrVoLU5NpfNXjp1w1Ak2imE,92
-vexor/providers/gemini.py,sha256=IWHHjCMJC0hUHQPhuaJ_L_97c_mnOXkPkCVdrIR6z-g,5705
-vexor/providers/local.py,sha256=5X_WYCXgyBGIVvvVLgMnDjTkPR4GBF0ksNPyviBlB7w,4838
-vexor/providers/openai.py,sha256=YnJDY9gJW7RfGGdkgswVHvmOKNvgLRQUsbpA1MUuLPg,5356
-vexor/services/__init__.py,sha256=dA_i2N03vlYmbZbEK2knzJLWviunkNWbzN2LWPNvMk0,160
-vexor/services/cache_service.py,sha256=ywt6AgupCJ7_wC3je4znCMw5_VBouw3skbDTAt8xw6o,1639
-vexor/services/config_service.py,sha256=PojolfbSKh9pW8slF4qxCOs9hz5L6xvjf_nB7vfVlsU,5039
-vexor/services/content_extract_service.py,sha256=zdhLxpNv70BU7irLf3Uc0ou9rKSvdjtrDcHkgRKlMn4,26421
-vexor/services/index_service.py,sha256=FXf1bBoqj4-K1l38ItxHf6Oh7QHVIdNAdVY2kg_Zoq8,32265
-vexor/services/init_service.py,sha256=3D04hylGA9FRQhLHCfR95nMko3vb5MNBcRb9nWWaUE8,26863
-vexor/services/js_parser.py,sha256=eRtW6KlK4JBYDGbyoecHVqLZ0hcx-Cc0kx6bOujHPAQ,16254
-vexor/services/keyword_service.py,sha256=vmke8tII9kTwRDdBaLHBc6Hpy_B3p98L65iGkCQgtMU,2211
-vexor/services/search_service.py,sha256=K7SiAuMA7bGeyPWOHPMKpFFvzzkj5kHWwa3p94NakJs,38663
-vexor/services/skill_service.py,sha256=Rrgt3OMsKPPiXOiRhSNAWjBM9UNz9qmSWQe3uYGzq4M,4863
-vexor/services/system_service.py,sha256=KPlv83v3rTvBiNiH7vrp6tDmt_AqHxuUd-5RI0TfvWs,24638
-vexor/_bundled_skills/vexor-cli/SKILL.md,sha256=m3FlyqgHBdRwyGPEp8PrUS21K0G2jEl88tRvhSPta08,2798
-vexor/_bundled_skills/vexor-cli/references/install-vexor.md,sha256=IUBShLI1mAxugwUIMAJQ5_j6KcaPWfobe0gSd6MWU7w,1245
-vexor-0.21.1.dist-info/METADATA,sha256=jS_xdqPXD8WsDNKd684w5eHmj_f1CHvNMR-DY-MvBQg,13494
-vexor-0.21.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-vexor-0.21.1.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
-vexor-0.21.1.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
-vexor-0.21.1.dist-info/RECORD,,
{vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/WHEEL
File without changes

{vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/entry_points.txt
File without changes

{vexor-0.21.1.dist-info → vexor-0.23.0rc1.dist-info}/licenses/LICENSE
File without changes