PyPI - vexor - Versions diffs - 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl - Mend

vexor 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

vexor/__init__.py +17 -2
vexor/api.py +796 -86
vexor/cache.py +101 -9
vexor/config.py +47 -6
vexor/services/content_extract_service.py +6 -0
vexor/services/search_service.py +42 -24
{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/METADATA +14 -1
{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/RECORD +11 -11
{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/WHEEL +0 -0
{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/entry_points.txt +0 -0
{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/licenses/LICENSE +0 -0

vexor/cache.py CHANGED Viewed

@@ -5,9 +5,13 @@ from __future__ import annotations
 import hashlib
 import os
 import sqlite3
+from collections import OrderedDict
 from dataclasses import dataclass
+from contextlib import contextmanager
+from contextvars import ContextVar
 from datetime import datetime, timezone, timedelta
 from pathlib import Path
+from threading import Lock
 from typing import Iterable, Mapping, Sequence
 import numpy as np
@@ -16,10 +20,18 @@ from .utils import collect_files
 DEFAULT_CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
 CACHE_DIR = DEFAULT_CACHE_DIR
+_CACHE_DIR_OVERRIDE: ContextVar[Path | None] = ContextVar(
+    "vexor_cache_dir_override",
+    default=None,
+)
 CACHE_VERSION = 6
 DB_FILENAME = "index.db"
 EMBED_CACHE_TTL_DAYS = 30
 EMBED_CACHE_MAX_ENTRIES = 50_000
+EMBED_MEMORY_CACHE_MAX_ENTRIES = 2_048
+_EMBED_MEMORY_CACHE: "OrderedDict[tuple[str, str], np.ndarray]" = OrderedDict()
+_EMBED_MEMORY_LOCK = Lock()
 @dataclass(slots=True)
@@ -84,6 +96,55 @@ def embedding_cache_key(text: str) -> str:
     return hashlib.sha1(clean_text.encode("utf-8")).hexdigest()
+def _clear_embedding_memory_cache() -> None:
+    if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0:
+        return
+    with _EMBED_MEMORY_LOCK:
+        _EMBED_MEMORY_CACHE.clear()
+def _load_embedding_memory_cache(
+    model: str,
+    text_hashes: Sequence[str],
+) -> dict[str, np.ndarray]:
+    if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0:
+        return {}
+    results: dict[str, np.ndarray] = {}
+    with _EMBED_MEMORY_LOCK:
+        for text_hash in text_hashes:
+            if not text_hash:
+                continue
+            key = (model, text_hash)
+            vector = _EMBED_MEMORY_CACHE.pop(key, None)
+            if vector is None:
+                continue
+            _EMBED_MEMORY_CACHE[key] = vector
+            results[text_hash] = vector
+    return results
+def _store_embedding_memory_cache(
+    *,
+    model: str,
+    embeddings: Mapping[str, np.ndarray],
+) -> None:
+    if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0 or not embeddings:
+        return
+    with _EMBED_MEMORY_LOCK:
+        for text_hash, vector in embeddings.items():
+            if not text_hash:
+                continue
+            array = np.asarray(vector, dtype=np.float32)
+            if array.size == 0:
+                continue
+            key = (model, text_hash)
+            if key in _EMBED_MEMORY_CACHE:
+                _EMBED_MEMORY_CACHE.pop(key, None)
+            _EMBED_MEMORY_CACHE[key] = array
+        while len(_EMBED_MEMORY_CACHE) > EMBED_MEMORY_CACHE_MAX_ENTRIES:
+            _EMBED_MEMORY_CACHE.popitem(last=False)
 def _serialize_extensions(extensions: Sequence[str] | None) -> str:
     if not extensions:
         return ""
@@ -115,9 +176,32 @@ def _chunk_values(values: Sequence[object], size: int) -> Iterable[Sequence[obje
         yield values[idx : idx + size]
+def _resolve_cache_dir() -> Path:
+    override = _CACHE_DIR_OVERRIDE.get()
+    return override if override is not None else CACHE_DIR
+@contextmanager
+def cache_dir_context(path: Path | str | None):
+    """Temporarily override the cache directory for the current context."""
+    if path is None:
+        yield
+        return
+    dir_path = Path(path).expanduser().resolve()
+    if dir_path.exists() and not dir_path.is_dir():
+        raise NotADirectoryError(f"Path is not a directory: {dir_path}")
+    token = _CACHE_DIR_OVERRIDE.set(dir_path)
+    try:
+        yield
+    finally:
+        _CACHE_DIR_OVERRIDE.reset(token)
 def ensure_cache_dir() -> Path:
-    CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    return CACHE_DIR
+    cache_dir = _resolve_cache_dir()
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    return cache_dir
 def set_cache_dir(path: Path | str | None) -> None:
@@ -134,8 +218,8 @@ def set_cache_dir(path: Path | str | None) -> None:
 def cache_db_path() -> Path:
     """Return the absolute path to the shared SQLite cache database."""
-    ensure_cache_dir()
-    return CACHE_DIR / DB_FILENAME
+    cache_dir = ensure_cache_dir()
+    return cache_dir / DB_FILENAME
 def cache_file(root: Path, model: str, include_hidden: bool) -> Path:  # pragma: no cover - kept for API parity
@@ -1310,19 +1394,23 @@ def load_embedding_cache(
     unique_hashes = list(dict.fromkeys([value for value in text_hashes if value]))
     if not unique_hashes:
         return {}
+    results = _load_embedding_memory_cache(model, unique_hashes)
+    missing = [value for value in unique_hashes if value not in results]
+    if not missing:
+        return results
     db_path = cache_db_path()
     owns_connection = conn is None
     try:
         connection = conn or _connect(db_path, readonly=True)
     except sqlite3.OperationalError:
-        return {}
+        return results
     try:
         try:
             _ensure_schema_readonly(connection, tables=("embedding_cache",))
         except sqlite3.OperationalError:
-            return {}
-        results: dict[str, np.ndarray] = {}
-        for chunk in _chunk_values(unique_hashes, 900):
+            return results
+        disk_results: dict[str, np.ndarray] = {}
+        for chunk in _chunk_values(missing, 900):
             placeholders = ", ".join("?" for _ in chunk)
             rows = connection.execute(
                 f"""
@@ -1339,7 +1427,10 @@ def load_embedding_cache(
                 vector = np.frombuffer(blob, dtype=np.float32)
                 if vector.size == 0:
                     continue
-                results[row["text_hash"]] = vector
+                disk_results[row["text_hash"]] = vector
+        if disk_results:
+            _store_embedding_memory_cache(model=model, embeddings=disk_results)
+            results.update(disk_results)
         return results
     finally:
         if owns_connection:
@@ -1356,6 +1447,7 @@ def store_embedding_cache(
     if not embeddings:
         return
+    _store_embedding_memory_cache(model=model, embeddings=embeddings)
     db_path = cache_db_path()
     owns_connection = conn is None
     connection = conn or _connect(db_path)

vexor/config.py CHANGED Viewed

@@ -5,6 +5,8 @@ from __future__ import annotations
 import json
 import os
 from dataclasses import dataclass
+from contextlib import contextmanager
+from contextvars import ContextVar
 from collections.abc import Mapping
 from pathlib import Path
 from typing import Any, Dict
@@ -15,6 +17,10 @@ from .text import Messages
 DEFAULT_CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
 CONFIG_DIR = DEFAULT_CONFIG_DIR
 CONFIG_FILE = CONFIG_DIR / "config.json"
+_CONFIG_DIR_OVERRIDE: ContextVar[Path | None] = ContextVar(
+    "vexor_config_dir_override",
+    default=None,
+)
 DEFAULT_MODEL = "text-embedding-3-small"
 DEFAULT_GEMINI_MODEL = "gemini-embedding-001"
 DEFAULT_LOCAL_MODEL = "intfloat/multilingual-e5-small"
@@ -74,10 +80,40 @@ def _parse_remote_rerank(raw: object) -> RemoteRerankConfig | None:
     )
+def _resolve_config_dir() -> Path:
+    override = _CONFIG_DIR_OVERRIDE.get()
+    return override if override is not None else CONFIG_DIR
+def _resolve_config_file() -> Path:
+    override = _CONFIG_DIR_OVERRIDE.get()
+    if override is not None:
+        return override / "config.json"
+    return CONFIG_FILE
+@contextmanager
+def config_dir_context(path: Path | str | None):
+    """Temporarily override the config directory for the current context."""
+    if path is None:
+        yield
+        return
+    dir_path = Path(path).expanduser().resolve()
+    if dir_path.exists() and not dir_path.is_dir():
+        raise NotADirectoryError(f"Path is not a directory: {dir_path}")
+    token = _CONFIG_DIR_OVERRIDE.set(dir_path)
+    try:
+        yield
+    finally:
+        _CONFIG_DIR_OVERRIDE.reset(token)
 def load_config() -> Config:
-    if not CONFIG_FILE.exists():
+    config_file = _resolve_config_file()
+    if not config_file.exists():
         return Config()
-    raw = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
+    raw = json.loads(config_file.read_text(encoding="utf-8"))
     rerank = (raw.get("rerank") or DEFAULT_RERANK).strip().lower()
     if rerank not in SUPPORTED_RERANKERS:
         rerank = DEFAULT_RERANK
@@ -101,7 +137,8 @@ def load_config() -> Config:
 def save_config(config: Config) -> None:
-    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    config_dir = _resolve_config_dir()
+    config_dir.mkdir(parents=True, exist_ok=True)
     data: Dict[str, Any] = {}
     if config.api_key:
         data["api_key"] = config.api_key
@@ -130,15 +167,19 @@ def save_config(config: Config) -> None:
             remote_data["model"] = config.remote_rerank.model
         if remote_data:
             data["remote_rerank"] = remote_data
-    CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+    config_file = _resolve_config_file()
+    config_file.write_text(
+        json.dumps(data, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
 def local_model_dir() -> Path:
-    return CONFIG_DIR / "models"
+    return _resolve_config_dir() / "models"
 def flashrank_cache_dir(*, create: bool = True) -> Path:
-    cache_dir = CONFIG_DIR / "flashrank"
+    cache_dir = _resolve_config_dir() / "flashrank"
     if create:
         cache_dir.mkdir(parents=True, exist_ok=True)
     return cache_dir

vexor/services/content_extract_service.py CHANGED Viewed

@@ -108,6 +108,12 @@ TEXT_EXTENSIONS = (
     ".vb",
     ".ps1",
     ".bash",
+    ".zsh",
+    ".fish",
+    ".vue",
+    ".jsx",
+    ".tsx",
+    ".scss",
 )

vexor/services/search_service.py CHANGED Viewed

@@ -689,35 +689,22 @@ def perform_search(request: SearchRequest) -> SearchResponse:
     )
-def _perform_search_with_temporary_index(request: SearchRequest) -> SearchResponse:
-    from .index_service import build_index_in_memory  # local import
-    paths, file_vectors, metadata = build_index_in_memory(
-        request.directory,
-        include_hidden=request.include_hidden,
-        respect_gitignore=request.respect_gitignore,
-        mode=request.mode,
-        recursive=request.recursive,
-        model_name=request.model_name,
-        batch_size=request.batch_size,
-        embed_concurrency=request.embed_concurrency,
-        extract_concurrency=request.extract_concurrency,
-        extract_backend=request.extract_backend,
-        provider=request.provider,
-        base_url=request.base_url,
-        api_key=request.api_key,
-        local_cuda=request.local_cuda,
-        exclude_patterns=request.exclude_patterns,
-        extensions=request.extensions,
-        no_cache=request.no_cache,
-    )
+def search_from_vectors(
+    request: SearchRequest,
+    *,
+    paths: Sequence[Path],
+    file_vectors: np.ndarray,
+    metadata: dict,
+    is_stale: bool = False,
+) -> SearchResponse:
+    """Return ranked results from an in-memory index."""
     if not len(paths):
         return SearchResponse(
             base_path=request.directory,
             backend=None,
             results=[],
-            is_stale=False,
+            is_stale=is_stale,
             index_empty=True,
         )
@@ -813,12 +800,43 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
         base_path=request.directory,
         backend=searcher.device,
         results=results,
-        is_stale=False,
+        is_stale=is_stale,
         index_empty=False,
         reranker=reranker,
     )
+def _perform_search_with_temporary_index(request: SearchRequest) -> SearchResponse:
+    from .index_service import build_index_in_memory  # local import
+    paths, file_vectors, metadata = build_index_in_memory(
+        request.directory,
+        include_hidden=request.include_hidden,
+        respect_gitignore=request.respect_gitignore,
+        mode=request.mode,
+        recursive=request.recursive,
+        model_name=request.model_name,
+        batch_size=request.batch_size,
+        embed_concurrency=request.embed_concurrency,
+        extract_concurrency=request.extract_concurrency,
+        extract_backend=request.extract_backend,
+        provider=request.provider,
+        base_url=request.base_url,
+        api_key=request.api_key,
+        local_cuda=request.local_cuda,
+        exclude_patterns=request.exclude_patterns,
+        extensions=request.extensions,
+        no_cache=request.no_cache,
+    )
+    return search_from_vectors(
+        request,
+        paths=paths,
+        file_vectors=file_vectors,
+        metadata=metadata,
+        is_stale=False,
+    )
 def _load_index_vectors_for_request(
     request: SearchRequest,
     *,

{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vexor
-Version: 0.21.1
+Version: 0.22.0
 Summary: A vector-powered CLI for semantic search over files.
 Project-URL: Repository, https://github.com/scarletkc/vexor
 Author: scarletkc
@@ -76,6 +76,13 @@ It supports configurable embedding and reranking providers, and exposes the same
       Vexor Demo Video
     </video>
+## Featured In
+Vexor has been recognized and featured by the community:
+- **[Ruan Yifeng's Weekly (Issue #379)](https://github.com/ruanyf/weekly/blob/master/docs/issue-379.md#ai-%E7%9B%B8%E5%85%B3)** - A leading tech newsletter in the Chinese developer community.
+- **[Awesome Claude Skills](https://github.com/VoltAgent/awesome-claude-skills?tab=readme-ov-file#development-and-testing)** - Curated list of best-in-class skills for AI agents.
 ## Why Vexor?
 When you remember what a file *does* but forget its name or location, Vexor finds it instantly—no grep patterns or directory traversal needed.
@@ -315,8 +322,14 @@ Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_lin
 See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.
+## Contributing
 Contributions, issues, and PRs welcome! Star if you find it helpful.
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=scarletkc/vexor&type=date&legend=top-left)](https://www.star-history.com/#scarletkc/vexor&type=date&legend=top-left)
 ## License
 [MIT](http://github.com/scarletkc/vexor/blob/main/LICENSE)

{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-vexor/__init__.py,sha256=Ab63nROf2nbDW-xY4wuNU_DS0K8hsqfPa1KjvCaKJzA,441
+vexor/__init__.py,sha256=EQXPbwsXfHJAK3mNlhYfc4UwVqOMqL52cO46xg1GNCo,632
 vexor/__main__.py,sha256=ZFzom1wCfP6TPXe3aoDFpNcUgjbCZ7Quy_vfzNsH5Fw,426
-vexor/api.py,sha256=YCHpiydbPbRJUqdQYrpwe1JrRI-w_7LRuyZDGBP1_d4,11506
-vexor/cache.py,sha256=20SaiBKkPJIDXHtflX6uHiQXI4DtD6wx7RtWbz2l6LU,54339
+vexor/api.py,sha256=W6eJLNbg5uBC2gcE8kq34iUciJCYZjY9Zsytxunl-vs,35860
+vexor/cache.py,sha256=irCGy5XIcRKX5EFk7plKDVqGHrTjRpaWgXnFDiVChXk,57323
 vexor/cli.py,sha256=M9GKdD_mJ068Zpm62znTp0KhhKp1dkh_WHmfJHR9hwU,68094
-vexor/config.py,sha256=CiPfEH7Ilt6XepEx4p02qfW5HfkpNDBjhEMyckbSWaA,17413
+vexor/config.py,sha256=CEL5u7afZV81Y0i9FVsj8GVzZlb2C2gC17r5lNKu-aM,18570
 vexor/modes.py,sha256=N_wAWoqbxmCfko-v520p59tpAYvUwraCSSQRtMaF4ac,11549
 vexor/output.py,sha256=iooZgLlK8dh7ajJ4XMHUNNx0qyTVtD_OAAwrBx5MeqE,864
 vexor/search.py,sha256=MSU4RmH6waFYOofkIdo8_ElTiz1oNaKuvr-3umif7Bs,6826
@@ -16,18 +16,18 @@ vexor/providers/openai.py,sha256=YnJDY9gJW7RfGGdkgswVHvmOKNvgLRQUsbpA1MUuLPg,535
 vexor/services/__init__.py,sha256=dA_i2N03vlYmbZbEK2knzJLWviunkNWbzN2LWPNvMk0,160
 vexor/services/cache_service.py,sha256=ywt6AgupCJ7_wC3je4znCMw5_VBouw3skbDTAt8xw6o,1639
 vexor/services/config_service.py,sha256=PojolfbSKh9pW8slF4qxCOs9hz5L6xvjf_nB7vfVlsU,5039
-vexor/services/content_extract_service.py,sha256=zdhLxpNv70BU7irLf3Uc0ou9rKSvdjtrDcHkgRKlMn4,26421
+vexor/services/content_extract_service.py,sha256=oO7Hbadwp3uiyqCbr_4MRXQsUeMix2D98i-Yp94PwFk,26495
 vexor/services/index_service.py,sha256=FXf1bBoqj4-K1l38ItxHf6Oh7QHVIdNAdVY2kg_Zoq8,32265
 vexor/services/init_service.py,sha256=3D04hylGA9FRQhLHCfR95nMko3vb5MNBcRb9nWWaUE8,26863
 vexor/services/js_parser.py,sha256=eRtW6KlK4JBYDGbyoecHVqLZ0hcx-Cc0kx6bOujHPAQ,16254
 vexor/services/keyword_service.py,sha256=vmke8tII9kTwRDdBaLHBc6Hpy_B3p98L65iGkCQgtMU,2211
-vexor/services/search_service.py,sha256=K7SiAuMA7bGeyPWOHPMKpFFvzzkj5kHWwa3p94NakJs,38663
+vexor/services/search_service.py,sha256=-7qHfebMOmXWiVBVYoBji6eaZr8OOY3q1vbtJaY3I6E,39076
 vexor/services/skill_service.py,sha256=Rrgt3OMsKPPiXOiRhSNAWjBM9UNz9qmSWQe3uYGzq4M,4863
 vexor/services/system_service.py,sha256=KPlv83v3rTvBiNiH7vrp6tDmt_AqHxuUd-5RI0TfvWs,24638
 vexor/_bundled_skills/vexor-cli/SKILL.md,sha256=m3FlyqgHBdRwyGPEp8PrUS21K0G2jEl88tRvhSPta08,2798
 vexor/_bundled_skills/vexor-cli/references/install-vexor.md,sha256=IUBShLI1mAxugwUIMAJQ5_j6KcaPWfobe0gSd6MWU7w,1245
-vexor-0.21.1.dist-info/METADATA,sha256=jS_xdqPXD8WsDNKd684w5eHmj_f1CHvNMR-DY-MvBQg,13494
-vexor-0.21.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-vexor-0.21.1.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
-vexor-0.21.1.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
-vexor-0.21.1.dist-info/RECORD,,
+vexor-0.22.0.dist-info/METADATA,sha256=UAqD6ciQCaP3eBrOGA3unO_XLX0eArcYHfcXuRhjN8c,14154
+vexor-0.22.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+vexor-0.22.0.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
+vexor-0.22.0.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
+vexor-0.22.0.dist-info/RECORD,,

{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{vexor-0.21.1.dist-info → vexor-0.22.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

vexor 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

vexor 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl