vexor 0.19.0a1__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/providers/openai.py CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
 from typing import Iterator, Sequence
 
 import numpy as np
@@ -35,14 +36,19 @@ class OpenAIEmbeddingBackend:
         if base_url:
             client_kwargs["base_url"] = base_url.rstrip("/")
         self._client = OpenAI(**client_kwargs)
+        self._executor: ThreadPoolExecutor | None = None
 
     def embed(self, texts: Sequence[str]) -> np.ndarray:
         if not texts:
            return np.empty((0, 0), dtype=np.float32)
-        batches = list(_chunk(texts, self.chunk_size))
-        if self.concurrency > 1 and len(batches) > 1:
-            vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
-            with ThreadPoolExecutor(max_workers=min(self.concurrency, len(batches))) as executor:
+        if self.concurrency > 1:
+            batches = list(_chunk(texts, self.chunk_size))
+            if len(batches) > 1:
+                vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
+                executor = self._executor
+                if executor is None:
+                    executor = ThreadPoolExecutor(max_workers=self.concurrency)
+                    self._executor = executor
                 future_map = {
                     executor.submit(self._embed_batch, batch): idx
                     for idx, batch in enumerate(batches)
@@ -50,23 +56,34 @@ class OpenAIEmbeddingBackend:
                 for future in as_completed(future_map):
                     idx = future_map[future]
                     vectors_by_batch[idx] = future.result()
-            vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+                vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+            else:
+                vectors = []
+                for batch in batches:
+                    vectors.extend(self._embed_batch(batch))
         else:
             vectors = []
-            for batch in batches:
+            for batch in _chunk(texts, self.chunk_size):
                 vectors.extend(self._embed_batch(batch))
         if not vectors:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
         return np.vstack(vectors)
 
     def _embed_batch(self, batch: Sequence[str]) -> list[np.ndarray]:
-        try:
-            response = self._client.embeddings.create(
-                model=self.model_name,
-                input=list(batch),
-            )
-        except Exception as exc:  # pragma: no cover - API client variations
-            raise RuntimeError(_format_openai_error(exc)) from exc
+        attempt = 0
+        while True:
+            try:
+                response = self._client.embeddings.create(
+                    model=self.model_name,
+                    input=list(batch),
+                )
+                break
+            except Exception as exc:  # pragma: no cover - API client variations
+                if _should_retry_openai_error(exc) and attempt < _MAX_RETRIES:
+                    _sleep(_backoff_delay(attempt))
+                    attempt += 1
+                    continue
+                raise RuntimeError(_format_openai_error(exc)) from exc
         data = getattr(response, "data", None) or []
         if not data:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
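The embed() rework above also changes the executor's lifetime: instead of opening a ThreadPoolExecutor in a with-block on every call (tearing the pool down each time), the backend now lazily creates one pool and keeps it on self._executor for reuse across calls. A minimal sketch of that caching pattern, with hypothetical names rather than vexor's actual class:

    from concurrent.futures import ThreadPoolExecutor

    class PooledBackend:
        """Lazily create one thread pool and reuse it across calls."""

        def __init__(self, concurrency: int) -> None:
            self.concurrency = concurrency
            self._executor: ThreadPoolExecutor | None = None

        def _pool(self) -> ThreadPoolExecutor:
            # First call creates the pool; later calls reuse the same
            # workers, avoiding per-request pool startup and teardown.
            if self._executor is None:
                self._executor = ThreadPoolExecutor(max_workers=self.concurrency)
            return self._executor

        def map_batches(self, batches):
            return list(self._pool().map(len, batches))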
@@ -87,6 +104,55 @@ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
         yield items[idx : idx + size]
 
 
+_RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+_MAX_RETRIES = 2
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 4.0
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+def _backoff_delay(attempt: int) -> float:
+    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))
+
+
+def _extract_status_code(exc: Exception) -> int | None:
+    for attr in ("status_code", "status", "http_status"):
+        value = getattr(exc, attr, None)
+        if isinstance(value, int):
+            return value
+    response = getattr(exc, "response", None)
+    if response is not None:
+        value = getattr(response, "status_code", None)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def _should_retry_openai_error(exc: Exception) -> bool:
+    status = _extract_status_code(exc)
+    if status in _RETRYABLE_STATUS_CODES:
+        return True
+    name = exc.__class__.__name__.lower()
+    if "ratelimit" in name or "timeout" in name or "temporarily" in name:
+        return True
+    message = str(exc).lower()
+    return any(
+        token in message
+        for token in (
+            "rate limit",
+            "timeout",
+            "temporar",
+            "overload",
+            "try again",
+            "too many requests",
+            "service unavailable",
+        )
+    )
+
+
 def _format_openai_error(exc: Exception) -> str:
     message = getattr(exc, "message", None) or str(exc)
     return f"{Messages.ERROR_OPENAI_PREFIX}{message}"
@@ -11,6 +11,8 @@ from ..config import (
     set_base_url,
     set_batch_size,
     set_embed_concurrency,
+    set_extract_concurrency,
+    set_extract_backend,
     set_auto_index,
     set_flashrank_model,
     set_local_cuda,
@@ -28,6 +30,8 @@ class ConfigUpdateResult:
     model_set: bool = False
     batch_size_set: bool = False
     embed_concurrency_set: bool = False
+    extract_concurrency_set: bool = False
+    extract_backend_set: bool = False
     provider_set: bool = False
     base_url_set: bool = False
     base_url_cleared: bool = False
@@ -49,6 +53,8 @@ class ConfigUpdateResult:
             self.model_set,
             self.batch_size_set,
             self.embed_concurrency_set,
+            self.extract_concurrency_set,
+            self.extract_backend_set,
             self.provider_set,
             self.base_url_set,
             self.base_url_cleared,
@@ -71,6 +77,8 @@ def apply_config_updates(
     model: str | None = None,
     batch_size: int | None = None,
     embed_concurrency: int | None = None,
+    extract_concurrency: int | None = None,
+    extract_backend: str | None = None,
     provider: str | None = None,
     base_url: str | None = None,
     clear_base_url: bool = False,
@@ -101,6 +109,12 @@
     if embed_concurrency is not None:
         set_embed_concurrency(embed_concurrency)
         result.embed_concurrency_set = True
+    if extract_concurrency is not None:
+        set_extract_concurrency(extract_concurrency)
+        result.extract_concurrency_set = True
+    if extract_backend is not None:
+        set_extract_backend(extract_backend)
+        result.extract_backend_set = True
     if provider is not None:
         set_provider(provider)
         result.provider_set = True
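Callers opt into the new settings the same way as the existing ones; a hedged sketch, assuming all of apply_config_updates' update fields are optional keywords as the hunks suggest:

    result = apply_config_updates(
        extract_concurrency=4,       # persisted via set_extract_concurrency
        extract_backend="process",   # persisted via set_extract_backend
    )
    assert result.extract_concurrency_set and result.extract_backend_set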
@@ -2,8 +2,11 @@
 
 from __future__ import annotations
 
+import itertools
 import os
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass, field
+from datetime import datetime, timezone
 from enum import Enum
 from pathlib import Path
 from typing import MutableMapping, Sequence
@@ -14,12 +17,18 @@ from .cache_service import load_index_metadata_safe
 from .content_extract_service import TEXT_EXTENSIONS
 from .js_parser import JSTS_EXTENSIONS
 from ..cache import CACHE_VERSION, IndexedChunk, backfill_chunk_lines
-from ..config import DEFAULT_EMBED_CONCURRENCY
+from ..config import (
+    DEFAULT_EMBED_CONCURRENCY,
+    DEFAULT_EXTRACT_BACKEND,
+    DEFAULT_EXTRACT_CONCURRENCY,
+)
 from ..modes import get_strategy, ModePayload
 
 INCREMENTAL_CHANGE_THRESHOLD = 0.5
 MTIME_TOLERANCE = 5e-1
 MARKDOWN_EXTENSIONS = {".md", ".markdown", ".mdx"}
+_EXTRACT_PROCESS_MIN_FILES = 16
+_CPU_HEAVY_MODES = {"auto", "code", "outline", "full"}
 
 
 class IndexStatus(str, Enum):
@@ -35,6 +44,85 @@ class IndexResult:
     files_indexed: int = 0
 
 
+def _resolve_extract_concurrency(value: int) -> int:
+    return max(int(value or 1), 1)
+
+
+def _resolve_extract_backend(
+    value: str | None,
+    *,
+    mode: str,
+    file_count: int,
+    concurrency: int,
+) -> str:
+    normalized = (value or DEFAULT_EXTRACT_BACKEND).strip().lower()
+    if normalized not in {"auto", "thread", "process"}:
+        normalized = DEFAULT_EXTRACT_BACKEND
+    if normalized == "auto":
+        if (
+            concurrency > 1
+            and file_count >= _EXTRACT_PROCESS_MIN_FILES
+            and mode in _CPU_HEAVY_MODES
+        ):
+            return "process"
+        return "thread"
+    return normalized
+
+
+def _extract_payloads_for_mode(path: Path, mode: str) -> list[ModePayload]:
+    strategy = get_strategy(mode)
+    return strategy.payloads_for_files([path])
+
+
+def _payloads_for_files(
+    strategy,
+    files: Sequence[Path],
+    *,
+    mode: str,
+    extract_concurrency: int,
+    extract_backend: str,
+) -> list[ModePayload]:
+    if not files:
+        return []
+    concurrency = _resolve_extract_concurrency(extract_concurrency)
+    if concurrency <= 1 or len(files) <= 1:
+        return strategy.payloads_for_files(files)
+    max_workers = min(concurrency, len(files))
+
+    def _extract_with_thread_pool() -> list[ModePayload]:
+        def _extract_one(path: Path) -> list[ModePayload]:
+            return strategy.payloads_for_files([path])
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            results = executor.map(_extract_one, files)
+            payloads: list[ModePayload] = []
+            for batch in results:
+                payloads.extend(batch)
+        return payloads
+
+    effective_backend = _resolve_extract_backend(
+        extract_backend,
+        mode=mode,
+        file_count=len(files),
+        concurrency=concurrency,
+    )
+    if effective_backend == "process":
+        try:
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                results = executor.map(
+                    _extract_payloads_for_mode,
+                    files,
+                    itertools.repeat(mode),
+                )
+                payloads: list[ModePayload] = []
+                for batch in results:
+                    payloads.extend(batch)
+                return payloads
+        except Exception:
+            return _extract_with_thread_pool()
+    return _extract_with_thread_pool()
+
+
 def build_index(
     directory: Path,
     *,
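With _resolve_extract_backend as defined above, "auto" escalates to a process pool only when all three conditions hold (concurrency above 1, at least _EXTRACT_PROCESS_MIN_FILES files, and a CPU-heavy mode); anything else stays on threads, and an explicit "thread" or "process" bypasses the heuristic. A few worked cases:

    # All three auto conditions hold -> process pool.
    assert _resolve_extract_backend(
        "auto", mode="code", file_count=32, concurrency=4
    ) == "process"

    # Below the 16-file threshold -> thread pool.
    assert _resolve_extract_backend(
        "auto", mode="code", file_count=8, concurrency=4
    ) == "thread"

    # An explicit choice is honored as-is.
    assert _resolve_extract_backend(
        "thread", mode="code", file_count=64, concurrency=8
    ) == "thread"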
@@ -45,12 +133,15 @@ def build_index(
     model_name: str,
     batch_size: int,
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY,
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY,
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND,
     provider: str,
     base_url: str | None,
     api_key: str | None,
     local_cuda: bool = False,
     exclude_patterns: Sequence[str] | None = None,
     extensions: Sequence[str] | None = None,
+    no_cache: bool = False,
 ) -> IndexResult:
     """Create or refresh the cached index for *directory*."""
 
@@ -69,6 +160,7 @@
     if not files:
         return IndexResult(status=IndexStatus.EMPTY)
     stat_cache: dict[Path, os.stat_result] = {}
+    extract_concurrency = _resolve_extract_concurrency(extract_concurrency)
 
     existing_meta = load_index_metadata_safe(
         directory,
@@ -109,6 +201,9 @@
             files=files,
             missing_rel_paths=missing_line_files,
             root=directory,
+            extract_concurrency=extract_concurrency,
+            extract_backend=extract_backend,
+            mode=mode,
         )
         cache_path = backfill_chunk_lines(
             root=directory,
@@ -167,7 +262,15 @@
             path for rel, path in files_with_rel if rel in changed_rel_paths
         ]
         changed_payloads = (
-            strategy.payloads_for_files(changed_files) if changed_files else []
+            _payloads_for_files(
+                strategy,
+                changed_files,
+                mode=mode,
+                extract_concurrency=extract_concurrency,
+                extract_backend=extract_backend,
+            )
+            if changed_files
+            else []
         )
 
         cache_path = _apply_incremental_update(
@@ -187,6 +290,7 @@
             exclude_patterns=exclude_patterns,
             extensions=extensions,
             stat_cache=stat_cache,
+            no_cache=no_cache,
         )
 
         line_backfill_targets = missing_line_files - changed_rel_paths - removed_rel_paths
@@ -196,6 +300,9 @@
             files=files,
             missing_rel_paths=line_backfill_targets,
             root=directory,
+            extract_concurrency=extract_concurrency,
+            extract_backend=extract_backend,
+            mode=mode,
         )
         cache_path = backfill_chunk_lines(
             root=directory,
@@ -214,12 +321,19 @@
             files_indexed=len(files),
         )
 
-    payloads = strategy.payloads_for_files(files)
+    payloads = _payloads_for_files(
+        strategy,
+        files,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
     file_labels = [payload.label for payload in payloads]
     embeddings = _embed_labels_with_cache(
         searcher=searcher,
         model_name=model_name,
         labels=file_labels,
+        no_cache=no_cache,
     )
     entries = _build_index_entries(payloads, embeddings, directory, stat_cache=stat_cache)
@@ -241,6 +355,158 @@
     )
 
 
+def build_index_in_memory(
+    directory: Path,
+    *,
+    include_hidden: bool,
+    respect_gitignore: bool = True,
+    mode: str,
+    recursive: bool,
+    model_name: str,
+    batch_size: int,
+    embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY,
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY,
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND,
+    provider: str,
+    base_url: str | None,
+    api_key: str | None,
+    local_cuda: bool = False,
+    exclude_patterns: Sequence[str] | None = None,
+    extensions: Sequence[str] | None = None,
+    no_cache: bool = False,
+) -> tuple[list[Path], np.ndarray, dict]:
+    """Build an index in memory without writing to disk."""
+
+    from ..search import VexorSearcher  # local import
+    from ..utils import collect_files  # local import
+
+    files = collect_files(
+        directory,
+        include_hidden=include_hidden,
+        recursive=recursive,
+        extensions=extensions,
+        exclude_patterns=exclude_patterns,
+        respect_gitignore=respect_gitignore,
+    )
+    if not files:
+        empty = np.empty((0, 0), dtype=np.float32)
+        metadata = {
+            "index_id": None,
+            "version": CACHE_VERSION,
+            "generated_at": datetime.now(timezone.utc).isoformat(),
+            "root": str(directory),
+            "model": model_name,
+            "include_hidden": include_hidden,
+            "respect_gitignore": respect_gitignore,
+            "recursive": recursive,
+            "mode": mode,
+            "dimension": 0,
+            "exclude_patterns": tuple(exclude_patterns or ()),
+            "extensions": tuple(extensions or ()),
+            "files": [],
+            "chunks": [],
+        }
+        return [], empty, metadata
+
+    stat_cache: dict[Path, os.stat_result] = {}
+    strategy = get_strategy(mode)
+    searcher = VexorSearcher(
+        model_name=model_name,
+        batch_size=batch_size,
+        embed_concurrency=embed_concurrency,
+        provider=provider,
+        base_url=base_url,
+        api_key=api_key,
+        local_cuda=local_cuda,
+    )
+    payloads = _payloads_for_files(
+        strategy,
+        files,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
+    if not payloads:
+        empty = np.empty((0, 0), dtype=np.float32)
+        metadata = {
+            "index_id": None,
+            "version": CACHE_VERSION,
+            "generated_at": datetime.now(timezone.utc).isoformat(),
+            "root": str(directory),
+            "model": model_name,
+            "include_hidden": include_hidden,
+            "respect_gitignore": respect_gitignore,
+            "recursive": recursive,
+            "mode": mode,
+            "dimension": 0,
+            "exclude_patterns": tuple(exclude_patterns or ()),
+            "extensions": tuple(extensions or ()),
+            "files": [],
+            "chunks": [],
+        }
+        return [], empty, metadata
+
+    labels = [payload.label for payload in payloads]
+    if no_cache:
+        embeddings = searcher.embed_texts(labels)
+        vectors = np.asarray(embeddings, dtype=np.float32)
+    else:
+        vectors = _embed_labels_with_cache(
+            searcher=searcher,
+            model_name=model_name,
+            labels=labels,
+        )
+    entries = _build_index_entries(
+        payloads,
+        vectors,
+        directory,
+        stat_cache=stat_cache,
+    )
+    paths = [entry.path for entry in entries]
+    file_snapshot: dict[str, dict] = {}
+    chunk_entries: list[dict] = []
+    for entry in entries:
+        rel_path = entry.rel_path
+        chunk_entries.append(
+            {
+                "path": rel_path,
+                "absolute": str(entry.path),
+                "mtime": entry.mtime,
+                "size": entry.size_bytes,
+                "preview": entry.preview,
+                "label_hash": entry.label_hash,
+                "chunk_index": entry.chunk_index,
+                "start_line": entry.start_line,
+                "end_line": entry.end_line,
+            }
+        )
+        if rel_path not in file_snapshot:
+            file_snapshot[rel_path] = {
+                "path": rel_path,
+                "absolute": str(entry.path),
+                "mtime": entry.mtime,
+                "size": entry.size_bytes,
+            }
+
+    metadata = {
+        "index_id": None,
+        "version": CACHE_VERSION,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "root": str(directory),
+        "model": model_name,
+        "include_hidden": include_hidden,
+        "respect_gitignore": respect_gitignore,
+        "recursive": recursive,
+        "mode": mode,
+        "dimension": int(vectors.shape[1]) if vectors.size else 0,
+        "exclude_patterns": tuple(exclude_patterns or ()),
+        "extensions": tuple(extensions or ()),
+        "files": list(file_snapshot.values()),
+        "chunks": chunk_entries,
+    }
+    return paths, vectors, metadata
+
+
 def clear_index_entries(
     directory: Path,
     *,
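build_index_in_memory mirrors build_index's options but returns the collected paths, the embedding matrix, and the metadata dict directly instead of persisting a cache file, which suits one-off searches. A hedged usage sketch; the argument values (directory, model name, batch size) are illustrative, not defaults:

    paths, vectors, metadata = build_index_in_memory(
        Path("./src"),
        include_hidden=False,
        mode="code",
        recursive=True,
        model_name="text-embedding-3-small",
        batch_size=64,
        provider="openai",
        base_url=None,
        api_key=None,
        no_cache=True,  # skip the on-disk embedding cache entirely
    )
    print(len(paths), vectors.shape, metadata["dimension"])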
@@ -367,6 +633,7 @@ def _apply_incremental_update(
     exclude_patterns: Sequence[str] | None,
     extensions: Sequence[str] | None,
     stat_cache: MutableMapping[Path, os.stat_result] | None = None,
+    no_cache: bool = False,
 ) -> Path:
     payloads_to_embed, payloads_to_touch = _split_payloads_by_label(
         changed_payloads,
@@ -387,6 +654,7 @@
         searcher=searcher,
         model_name=model_name,
         labels=labels,
+        no_cache=no_cache,
     )
     changed_entries = _build_index_entries(
         payloads_to_embed,
@@ -424,9 +692,13 @@ def _embed_labels_with_cache(
     searcher,
     model_name: str,
     labels: Sequence[str],
+    no_cache: bool = False,
 ) -> np.ndarray:
     if not labels:
         return np.empty((0, 0), dtype=np.float32)
+    if no_cache:
+        vectors = searcher.embed_texts(labels)
+        return np.asarray(vectors, dtype=np.float32)
     from ..cache import embedding_cache_key, load_embedding_cache, store_embedding_cache
 
     hashes = [embedding_cache_key(label) for label in labels]
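The no_cache short-circuit above returns embeddings straight from searcher.embed_texts without ever importing or touching the embedding cache. A minimal sketch with a stub searcher (StubSearcher is hypothetical, for illustration only):

    import numpy as np

    class StubSearcher:
        def embed_texts(self, labels):
            # Pretend every label embeds to a 3-dimensional vector.
            return np.ones((len(labels), 3), dtype=np.float32)

    vectors = _embed_labels_with_cache(
        searcher=StubSearcher(),
        model_name="stub-model",
        labels=["a", "b"],
        no_cache=True,  # bypasses load_embedding_cache / store_embedding_cache
    )
    assert vectors.shape == (2, 3)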
@@ -655,6 +927,9 @@ def _build_line_backfill_updates(
     files: Sequence[Path],
     missing_rel_paths: set[str],
     root: Path,
+    extract_concurrency: int,
+    extract_backend: str,
+    mode: str,
 ) -> list[tuple[str, int, int | None, int | None]]:
     if not missing_rel_paths:
         return []
@@ -662,7 +937,13 @@
     targets = [files_by_rel[rel] for rel in missing_rel_paths if rel in files_by_rel]
     if not targets:
         return []
-    payloads = strategy.payloads_for_files(targets)
+    payloads = _payloads_for_files(
+        strategy,
+        targets,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
     return [
         (
             _relative_to_root(payload.file, root),