PyPI - vexor - Versions diffs - 0.20.0__py3-none-any.whl → 0.21.0__py3-none-any.whl - Mend

vexor 0.20.0-py3-none-any.whl → 0.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

vexor/__init__.py +1 -1
vexor/api.py +26 -0
vexor/cache.py +470 -274
vexor/cli.py +53 -0
vexor/config.py +54 -1
vexor/providers/gemini.py +79 -13
vexor/providers/openai.py +79 -13
vexor/services/config_service.py +14 -0
vexor/services/index_service.py +132 -5
vexor/services/search_service.py +94 -27
vexor/text.py +10 -0
{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/METADATA +15 -13
{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/RECORD +16 -16
{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/WHEEL +0 -0
{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/entry_points.txt +0 -0
{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/licenses/LICENSE +0 -0

vexor/services/index_service.py CHANGED Viewed

@@ -2,7 +2,9 @@
 from __future__ import annotations
+import itertools
 import os
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from enum import Enum
@@ -15,12 +17,18 @@ from .cache_service import load_index_metadata_safe
 from .content_extract_service import TEXT_EXTENSIONS
 from .js_parser import JSTS_EXTENSIONS
 from ..cache import CACHE_VERSION, IndexedChunk, backfill_chunk_lines
-from ..config import DEFAULT_EMBED_CONCURRENCY
+from ..config import (
+    DEFAULT_EMBED_CONCURRENCY,
+    DEFAULT_EXTRACT_BACKEND,
+    DEFAULT_EXTRACT_CONCURRENCY,
+)
 from ..modes import get_strategy, ModePayload
 INCREMENTAL_CHANGE_THRESHOLD = 0.5
 MTIME_TOLERANCE = 5e-1
 MARKDOWN_EXTENSIONS = {".md", ".markdown", ".mdx"}
+_EXTRACT_PROCESS_MIN_FILES = 16
+_CPU_HEAVY_MODES = {"auto", "code", "outline", "full"}
 class IndexStatus(str, Enum):
@@ -36,6 +44,85 @@ class IndexResult:
     files_indexed: int = 0
+def _resolve_extract_concurrency(value: int) -> int:
+    return max(int(value or 1), 1)
+def _resolve_extract_backend(
+    value: str | None,
+    *,
+    mode: str,
+    file_count: int,
+    concurrency: int,
+) -> str:
+    normalized = (value or DEFAULT_EXTRACT_BACKEND).strip().lower()
+    if normalized not in {"auto", "thread", "process"}:
+        normalized = DEFAULT_EXTRACT_BACKEND
+    if normalized == "auto":
+        if (
+            concurrency > 1
+            and file_count >= _EXTRACT_PROCESS_MIN_FILES
+            and mode in _CPU_HEAVY_MODES
+        ):
+            return "process"
+        return "thread"
+    return normalized
+def _extract_payloads_for_mode(path: Path, mode: str) -> list[ModePayload]:
+    strategy = get_strategy(mode)
+    return strategy.payloads_for_files([path])
+def _payloads_for_files(
+    strategy,
+    files: Sequence[Path],
+    *,
+    mode: str,
+    extract_concurrency: int,
+    extract_backend: str,
+) -> list[ModePayload]:
+    if not files:
+        return []
+    concurrency = _resolve_extract_concurrency(extract_concurrency)
+    if concurrency <= 1 or len(files) <= 1:
+        return strategy.payloads_for_files(files)
+    max_workers = min(concurrency, len(files))
+    def _extract_with_thread_pool() -> list[ModePayload]:
+        def _extract_one(path: Path) -> list[ModePayload]:
+            return strategy.payloads_for_files([path])
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            results = executor.map(_extract_one, files)
+            payloads: list[ModePayload] = []
+            for batch in results:
+                payloads.extend(batch)
+            return payloads
+    effective_backend = _resolve_extract_backend(
+        extract_backend,
+        mode=mode,
+        file_count=len(files),
+        concurrency=concurrency,
+    )
+    if effective_backend == "process":
+        try:
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                results = executor.map(
+                    _extract_payloads_for_mode,
+                    files,
+                    itertools.repeat(mode),
+                )
+                payloads: list[ModePayload] = []
+                for batch in results:
+                    payloads.extend(batch)
+                return payloads
+        except Exception:
+            return _extract_with_thread_pool()
+    return _extract_with_thread_pool()
 def build_index(
     directory: Path,
     *,
@@ -46,6 +133,8 @@ def build_index(
     model_name: str,
     batch_size: int,
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY,
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY,
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND,
     provider: str,
     base_url: str | None,
     api_key: str | None,
@@ -71,6 +160,7 @@ def build_index(
     if not files:
         return IndexResult(status=IndexStatus.EMPTY)
     stat_cache: dict[Path, os.stat_result] = {}
+    extract_concurrency = _resolve_extract_concurrency(extract_concurrency)
     existing_meta = load_index_metadata_safe(
         directory,
@@ -111,6 +201,9 @@ def build_index(
                     files=files,
                     missing_rel_paths=missing_line_files,
                     root=directory,
+                    extract_concurrency=extract_concurrency,
+                    extract_backend=extract_backend,
+                    mode=mode,
                 )
                 cache_path = backfill_chunk_lines(
                     root=directory,
@@ -169,7 +262,15 @@ def build_index(
                 path for rel, path in files_with_rel if rel in changed_rel_paths
             ]
             changed_payloads = (
-                strategy.payloads_for_files(changed_files) if changed_files else []
+                _payloads_for_files(
+                    strategy,
+                    changed_files,
+                    mode=mode,
+                    extract_concurrency=extract_concurrency,
+                    extract_backend=extract_backend,
+                )
+                if changed_files
+                else []
             )
             cache_path = _apply_incremental_update(
@@ -199,6 +300,9 @@ def build_index(
                     files=files,
                     missing_rel_paths=line_backfill_targets,
                     root=directory,
+                    extract_concurrency=extract_concurrency,
+                    extract_backend=extract_backend,
+                    mode=mode,
                 )
                 cache_path = backfill_chunk_lines(
                     root=directory,
@@ -217,7 +321,13 @@ def build_index(
                 files_indexed=len(files),
             )
-    payloads = strategy.payloads_for_files(files)
+    payloads = _payloads_for_files(
+        strategy,
+        files,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
     file_labels = [payload.label for payload in payloads]
     embeddings = _embed_labels_with_cache(
         searcher=searcher,
@@ -255,6 +365,8 @@ def build_index_in_memory(
     model_name: str,
     batch_size: int,
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY,
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY,
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND,
     provider: str,
     base_url: str | None,
     api_key: str | None,
@@ -307,7 +419,13 @@ def build_index_in_memory(
         api_key=api_key,
         local_cuda=local_cuda,
     )
-    payloads = strategy.payloads_for_files(files)
+    payloads = _payloads_for_files(
+        strategy,
+        files,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
     if not payloads:
         empty = np.empty((0, 0), dtype=np.float32)
         metadata = {
@@ -809,6 +927,9 @@ def _build_line_backfill_updates(
     files: Sequence[Path],
     missing_rel_paths: set[str],
     root: Path,
+    extract_concurrency: int,
+    extract_backend: str,
+    mode: str,
 ) -> list[tuple[str, int, int | None, int | None]]:
     if not missing_rel_paths:
         return []
@@ -816,7 +937,13 @@ def _build_line_backfill_updates(
     targets = [files_by_rel[rel] for rel in missing_rel_paths if rel in files_by_rel]
     if not targets:
         return []
-    payloads = strategy.payloads_for_files(targets)
+    payloads = _payloads_for_files(
+        strategy,
+        targets,
+        mode=mode,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
+    )
     return [
         (
             _relative_to_root(payload.file, root),

vexor/services/search_service.py CHANGED Viewed

@@ -7,12 +7,15 @@ from functools import lru_cache
 from pathlib import Path
 import json
 import re
+import numpy as np
 from typing import Sequence, TYPE_CHECKING
 from urllib import error as urlerror
 from urllib import request as urlrequest
 from ..config import (
     DEFAULT_EMBED_CONCURRENCY,
+    DEFAULT_EXTRACT_BACKEND,
+    DEFAULT_EXTRACT_CONCURRENCY,
     DEFAULT_FLASHRANK_MAX_LENGTH,
     DEFAULT_FLASHRANK_MODEL,
     DEFAULT_RERANK,
@@ -48,6 +51,8 @@ class SearchRequest:
     temporary_index: bool = False
     no_cache: bool = False
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND
     rerank: str = DEFAULT_RERANK
     flashrank_model: str | None = None
     remote_rerank: RemoteRerankConfig | None = None
@@ -112,6 +117,15 @@ def _resolve_rerank_candidates(top_k: int) -> int:
     return max(20, min(candidate, 150))
+def _top_indices(scores: np.ndarray, limit: int) -> list[int]:
+    if limit <= 0:
+        return []
+    if limit >= scores.size:
+        return sorted(range(scores.size), key=lambda idx: (-scores[idx], idx))
+    indices = np.argpartition(-scores, limit - 1)[:limit]
+    return sorted(indices.tolist(), key=lambda idx: (-scores[idx], idx))
 def _bm25_scores(
     query_tokens: Sequence[str],
     documents: Sequence[Sequence[str]],
@@ -349,6 +363,7 @@ def perform_search(request: SearchRequest) -> SearchResponse:
     from ..cache import (  # local import
         embedding_cache_key,
         list_cache_entries,
+        load_chunk_metadata,
         load_embedding_cache,
         load_index_vectors,
         load_query_vector,
@@ -385,6 +400,8 @@ def perform_search(request: SearchRequest) -> SearchResponse:
             model_name=request.model_name,
             batch_size=request.batch_size,
             embed_concurrency=request.embed_concurrency,
+            extract_concurrency=request.extract_concurrency,
+            extract_backend=request.extract_backend,
             provider=request.provider,
             base_url=request.base_url,
             api_key=request.api_key,
@@ -446,6 +463,7 @@ def perform_search(request: SearchRequest) -> SearchResponse:
     file_snapshot = metadata.get("files", [])
     chunk_entries = metadata.get("chunks", [])
+    chunk_ids = metadata.get("chunk_ids", [])
     stale = bool(file_snapshot) and not is_cache_current(
         request.directory,
         request.include_hidden,
@@ -466,6 +484,8 @@ def perform_search(request: SearchRequest) -> SearchResponse:
             model_name=request.model_name,
             batch_size=request.batch_size,
             embed_concurrency=request.embed_concurrency,
+            extract_concurrency=request.extract_concurrency,
+            extract_backend=request.extract_backend,
             provider=request.provider,
             base_url=request.base_url,
             api_key=request.api_key,
@@ -541,7 +561,6 @@ def perform_search(request: SearchRequest) -> SearchResponse:
             index_empty=True,
         )
-    from sklearn.metrics.pairwise import cosine_similarity  # local import
     from ..search import SearchResult, VexorSearcher  # local import
     searcher = VexorSearcher(
         model_name=request.model_name,
@@ -595,13 +614,37 @@ def perform_search(request: SearchRequest) -> SearchResponse:
             store_query_vector(int(index_id), query_hash, request.query, query_vector)
         except Exception:  # pragma: no cover - best-effort cache storage
             pass
-    similarities = cosine_similarity(
-        query_vector.reshape(1, -1),
-        file_vectors,
-    )[0]
-    scored = []
-    for idx, (path, score) in enumerate(zip(paths, similarities)):
-        chunk_meta = chunk_entries[idx] if idx < len(chunk_entries) else {}
+    reranker = None
+    rerank = (request.rerank or DEFAULT_RERANK).strip().lower()
+    use_rerank = rerank in {"bm25", "flashrank", "remote"}
+    if use_rerank:
+        candidate_limit = _resolve_rerank_candidates(request.top_k)
+    else:
+        candidate_limit = request.top_k
+    candidate_count = min(len(paths), candidate_limit)
+    query_vector = np.asarray(query_vector, dtype=np.float32).ravel()
+    similarities = np.asarray(file_vectors @ query_vector, dtype=np.float32)
+    top_indices = _top_indices(similarities, candidate_count)
+    chunk_meta_by_id: dict[int, dict] = {}
+    if chunk_ids:
+        candidate_ids = [
+            chunk_ids[idx] for idx in top_indices if idx < len(chunk_ids)
+        ]
+        if candidate_ids:
+            try:
+                chunk_meta_by_id = load_chunk_metadata(candidate_ids)
+            except Exception:  # pragma: no cover - best-effort metadata lookup
+                chunk_meta_by_id = {}
+    scored: list[SearchResult] = []
+    for idx in top_indices:
+        path = paths[idx]
+        score = similarities[idx]
+        chunk_meta = {}
+        if chunk_ids and idx < len(chunk_ids):
+            chunk_meta = chunk_meta_by_id.get(chunk_ids[idx], {})
+        elif idx < len(chunk_entries):
+            chunk_meta = chunk_entries[idx]
         start_line = chunk_meta.get("start_line")
         end_line = chunk_meta.get("end_line")
         scored.append(
@@ -614,12 +657,8 @@ def perform_search(request: SearchRequest) -> SearchResponse:
                 end_line=int(end_line) if end_line is not None else None,
             )
         )
-    scored.sort(key=lambda item: item.score, reverse=True)
-    reranker = None
-    rerank = (request.rerank or DEFAULT_RERANK).strip().lower()
-    if rerank in {"bm25", "flashrank", "remote"}:
-        candidate_count = min(len(scored), _resolve_rerank_candidates(request.top_k))
-        candidates = scored[:candidate_count]
+    if use_rerank:
+        candidates = scored
         if rerank == "bm25":
             candidates = _apply_bm25_rerank(request.query, candidates)
             reranker = "bm25"
@@ -662,6 +701,8 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
         model_name=request.model_name,
         batch_size=request.batch_size,
         embed_concurrency=request.embed_concurrency,
+        extract_concurrency=request.extract_concurrency,
+        extract_backend=request.extract_backend,
         provider=request.provider,
         base_url=request.base_url,
         api_key=request.api_key,
@@ -680,7 +721,6 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
             index_empty=True,
         )
-    from sklearn.metrics.pairwise import cosine_similarity  # local import
     from ..search import SearchResult, VexorSearcher  # local import
     searcher = VexorSearcher(
@@ -717,13 +757,23 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
                 )
             except Exception:  # pragma: no cover - best-effort cache storage
                 pass
-    similarities = cosine_similarity(
-        query_vector.reshape(1, -1),
-        file_vectors,
-    )[0]
+    reranker = None
+    rerank = (request.rerank or DEFAULT_RERANK).strip().lower()
+    use_rerank = rerank in {"bm25", "flashrank", "remote"}
+    if use_rerank:
+        candidate_limit = _resolve_rerank_candidates(request.top_k)
+    else:
+        candidate_limit = request.top_k
+    candidate_count = min(len(paths), candidate_limit)
+    query_vector = np.asarray(query_vector, dtype=np.float32).ravel()
+    similarities = np.asarray(file_vectors @ query_vector, dtype=np.float32)
+    top_indices = _top_indices(similarities, candidate_count)
     chunk_entries = metadata.get("chunks", [])
-    scored = []
-    for idx, (path, score) in enumerate(zip(paths, similarities)):
+    scored: list[SearchResult] = []
+    for idx in top_indices:
+        path = paths[idx]
+        score = similarities[idx]
         chunk_meta = chunk_entries[idx] if idx < len(chunk_entries) else {}
         start_line = chunk_meta.get("start_line")
         end_line = chunk_meta.get("end_line")
@@ -737,12 +787,8 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
                 end_line=int(end_line) if end_line is not None else None,
             )
         )
-    scored.sort(key=lambda item: item.score, reverse=True)
-    reranker = None
-    rerank = (request.rerank or DEFAULT_RERANK).strip().lower()
-    if rerank in {"bm25", "flashrank", "remote"}:
-        candidate_count = min(len(scored), _resolve_rerank_candidates(request.top_k))
-        candidates = scored[:candidate_count]
+    if use_rerank:
+        candidates = scored
         if rerank == "bm25":
             candidates = _apply_bm25_rerank(request.query, candidates)
             reranker = "bm25"
@@ -908,6 +954,7 @@ def _filter_index_by_extensions(
     ext_set = {ext.lower() for ext in extensions if ext}
     if not ext_set:
         return list(paths), file_vectors, metadata
+    chunk_ids = metadata.get("chunk_ids")
     keep_indices: list[int] = []
     filtered_paths: list[Path] = []
     for idx, path in enumerate(paths):
@@ -922,6 +969,8 @@ def _filter_index_by_extensions(
             ext_set,
         )
         filtered_metadata["chunks"] = []
+        if chunk_ids is not None:
+            filtered_metadata["chunk_ids"] = []
         return [], filtered_vectors, filtered_metadata
     filtered_vectors = file_vectors[keep_indices]
     chunk_entries = metadata.get("chunks", [])
@@ -934,6 +983,10 @@ def _filter_index_by_extensions(
         ext_set,
     )
     filtered_metadata["chunks"] = filtered_chunks
+    if chunk_ids is not None:
+        filtered_metadata["chunk_ids"] = [
+            chunk_ids[idx] for idx in keep_indices if idx < len(chunk_ids)
+        ]
     return filtered_paths, filtered_vectors, filtered_metadata
@@ -946,6 +999,7 @@ def _filter_index_by_exclude_patterns(
 ) -> tuple[list[Path], Sequence[Sequence[float]], dict]:
     if exclude_spec is None:
         return list(paths), file_vectors, metadata
+    chunk_ids = metadata.get("chunk_ids")
     keep_indices: list[int] = []
     filtered_paths: list[Path] = []
     root_resolved = root.resolve()
@@ -966,6 +1020,8 @@ def _filter_index_by_exclude_patterns(
             exclude_spec,
         )
         filtered_metadata["chunks"] = []
+        if chunk_ids is not None:
+            filtered_metadata["chunk_ids"] = []
         return [], filtered_vectors, filtered_metadata
     filtered_vectors = file_vectors[keep_indices]
     chunk_entries = metadata.get("chunks", [])
@@ -978,6 +1034,10 @@ def _filter_index_by_exclude_patterns(
         exclude_spec,
     )
     filtered_metadata["chunks"] = filtered_chunks
+    if chunk_ids is not None:
+        filtered_metadata["chunk_ids"] = [
+            chunk_ids[idx] for idx in keep_indices if idx < len(chunk_ids)
+        ]
     return filtered_paths, filtered_vectors, filtered_metadata
@@ -994,6 +1054,7 @@ def _filter_index_by_directory(
         relative_dir = directory.resolve().relative_to(index_root.resolve())
     except ValueError:
         return list(paths), file_vectors, metadata
+    chunk_ids = metadata.get("chunk_ids")
     keep_indices: list[int] = []
     filtered_paths: list[Path] = []
     for idx, path in enumerate(paths):
@@ -1014,6 +1075,8 @@ def _filter_index_by_directory(
             recursive=recursive,
         )
         filtered_metadata["chunks"] = []
+        if chunk_ids is not None:
+            filtered_metadata["chunk_ids"] = []
         filtered_metadata["root"] = str(directory)
         return [], filtered_vectors, filtered_metadata
     filtered_vectors = file_vectors[keep_indices]
@@ -1028,6 +1091,10 @@ def _filter_index_by_directory(
         recursive=recursive,
     )
     filtered_metadata["chunks"] = filtered_chunks
+    if chunk_ids is not None:
+        filtered_metadata["chunk_ids"] = [
+            chunk_ids[idx] for idx in keep_indices if idx < len(chunk_ids)
+        ]
     filtered_metadata["root"] = str(directory)
     return filtered_paths, filtered_vectors, filtered_metadata

vexor/text.py CHANGED Viewed

@@ -59,6 +59,8 @@ class Messages:
     HELP_SET_MODEL = "Set the default embedding model."
     HELP_SET_BATCH = "Set the default batch size (0 = single request)."
     HELP_SET_EMBED_CONCURRENCY = "Set the number of concurrent embedding requests."
+    HELP_SET_EXTRACT_CONCURRENCY = "Set the number of concurrent file extraction workers."
+    HELP_SET_EXTRACT_BACKEND = "Set the extraction backend (auto, thread, process)."
     HELP_SET_PROVIDER = "Set the default embedding provider (e.g., gemini, openai, custom, or local)."
     HELP_SET_BASE_URL = "Override the provider's base URL (leave unset for official endpoints)."
     HELP_CLEAR_BASE_URL = "Remove the custom base URL override."
@@ -117,6 +119,10 @@ class Messages:
     ERROR_EMPTY_QUERY = "Query text must not be empty."
     ERROR_BATCH_NEGATIVE = "Batch size must be >= 0"
     ERROR_CONCURRENCY_INVALID = "Embedding concurrency must be >= 1"
+    ERROR_EXTRACT_CONCURRENCY_INVALID = "Extraction concurrency must be >= 1"
+    ERROR_EXTRACT_BACKEND_INVALID = (
+        "Unsupported extraction backend '{value}'. Allowed values: {allowed}."
+    )
     ERROR_MODE_INVALID = "Unsupported mode '{value}'. Allowed values: {allowed}."
     ERROR_PROVIDER_INVALID = "Unsupported provider '{value}'. Allowed values: {allowed}."
     ERROR_RERANK_INVALID = "Unsupported rerank value '{value}'. Allowed values: {allowed}."
@@ -266,6 +272,8 @@ class Messages:
     INFO_MODEL_SET = "Default model set to {value}."
     INFO_BATCH_SET = "Default batch size set to {value}."
     INFO_EMBED_CONCURRENCY_SET = "Embedding concurrency set to {value}."
+    INFO_EXTRACT_CONCURRENCY_SET = "Extraction concurrency set to {value}."
+    INFO_EXTRACT_BACKEND_SET = "Extraction backend set to {value}."
     INFO_PROVIDER_SET = "Default provider set to {value}."
     INFO_BASE_URL_SET = "Base URL override set to {value}."
     INFO_BASE_URL_CLEARED = "Base URL override cleared."
@@ -308,6 +316,8 @@ class Messages:
         "Default model: {model}\n"
         "Default batch size: {batch}\n"
         "Embedding concurrency: {concurrency}\n"
+        "Extract concurrency: {extract_concurrency}\n"
+        "Extract backend: {extract_backend}\n"
         "Auto index: {auto_index}\n"
         "Rerank: {rerank}\n"
         "{flashrank_line}"

{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vexor
-Version: 0.20.0
+Version: 0.21.0
 Summary: A vector-powered CLI for semantic search over files.
 Project-URL: Repository, https://github.com/scarletkc/vexor
 Author: scarletkc
@@ -150,13 +150,26 @@ for hit in response.results:
 By default it reads `~/.vexor/config.json`. For runtime config overrides, cache
 controls, and per-call options, see [`docs/api/python.md`](https://github.com/scarletkc/vexor/tree/main/docs/api/python.md).
+## AI Agent Skill
+This repo includes a skill for AI agents to use Vexor effectively:
+```bash
+vexor install --skills claude  # Claude Code
+vexor install --skills codex   # Codex
+```
+Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)
 ## Configuration
 ```bash
 vexor config --set-provider openai          # default; also supports gemini/custom/local
 vexor config --set-model text-embedding-3-small
 vexor config --set-batch-size 0             # 0 = single request
-vexor config --set-embed-concurrency 2       # parallel embedding requests
+vexor config --set-embed-concurrency 4       # parallel embedding requests
+vexor config --set-extract-concurrency 4     # parallel file extraction workers
+vexor config --set-extract-backend auto      # auto|thread|process (default: auto)
 vexor config --set-auto-index true          # auto-index before search (default)
 vexor config --rerank bm25                  # optional BM25 rerank for top-k results
 vexor config --rerank flashrank             # FlashRank rerank (requires optional extra)
@@ -298,17 +311,6 @@ Re-running `vexor index` only re-embeds changed files; >50% changes trigger full
 Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_line`, `end_line`, `preview` (line fields are `-` when unavailable).
-## AI Agent Skill
-This repo includes a skill for AI agents to use Vexor effectively:
-```bash
-vexor install --skills claude  # Claude Code
-vexor install --skills codex   # Codex
-```
-Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)
 ## Documentation
 See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.

{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/RECORD RENAMED Viewed

@@ -1,33 +1,33 @@
-vexor/__init__.py,sha256=DAhiDVmFBHG2bu_3wkHBS2OubxR3yjbqa1nFLjQ0-Uw,441
+vexor/__init__.py,sha256=i0ly8cFA4N_PEQ_rhYgoLp2NPRQc3_ln8Gfi8QWjXSQ,441
 vexor/__main__.py,sha256=ZFzom1wCfP6TPXe3aoDFpNcUgjbCZ7Quy_vfzNsH5Fw,426
-vexor/api.py,sha256=84GxMt4laq9bpfesPJSFUzkTTCrCpIDDRpVLXKGZ-rg,10470
-vexor/cache.py,sha256=B1seuKU0eYLNvFi7Lpy_X13cSvBfhsuQeH4rZ7Hc29Y,45106
-vexor/cli.py,sha256=hnANtRGO5ypEftMyuTmlZhttSuBZy9ivxymQK11gZ9c,65736
-vexor/config.py,sha256=f5Wom1yUzScp52xpdhLlCG6x7ZEgtFV7kQlarxvD9hU,15372
+vexor/api.py,sha256=YCHpiydbPbRJUqdQYrpwe1JrRI-w_7LRuyZDGBP1_d4,11506
+vexor/cache.py,sha256=3i9FKFLSyZ1kx-w1apc12umPaQxWqMP-P8_lvo67hBw,52832
+vexor/cli.py,sha256=M9GKdD_mJ068Zpm62znTp0KhhKp1dkh_WHmfJHR9hwU,68094
+vexor/config.py,sha256=CiPfEH7Ilt6XepEx4p02qfW5HfkpNDBjhEMyckbSWaA,17413
 vexor/modes.py,sha256=N_wAWoqbxmCfko-v520p59tpAYvUwraCSSQRtMaF4ac,11549
 vexor/output.py,sha256=iooZgLlK8dh7ajJ4XMHUNNx0qyTVtD_OAAwrBx5MeqE,864
 vexor/search.py,sha256=MSU4RmH6waFYOofkIdo8_ElTiz1oNaKuvr-3umif7Bs,6826
-vexor/text.py,sha256=ntqwx2hP5QtlUXnIvBh1NFSn8cxRVhvYtFb7aMt_Tus,24171
+vexor/text.py,sha256=2aK5nJHkosmbmyzp9o_Tzb3YlmVnju_IX8BcEPUdhTA,24794
 vexor/utils.py,sha256=GzfYW2rz1-EuJjkevqZVe8flLRtrQ60OWMmFNbMh62k,12472
 vexor/providers/__init__.py,sha256=kCEoV03TSLKcxDUYVNjXnrVoLU5NpfNXjp1w1Ak2imE,92
-vexor/providers/gemini.py,sha256=-bKSubZRELefJmZzclepXNSWUPsXo94EAM9l0JtfbFM,3739
+vexor/providers/gemini.py,sha256=IWHHjCMJC0hUHQPhuaJ_L_97c_mnOXkPkCVdrIR6z-g,5705
 vexor/providers/local.py,sha256=5X_WYCXgyBGIVvvVLgMnDjTkPR4GBF0ksNPyviBlB7w,4838
-vexor/providers/openai.py,sha256=KylfxVxoTRHrK7KHwMotgD7_fq-CLhx43MQGeGz2dfo,3388
+vexor/providers/openai.py,sha256=YnJDY9gJW7RfGGdkgswVHvmOKNvgLRQUsbpA1MUuLPg,5356
 vexor/services/__init__.py,sha256=dA_i2N03vlYmbZbEK2knzJLWviunkNWbzN2LWPNvMk0,160
 vexor/services/cache_service.py,sha256=ywt6AgupCJ7_wC3je4znCMw5_VBouw3skbDTAt8xw6o,1639
-vexor/services/config_service.py,sha256=yJTBbOmxpbzskHPuLlxYXQ-COJC6-qKtvMsSfuneJoA,4471
+vexor/services/config_service.py,sha256=PojolfbSKh9pW8slF4qxCOs9hz5L6xvjf_nB7vfVlsU,5039
 vexor/services/content_extract_service.py,sha256=zdhLxpNv70BU7irLf3Uc0ou9rKSvdjtrDcHkgRKlMn4,26421
-vexor/services/index_service.py,sha256=pteAG-eRA8FJmDc4GEwhHXGZE8Dm5L8uqzBB0Y8Rrgo,28312
+vexor/services/index_service.py,sha256=FXf1bBoqj4-K1l38ItxHf6Oh7QHVIdNAdVY2kg_Zoq8,32265
 vexor/services/init_service.py,sha256=3D04hylGA9FRQhLHCfR95nMko3vb5MNBcRb9nWWaUE8,26863
 vexor/services/js_parser.py,sha256=eRtW6KlK4JBYDGbyoecHVqLZ0hcx-Cc0kx6bOujHPAQ,16254
 vexor/services/keyword_service.py,sha256=vmke8tII9kTwRDdBaLHBc6Hpy_B3p98L65iGkCQgtMU,2211
-vexor/services/search_service.py,sha256=_3WMzHNV0MCGWFXqwYCQ-XF08aJwF9L4mOGGnmXOATs,36076
+vexor/services/search_service.py,sha256=K7SiAuMA7bGeyPWOHPMKpFFvzzkj5kHWwa3p94NakJs,38663
 vexor/services/skill_service.py,sha256=Rrgt3OMsKPPiXOiRhSNAWjBM9UNz9qmSWQe3uYGzq4M,4863
 vexor/services/system_service.py,sha256=KPlv83v3rTvBiNiH7vrp6tDmt_AqHxuUd-5RI0TfvWs,24638
 vexor/_bundled_skills/vexor-cli/SKILL.md,sha256=m3FlyqgHBdRwyGPEp8PrUS21K0G2jEl88tRvhSPta08,2798
 vexor/_bundled_skills/vexor-cli/references/install-vexor.md,sha256=IUBShLI1mAxugwUIMAJQ5_j6KcaPWfobe0gSd6MWU7w,1245
-vexor-0.20.0.dist-info/METADATA,sha256=RBTym4NL38S6OjijOg5fWPW9VmD7AuLEEqjZtQMkZqA,13331
-vexor-0.20.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-vexor-0.20.0.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
-vexor-0.20.0.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
-vexor-0.20.0.dist-info/RECORD,,
+vexor-0.21.0.dist-info/METADATA,sha256=Lc5PHY_Ir3F56ILYe6IBlkwhN6gMQGZvf48f7x_uVDg,13494
+vexor-0.21.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+vexor-0.21.0.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
+vexor-0.21.0.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
+vexor-0.21.0.dist-info/RECORD,,

{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{vexor-0.20.0.dist-info → vexor-0.21.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

vexor 0.20.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

vexor 0.20.0-py3-none-any.whl → 0.21.0-py3-none-any.whl