vexor 0.20.0-py3-none-any.whl → 0.21.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vexor/__init__.py +1 -1
- vexor/api.py +26 -0
- vexor/cache.py +525 -286
- vexor/cli.py +53 -0
- vexor/config.py +54 -1
- vexor/providers/gemini.py +79 -13
- vexor/providers/openai.py +79 -13
- vexor/services/config_service.py +14 -0
- vexor/services/index_service.py +132 -5
- vexor/services/search_service.py +94 -27
- vexor/text.py +10 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/METADATA +15 -13
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/RECORD +16 -16
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/WHEEL +0 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/entry_points.txt +0 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/licenses/LICENSE +0 -0
vexor/cli.py
CHANGED
@@ -31,6 +31,7 @@ from .config import (
     DEFAULT_MODEL,
     DEFAULT_PROVIDER,
     DEFAULT_RERANK,
+    SUPPORTED_EXTRACT_BACKENDS,
     SUPPORTED_PROVIDERS,
     SUPPORTED_RERANKERS,
     flashrank_cache_dir,
@@ -401,6 +402,8 @@ def search(
     model_name = resolve_default_model(provider, config.model)
     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
     embed_concurrency = config.embed_concurrency
+    extract_concurrency = config.extract_concurrency
+    extract_backend = config.extract_backend
     base_url = config.base_url
     api_key = config.api_key
     auto_index = bool(config.auto_index)
@@ -438,6 +441,8 @@ def search(
         model_name=model_name,
         batch_size=batch_size,
         embed_concurrency=embed_concurrency,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
         provider=provider,
         base_url=base_url,
         api_key=api_key,
@@ -577,6 +582,8 @@ def index(
     model_name = resolve_default_model(provider, config.model)
     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
     embed_concurrency = config.embed_concurrency
+    extract_concurrency = config.extract_concurrency
+    extract_backend = config.extract_backend
     base_url = config.base_url
     api_key = config.api_key
 
@@ -673,6 +680,8 @@ def index(
         model_name=model_name,
         batch_size=batch_size,
         embed_concurrency=embed_concurrency,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
         provider=provider,
         base_url=base_url,
         api_key=api_key,
@@ -734,6 +743,16 @@ def config(
         "--set-embed-concurrency",
         help=Messages.HELP_SET_EMBED_CONCURRENCY,
     ),
+    set_extract_concurrency_option: int | None = typer.Option(
+        None,
+        "--set-extract-concurrency",
+        help=Messages.HELP_SET_EXTRACT_CONCURRENCY,
+    ),
+    set_extract_backend_option: str | None = typer.Option(
+        None,
+        "--set-extract-backend",
+        help=Messages.HELP_SET_EXTRACT_BACKEND,
+    ),
     set_provider_option: str | None = typer.Option(
         None,
         "--set-provider",
@@ -810,6 +829,8 @@ def config(
         raise typer.BadParameter(Messages.ERROR_BATCH_NEGATIVE)
     if set_embed_concurrency_option is not None and set_embed_concurrency_option < 1:
         raise typer.BadParameter(Messages.ERROR_CONCURRENCY_INVALID)
+    if set_extract_concurrency_option is not None and set_extract_concurrency_option < 1:
+        raise typer.BadParameter(Messages.ERROR_EXTRACT_CONCURRENCY_INVALID)
     if set_base_url_option and clear_base_url:
         raise typer.BadParameter(Messages.ERROR_BASE_URL_CONFLICT)
     flashrank_model_reset = False
@@ -835,6 +856,16 @@ def config(
         if not normalized_remote_key:
             raise typer.BadParameter(Messages.ERROR_REMOTE_RERANK_API_KEY_EMPTY)
         set_remote_rerank_api_key_option = normalized_remote_key
+    if set_extract_backend_option is not None:
+        normalized_backend = set_extract_backend_option.strip().lower()
+        if normalized_backend not in SUPPORTED_EXTRACT_BACKENDS:
+            allowed = ", ".join(SUPPORTED_EXTRACT_BACKENDS)
+            raise typer.BadParameter(
+                Messages.ERROR_EXTRACT_BACKEND_INVALID.format(
+                    value=set_extract_backend_option, allowed=allowed
+                )
+            )
+        set_extract_backend_option = normalized_backend
     if clear_remote_rerank and any(
         (
             set_remote_rerank_url_option is not None,
@@ -850,6 +881,8 @@ def config(
             set_model_option is not None,
             set_batch_option is not None,
             set_embed_concurrency_option is not None,
+            set_extract_concurrency_option is not None,
+            set_extract_backend_option is not None,
             set_provider_option is not None,
             set_base_url_option is not None,
             clear_base_url,
@@ -962,6 +995,8 @@ def config(
         model=set_model_option,
         batch_size=set_batch_option,
         embed_concurrency=set_embed_concurrency_option,
+        extract_concurrency=set_extract_concurrency_option,
+        extract_backend=set_extract_backend_option,
         provider=set_provider_option,
         base_url=set_base_url_option,
         clear_base_url=clear_base_url,
@@ -993,6 +1028,22 @@ def config(
                 Styles.SUCCESS,
             )
         )
+    if updates.extract_concurrency_set and set_extract_concurrency_option is not None:
+        console.print(
+            _styled(
+                Messages.INFO_EXTRACT_CONCURRENCY_SET.format(
+                    value=set_extract_concurrency_option
+                ),
+                Styles.SUCCESS,
+            )
+        )
+    if updates.extract_backend_set and set_extract_backend_option is not None:
+        console.print(
+            _styled(
+                Messages.INFO_EXTRACT_BACKEND_SET.format(value=set_extract_backend_option),
+                Styles.SUCCESS,
+            )
+        )
     if updates.provider_set and set_provider_option is not None:
         console.print(
             _styled(Messages.INFO_PROVIDER_SET.format(value=set_provider_option), Styles.SUCCESS)
@@ -1139,6 +1190,8 @@ def config(
         model=resolve_default_model(provider, cfg.model),
         batch=cfg.batch_size if cfg.batch_size is not None else DEFAULT_BATCH_SIZE,
         concurrency=cfg.embed_concurrency,
+        extract_concurrency=cfg.extract_concurrency,
+        extract_backend=cfg.extract_backend,
         auto_index="yes" if cfg.auto_index else "no",
         rerank=rerank,
         flashrank_line=flashrank_line,
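In practice, the cli.py changes above add two switches to the existing config command: --set-extract-concurrency, which is rejected unless the value is at least 1, and --set-extract-backend, which is lower-cased and checked against SUPPORTED_EXTRACT_BACKENDS before being stored; both settings are echoed back on success and included in the config display. Assuming the usual typer wiring of this CLI, that presumably looks something like `vexor config --set-extract-concurrency 8 --set-extract-backend process` (the exact invocation is inferred from the function names, not shown in this diff).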
vexor/config.py
CHANGED
@@ -19,13 +19,16 @@ DEFAULT_MODEL = "text-embedding-3-small"
 DEFAULT_GEMINI_MODEL = "gemini-embedding-001"
 DEFAULT_LOCAL_MODEL = "intfloat/multilingual-e5-small"
 DEFAULT_BATCH_SIZE = 64
-DEFAULT_EMBED_CONCURRENCY =
+DEFAULT_EMBED_CONCURRENCY = 4
+DEFAULT_EXTRACT_CONCURRENCY = max(1, min(4, os.cpu_count() or 1))
+DEFAULT_EXTRACT_BACKEND = "auto"
 DEFAULT_PROVIDER = "openai"
 DEFAULT_RERANK = "off"
 DEFAULT_FLASHRANK_MODEL = "ms-marco-TinyBERT-L-2-v2"
 DEFAULT_FLASHRANK_MAX_LENGTH = 256
 SUPPORTED_PROVIDERS: tuple[str, ...] = (DEFAULT_PROVIDER, "gemini", "custom", "local")
 SUPPORTED_RERANKERS: tuple[str, ...] = ("off", "bm25", "flashrank", "remote")
+SUPPORTED_EXTRACT_BACKENDS: tuple[str, ...] = ("auto", "thread", "process")
 ENV_API_KEY = "VEXOR_API_KEY"
 REMOTE_RERANK_ENV = "VEXOR_REMOTE_RERANK_API_KEY"
 LEGACY_GEMINI_ENV = "GOOGLE_GENAI_API_KEY"
@@ -45,6 +48,8 @@ class Config:
     model: str = DEFAULT_MODEL
     batch_size: int = DEFAULT_BATCH_SIZE
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND
     provider: str = DEFAULT_PROVIDER
     base_url: str | None = None
     auto_index: bool = True
@@ -81,6 +86,10 @@ def load_config() -> Config:
         model=raw.get("model") or DEFAULT_MODEL,
         batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
         embed_concurrency=int(raw.get("embed_concurrency", DEFAULT_EMBED_CONCURRENCY)),
+        extract_concurrency=int(
+            raw.get("extract_concurrency", DEFAULT_EXTRACT_CONCURRENCY)
+        ),
+        extract_backend=_coerce_extract_backend(raw.get("extract_backend")),
         provider=raw.get("provider") or DEFAULT_PROVIDER,
         base_url=raw.get("base_url") or None,
         auto_index=bool(raw.get("auto_index", True)),
@@ -100,6 +109,8 @@ def save_config(config: Config) -> None:
     data["model"] = config.model
     data["batch_size"] = config.batch_size
     data["embed_concurrency"] = config.embed_concurrency
+    data["extract_concurrency"] = config.extract_concurrency
+    data["extract_backend"] = config.extract_backend
     if config.provider:
         data["provider"] = config.provider
     if config.base_url:
@@ -189,6 +200,18 @@ def set_embed_concurrency(value: int) -> None:
     save_config(config)
 
 
+def set_extract_concurrency(value: int) -> None:
+    config = load_config()
+    config.extract_concurrency = value
+    save_config(config)
+
+
+def set_extract_backend(value: str) -> None:
+    config = load_config()
+    config.extract_backend = _normalize_extract_backend(value)
+    save_config(config)
+
+
 def set_provider(value: str) -> None:
     config = load_config()
     config.provider = value
@@ -341,6 +364,8 @@ def _clone_config(config: Config) -> Config:
         model=config.model,
         batch_size=config.batch_size,
         embed_concurrency=config.embed_concurrency,
+        extract_concurrency=config.extract_concurrency,
+        extract_backend=config.extract_backend,
         provider=config.provider,
         base_url=config.base_url,
         auto_index=config.auto_index,
@@ -374,6 +399,14 @@ def _apply_config_payload(config: Config, payload: Mapping[str, object]) -> None
             "embed_concurrency",
             DEFAULT_EMBED_CONCURRENCY,
         )
+    if "extract_concurrency" in payload:
+        config.extract_concurrency = _coerce_int(
+            payload["extract_concurrency"],
+            "extract_concurrency",
+            DEFAULT_EXTRACT_CONCURRENCY,
+        )
+    if "extract_backend" in payload:
+        config.extract_backend = _normalize_extract_backend(payload["extract_backend"])
     if "provider" in payload:
         config.provider = _coerce_required_str(
             payload["provider"], "provider", DEFAULT_PROVIDER
@@ -448,6 +481,26 @@ def _coerce_bool(value: object, field: str) -> bool:
     raise ValueError(Messages.ERROR_CONFIG_VALUE_INVALID.format(field=field))
 
 
+def _normalize_extract_backend(value: object) -> str:
+    if value is None:
+        return DEFAULT_EXTRACT_BACKEND
+    if isinstance(value, str):
+        normalized = value.strip().lower() or DEFAULT_EXTRACT_BACKEND
+        if normalized in SUPPORTED_EXTRACT_BACKENDS:
+            return normalized
+    raise ValueError(Messages.ERROR_CONFIG_VALUE_INVALID.format(field="extract_backend"))
+
+
+def _coerce_extract_backend(value: object) -> str:
+    if value is None:
+        return DEFAULT_EXTRACT_BACKEND
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in SUPPORTED_EXTRACT_BACKENDS:
+            return normalized
+    return DEFAULT_EXTRACT_BACKEND
+
+
 def _normalize_rerank(value: object) -> str:
     if value is None:
         normalized = DEFAULT_RERANK
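For orientation, a minimal usage sketch of the new config surface introduced above. The function and field names come from the vexor/config.py diff; the import path and on-disk behavior are assumptions and have not been run against the released wheel. Note that DEFAULT_EXTRACT_CONCURRENCY clamps to the 1-4 range: 4 on an 8-core machine, 2 on a 2-core machine, and 1 when os.cpu_count() returns None.

# Hypothetical sketch based on the vexor/config.py diff above (not verified against 0.21.1).
from vexor import config

config.set_extract_concurrency(8)        # stored as-is; the CLI layer enforces >= 1
config.set_extract_backend(" Process ")  # normalized to "process"; unsupported values raise ValueError

cfg = config.load_config()
print(cfg.extract_concurrency, cfg.extract_backend)  # expected: 8 process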
vexor/providers/gemini.py
CHANGED
(Where a removed line's previous text was not captured in this extract, it appears below as "…"; the same applies to vexor/providers/openai.py.)
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
 from typing import Iterator, Sequence
 
 import numpy as np
@@ -38,14 +39,19 @@ class GeminiEmbeddingBackend:
         if base_url:
             client_kwargs["http_options"] = genai_types.HttpOptions(base_url=base_url)
         self._client = genai.Client(**client_kwargs)
+        self._executor: ThreadPoolExecutor | None = None
 
     def embed(self, texts: Sequence[str]) -> np.ndarray:
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
-        …
+        if self.concurrency > 1:
+            batches = list(_chunk(texts, self.chunk_size))
+            if len(batches) > 1:
+                vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
+                executor = self._executor
+                if executor is None:
+                    executor = ThreadPoolExecutor(max_workers=self.concurrency)
+                    self._executor = executor
                 future_map = {
                     executor.submit(self._embed_batch, batch): idx
                     for idx, batch in enumerate(batches)
@@ -53,23 +59,34 @@ class GeminiEmbeddingBackend:
                 for future in as_completed(future_map):
                     idx = future_map[future]
                     vectors_by_batch[idx] = future.result()
-                …
+                vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+            else:
+                vectors = []
+                for batch in batches:
+                    vectors.extend(self._embed_batch(batch))
         else:
             vectors = []
-            for batch in
+            for batch in _chunk(texts, self.chunk_size):
                 vectors.extend(self._embed_batch(batch))
         if not vectors:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
         return np.vstack(vectors)
 
     def _embed_batch(self, batch: Sequence[str]) -> list[np.ndarray]:
-        …
+        attempt = 0
+        while True:
+            try:
+                response = self._client.models.embed_content(
+                    model=self.model_name,
+                    contents=list(batch),
+                )
+                break
+            except genai_errors.ClientError as exc:
+                if _should_retry_genai_error(exc) and attempt < _MAX_RETRIES:
+                    _sleep(_backoff_delay(attempt))
+                    attempt += 1
+                    continue
+                raise RuntimeError(_format_genai_error(exc)) from exc
         embeddings = getattr(response, "embeddings", None)
         if not embeddings:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
@@ -90,6 +107,55 @@ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
         yield items[idx : idx + size]
 
 
+_RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+_MAX_RETRIES = 2
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 4.0
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+def _backoff_delay(attempt: int) -> float:
+    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))
+
+
+def _extract_status_code(exc: Exception) -> int | None:
+    for attr in ("status_code", "status", "http_status"):
+        value = getattr(exc, attr, None)
+        if isinstance(value, int):
+            return value
+    response = getattr(exc, "response", None)
+    if response is not None:
+        value = getattr(response, "status_code", None)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def _should_retry_genai_error(exc: Exception) -> bool:
+    status = _extract_status_code(exc)
+    if status in _RETRYABLE_STATUS_CODES:
+        return True
+    name = exc.__class__.__name__.lower()
+    if "ratelimit" in name or "timeout" in name or "temporarily" in name:
+        return True
+    message = str(exc).lower()
+    return any(
+        token in message
+        for token in (
+            "rate limit",
+            "timeout",
+            "temporar",
+            "overload",
+            "try again",
+            "too many requests",
+            "service unavailable",
+        )
+    )
+
+
 def _format_genai_error(exc: genai_errors.ClientError) -> str:
     message = getattr(exc, "message", None) or str(exc)
     if "API key" in message:
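The retry constants added above imply a short, bounded schedule. A self-contained re-statement of the backoff helper for illustration (copied from the diff; the real helpers live in vexor/providers/gemini.py and openai.py):

# Re-stated from the diff for illustration only.
_RETRY_BASE_DELAY = 0.5
_RETRY_MAX_DELAY = 4.0
_MAX_RETRIES = 2

def _backoff_delay(attempt: int) -> float:
    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))

# A failing batch is attempted at most _MAX_RETRIES + 1 = 3 times, sleeping
# _backoff_delay(0) = 0.5 s and _backoff_delay(1) = 1.0 s between attempts;
# the 4.0 s cap only matters from the third retry onward, which this schedule never reaches.
print([_backoff_delay(a) for a in range(5)])  # [0.5, 1.0, 2.0, 4.0, 4.0]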
vexor/providers/openai.py
CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
 from typing import Iterator, Sequence
 
 import numpy as np
@@ -35,14 +36,19 @@ class OpenAIEmbeddingBackend:
         if base_url:
             client_kwargs["base_url"] = base_url.rstrip("/")
         self._client = OpenAI(**client_kwargs)
+        self._executor: ThreadPoolExecutor | None = None
 
     def embed(self, texts: Sequence[str]) -> np.ndarray:
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
-        …
+        if self.concurrency > 1:
+            batches = list(_chunk(texts, self.chunk_size))
+            if len(batches) > 1:
+                vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
+                executor = self._executor
+                if executor is None:
+                    executor = ThreadPoolExecutor(max_workers=self.concurrency)
+                    self._executor = executor
                 future_map = {
                     executor.submit(self._embed_batch, batch): idx
                     for idx, batch in enumerate(batches)
@@ -50,23 +56,34 @@ class OpenAIEmbeddingBackend:
                 for future in as_completed(future_map):
                     idx = future_map[future]
                     vectors_by_batch[idx] = future.result()
-                …
+                vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+            else:
+                vectors = []
+                for batch in batches:
+                    vectors.extend(self._embed_batch(batch))
         else:
             vectors = []
-            for batch in
+            for batch in _chunk(texts, self.chunk_size):
                 vectors.extend(self._embed_batch(batch))
         if not vectors:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
         return np.vstack(vectors)
 
     def _embed_batch(self, batch: Sequence[str]) -> list[np.ndarray]:
-        …
+        attempt = 0
+        while True:
+            try:
+                response = self._client.embeddings.create(
+                    model=self.model_name,
+                    input=list(batch),
+                )
+                break
+            except Exception as exc:  # pragma: no cover - API client variations
+                if _should_retry_openai_error(exc) and attempt < _MAX_RETRIES:
+                    _sleep(_backoff_delay(attempt))
+                    attempt += 1
+                    continue
+                raise RuntimeError(_format_openai_error(exc)) from exc
         data = getattr(response, "data", None) or []
         if not data:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
@@ -87,6 +104,55 @@ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
         yield items[idx : idx + size]
 
 
+_RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+_MAX_RETRIES = 2
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 4.0
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+def _backoff_delay(attempt: int) -> float:
+    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))
+
+
+def _extract_status_code(exc: Exception) -> int | None:
+    for attr in ("status_code", "status", "http_status"):
+        value = getattr(exc, attr, None)
+        if isinstance(value, int):
+            return value
+    response = getattr(exc, "response", None)
+    if response is not None:
+        value = getattr(response, "status_code", None)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def _should_retry_openai_error(exc: Exception) -> bool:
+    status = _extract_status_code(exc)
+    if status in _RETRYABLE_STATUS_CODES:
+        return True
+    name = exc.__class__.__name__.lower()
+    if "ratelimit" in name or "timeout" in name or "temporarily" in name:
+        return True
+    message = str(exc).lower()
+    return any(
+        token in message
+        for token in (
+            "rate limit",
+            "timeout",
+            "temporar",
+            "overload",
+            "try again",
+            "too many requests",
+            "service unavailable",
+        )
+    )
+
+
 def _format_openai_error(exc: Exception) -> str:
     message = getattr(exc, "message", None) or str(exc)
     return f"{Messages.ERROR_OPENAI_PREFIX}{message}"
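Both providers use the same ThreadPoolExecutor pattern for concurrent batches: futures are mapped back to their batch index, so results are reassembled in input order even though as_completed() yields them in completion order. A simplified, self-contained illustration of that pattern follows (toy names; the real code also caches the executor on self._executor instead of creating one per call):

# Toy illustration of the order-preserving pattern used in embed() above.
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def fake_embed_batch(batch: list[str]) -> list[str]:  # stand-in for _embed_batch
    time.sleep(0.01 * (len(batch) % 3))                # deliberately finish out of order
    return [f"vec({item})" for item in batch]

batches = [["a", "b"], ["c"], ["d", "e", "f"]]
results: list[list[str] | None] = [None] * len(batches)
with ThreadPoolExecutor(max_workers=4) as executor:
    future_map = {executor.submit(fake_embed_batch, b): i for i, b in enumerate(batches)}
    for future in as_completed(future_map):
        results[future_map[future]] = future.result()  # slot result by original batch index

flat = [vec for batch in results if batch for vec in batch]
print(flat)  # vectors for a, b, c, d, e, f, in the original input order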
vexor/services/config_service.py
CHANGED
@@ -11,6 +11,8 @@ from ..config import (
     set_base_url,
     set_batch_size,
     set_embed_concurrency,
+    set_extract_concurrency,
+    set_extract_backend,
     set_auto_index,
     set_flashrank_model,
     set_local_cuda,
@@ -28,6 +30,8 @@ class ConfigUpdateResult:
     model_set: bool = False
     batch_size_set: bool = False
     embed_concurrency_set: bool = False
+    extract_concurrency_set: bool = False
+    extract_backend_set: bool = False
     provider_set: bool = False
     base_url_set: bool = False
     base_url_cleared: bool = False
@@ -49,6 +53,8 @@ class ConfigUpdateResult:
                 self.model_set,
                 self.batch_size_set,
                 self.embed_concurrency_set,
+                self.extract_concurrency_set,
+                self.extract_backend_set,
                 self.provider_set,
                 self.base_url_set,
                 self.base_url_cleared,
@@ -71,6 +77,8 @@ def apply_config_updates(
     model: str | None = None,
     batch_size: int | None = None,
     embed_concurrency: int | None = None,
+    extract_concurrency: int | None = None,
+    extract_backend: str | None = None,
     provider: str | None = None,
     base_url: str | None = None,
     clear_base_url: bool = False,
@@ -101,6 +109,12 @@ def apply_config_updates(
     if embed_concurrency is not None:
         set_embed_concurrency(embed_concurrency)
         result.embed_concurrency_set = True
+    if extract_concurrency is not None:
+        set_extract_concurrency(extract_concurrency)
+        result.extract_concurrency_set = True
+    if extract_backend is not None:
+        set_extract_backend(extract_backend)
+        result.extract_backend_set = True
     if provider is not None:
         set_provider(provider)
         result.provider_set = True