vexor-0.2.0-py3-none-any.whl → vexor-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/config.py CHANGED
@@ -12,7 +12,11 @@ CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
  CONFIG_FILE = CONFIG_DIR / "config.json"
  DEFAULT_MODEL = "gemini-embedding-001"
  DEFAULT_BATCH_SIZE = 0
- ENV_API_KEY = "GOOGLE_GENAI_API_KEY"
+ DEFAULT_PROVIDER = "gemini"
+ SUPPORTED_PROVIDERS: tuple[str, ...] = (DEFAULT_PROVIDER, "openai")
+ ENV_API_KEY = "VEXOR_API_KEY"
+ LEGACY_GEMINI_ENV = "GOOGLE_GENAI_API_KEY"
+ OPENAI_ENV = "OPENAI_API_KEY"


  @dataclass
@@ -20,6 +24,8 @@ class Config:
      api_key: str | None = None
      model: str = DEFAULT_MODEL
      batch_size: int = DEFAULT_BATCH_SIZE
+     provider: str = DEFAULT_PROVIDER
+     base_url: str | None = None


  def load_config() -> Config:
@@ -30,6 +36,8 @@ def load_config() -> Config:
          api_key=raw.get("api_key") or None,
          model=raw.get("model") or DEFAULT_MODEL,
          batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
+         provider=raw.get("provider") or DEFAULT_PROVIDER,
+         base_url=raw.get("base_url") or None,
      )


@@ -41,6 +49,10 @@ def save_config(config: Config) -> None:
      if config.model:
          data["model"] = config.model
      data["batch_size"] = config.batch_size
+     if config.provider:
+         data["provider"] = config.provider
+     if config.base_url:
+         data["base_url"] = config.base_url
      CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")


@@ -60,3 +72,35 @@ def set_batch_size(value: int) -> None:
      config = load_config()
      config.batch_size = value
      save_config(config)
+
+
+ def set_provider(value: str) -> None:
+     config = load_config()
+     config.provider = value
+     save_config(config)
+
+
+ def set_base_url(value: str | None) -> None:
+     config = load_config()
+     config.base_url = value
+     save_config(config)
+
+
+ def resolve_api_key(configured: str | None, provider: str) -> str | None:
+     """Return the first available API key from config or environment."""
+
+     if configured:
+         return configured
+     general = os.getenv(ENV_API_KEY)
+     if general:
+         return general
+     normalized = (provider or DEFAULT_PROVIDER).lower()
+     if normalized == "gemini":
+         legacy = os.getenv(LEGACY_GEMINI_ENV)
+         if legacy:
+             return legacy
+     if normalized == "openai":
+         openai_key = os.getenv(OPENAI_ENV)
+         if openai_key:
+             return openai_key
+     return None
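
For reference, a minimal sketch (not taken from the package) of the precedence that resolve_api_key implements: an explicitly configured key wins, then the general VEXOR_API_KEY variable, then the provider-specific fallbacks. The key values below are placeholders.

import os

from vexor.config import resolve_api_key

os.environ.pop("VEXOR_API_KEY", None)              # no general override set
os.environ["GOOGLE_GENAI_API_KEY"] = "legacy-key"  # only the legacy Gemini variable is exported

resolve_api_key("configured-key", "gemini")  # -> "configured-key" (explicit key wins)
resolve_api_key(None, "gemini")              # -> "legacy-key" via LEGACY_GEMINI_ENV
resolve_api_key(None, "openai")              # -> None unless OPENAI_API_KEY is set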
vexor/modes.py ADDED
@@ -0,0 +1,81 @@
+ """Index mode registry and strategy helpers."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, Protocol, Sequence
+
+ from .services.content_extract_service import extract_head
+
+ PREVIEW_CHAR_LIMIT = 160
+
+
+ @dataclass(slots=True)
+ class ModePayload:
+     label: str
+     preview: str | None
+
+
+ class IndexModeStrategy(Protocol):
+     name: str
+
+     def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+         raise NotImplementedError
+
+     def payload_for_file(self, file: Path) -> ModePayload:
+         raise NotImplementedError
+
+
+ @dataclass(frozen=True, slots=True)
+ class NameStrategy(IndexModeStrategy):
+     name: str = "name"
+
+     def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+         return [self.payload_for_file(file) for file in files]
+
+     def payload_for_file(self, file: Path) -> ModePayload:
+         label = file.name.replace("_", " ")
+         preview = file.name
+         return ModePayload(label=label, preview=preview)
+
+
+ @dataclass(frozen=True, slots=True)
+ class HeadStrategy(IndexModeStrategy):
+     name: str = "head"
+     fallback: NameStrategy = NameStrategy()
+
+     def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+         return [self.payload_for_file(file) for file in files]
+
+     def payload_for_file(self, file: Path) -> ModePayload:
+         snippet = extract_head(file)
+         if snippet:
+             label = f"{file.name} :: {snippet}"
+             preview = _trim_preview(snippet)
+             return ModePayload(label=label, preview=preview)
+         return self.fallback.payload_for_file(file)
+
+
+ _STRATEGIES: Dict[str, IndexModeStrategy] = {
+     "name": NameStrategy(),
+     "head": HeadStrategy(),
+ }
+
+
+ def get_strategy(mode: str) -> IndexModeStrategy:
+     try:
+         return _STRATEGIES[mode]
+     except KeyError as exc:
+         raise ValueError(f"Unsupported mode: {mode}") from exc
+
+
+ def available_modes() -> list[str]:
+     return sorted(_STRATEGIES.keys())
+
+
+ def _trim_preview(text: str, limit: int = PREVIEW_CHAR_LIMIT) -> str:
+     stripped = text.strip()
+     if len(stripped) <= limit:
+         return stripped
+     return stripped[: limit - 1].rstrip() + "…"
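
A short usage sketch for the new mode registry; the file name is hypothetical and the calls mirror the functions defined above.

from pathlib import Path

from vexor.modes import available_modes, get_strategy

available_modes()                                               # -> ["head", "name"]
strategy = get_strategy("name")
payload = strategy.payload_for_file(Path("release_notes.md"))   # hypothetical file
payload.label                                                   # -> "release notes.md"
get_strategy("fulltext")                                        # raises ValueError: Unsupported mode: fulltext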
vexor/providers/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """Embedding backend implementations for Vexor."""
+
+ __all__ = ["gemini", "openai"]
vexor/providers/gemini.py ADDED
@@ -0,0 +1,74 @@
+ """Gemini-backed embedding backend for Vexor."""
+
+ from __future__ import annotations
+
+ from typing import Iterator, Sequence
+
+ import numpy as np
+ from dotenv import load_dotenv
+ from google import genai
+ from google.genai import errors as genai_errors
+ from google.genai import types as genai_types
+
+ from ..config import DEFAULT_MODEL
+ from ..text import Messages
+
+
+ class GeminiEmbeddingBackend:
+     """Embedding backend that calls the Gemini API via google-genai."""
+
+     def __init__(
+         self,
+         *,
+         model_name: str = DEFAULT_MODEL,
+         api_key: str | None = None,
+         chunk_size: int | None = None,
+         base_url: str | None = None,
+     ) -> None:
+         load_dotenv()
+         self.model_name = model_name
+         self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+         self.api_key = api_key
+         if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
+             raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+         client_kwargs: dict[str, object] = {"api_key": self.api_key}
+         if base_url:
+             client_kwargs["http_options"] = genai_types.HttpOptions(base_url=base_url)
+         self._client = genai.Client(**client_kwargs)
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         if not texts:
+             return np.empty((0, 0), dtype=np.float32)
+         vectors: list[np.ndarray] = []
+         for chunk in _chunk(texts, self.chunk_size):
+             try:
+                 response = self._client.models.embed_content(
+                     model=self.model_name,
+                     contents=list(chunk),
+                 )
+             except genai_errors.ClientError as exc:
+                 raise RuntimeError(_format_genai_error(exc)) from exc
+             embeddings = getattr(response, "embeddings", None)
+             if not embeddings:
+                 raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+             for embedding in embeddings:
+                 values = getattr(embedding, "values", None) or getattr(
+                     embedding, "value", None
+                 )
+                 vectors.append(np.asarray(values, dtype=np.float32))
+         return np.vstack(vectors)
+
+
+ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+     if size is None or size <= 0:
+         yield items
+         return
+     for idx in range(0, len(items), size):
+         yield items[idx : idx + size]
+
+
+ def _format_genai_error(exc: genai_errors.ClientError) -> str:
+     message = getattr(exc, "message", None) or str(exc)
+     if "API key" in message:
+         return Messages.ERROR_API_KEY_INVALID
+     return f"{Messages.ERROR_GENAI_PREFIX}{message}"
vexor/providers/openai.py ADDED
@@ -0,0 +1,69 @@
+ """OpenAI-backed embedding backend for Vexor."""
+
+ from __future__ import annotations
+
+ from typing import Iterator, Sequence
+
+ import numpy as np
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+ from ..text import Messages
+
+
+ class OpenAIEmbeddingBackend:
+     """Embedding backend that calls OpenAI's embeddings API."""
+
+     def __init__(
+         self,
+         *,
+         model_name: str,
+         api_key: str | None,
+         chunk_size: int | None = None,
+         base_url: str | None = None,
+     ) -> None:
+         load_dotenv()
+         self.model_name = model_name
+         self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+         self.api_key = api_key
+         if not self.api_key:
+             raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+         client_kwargs: dict[str, object] = {"api_key": self.api_key}
+         if base_url:
+             client_kwargs["base_url"] = base_url.rstrip("/")
+         self._client = OpenAI(**client_kwargs)
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         if not texts:
+             return np.empty((0, 0), dtype=np.float32)
+         vectors: list[np.ndarray] = []
+         for chunk in _chunk(texts, self.chunk_size):
+             try:
+                 response = self._client.embeddings.create(
+                     model=self.model_name,
+                     input=list(chunk),
+                 )
+             except Exception as exc:  # pragma: no cover - API client variations
+                 raise RuntimeError(_format_openai_error(exc)) from exc
+             data = getattr(response, "data", None) or []
+             if not data:
+                 raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+             for item in data:
+                 embedding = getattr(item, "embedding", None)
+                 if embedding is None:
+                     continue
+                 vectors.append(np.asarray(embedding, dtype=np.float32))
+         return np.vstack(vectors)
+
+
+ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+     if size is None or size <= 0:
+         yield items
+         return
+     for idx in range(0, len(items), size):
+         yield items[idx : idx + size]
+
+
+ def _format_openai_error(exc: Exception) -> str:
+     message = getattr(exc, "message", None) or str(exc)
+     return f"{Messages.ERROR_OPENAI_PREFIX}{message}"
vexor/search.py CHANGED
@@ -1,19 +1,17 @@
- """Semantic search helpers backed by the Google Gemini embedding API."""
+ """Semantic search helpers backed by pluggable embedding backends."""

  from __future__ import annotations

- import os
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Iterator, List, Protocol, Sequence
+ from typing import List, Protocol, Sequence

  import numpy as np
- from dotenv import load_dotenv
- from google import genai
- from google.genai import errors as genai_errors
  from sklearn.metrics.pairwise import cosine_similarity

- from .config import DEFAULT_MODEL, ENV_API_KEY, load_config
+ from .config import DEFAULT_MODEL, DEFAULT_PROVIDER, SUPPORTED_PROVIDERS, resolve_api_key
+ from .providers.gemini import GeminiEmbeddingBackend
+ from .providers.openai import OpenAIEmbeddingBackend
  from .text import Messages


@@ -23,6 +21,7 @@ class SearchResult:

      path: Path
      score: float
+     preview: str | None = None


  class EmbeddingBackend(Protocol):
@@ -33,50 +32,6 @@ class EmbeddingBackend(Protocol):
          raise NotImplementedError  # pragma: no cover


- class GeminiEmbeddingBackend:
-     """Embedding backend that calls the Gemini API via google-genai."""
-
-     def __init__(
-         self,
-         *,
-         model_name: str = DEFAULT_MODEL,
-         api_key: str | None = None,
-         chunk_size: int | None = None,
-     ) -> None:
-         load_dotenv()
-         config = load_config()
-         self.model_name = model_name
-         self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
-         env_key = os.getenv(ENV_API_KEY)
-         configured_key = getattr(config, "api_key", None)
-         self.api_key = api_key or configured_key or env_key
-         if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
-             raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
-         self._client = genai.Client(api_key=self.api_key)
-
-     def embed(self, texts: Sequence[str]) -> np.ndarray:
-         if not texts:
-             return np.empty((0, 0), dtype=np.float32)
-         vectors: list[np.ndarray] = []
-         for chunk in _chunk(texts, self.chunk_size):
-             try:
-                 response = self._client.models.embed_content(
-                     model=self.model_name,
-                     contents=list(chunk),
-                 )
-             except genai_errors.ClientError as exc:
-                 raise RuntimeError(_format_genai_error(exc)) from exc
-             embeddings = getattr(response, "embeddings", None)
-             if not embeddings:
-                 raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
-             for embedding in embeddings:
-                 values = getattr(embedding, "values", None) or getattr(
-                     embedding, "value", None
-                 )
-                 vectors.append(np.asarray(values, dtype=np.float32))
-         return np.vstack(vectors)
-
-
  class VexorSearcher:
      """Encapsulates embedding generation and similarity computation."""

@@ -86,13 +41,20 @@ class VexorSearcher:
          *,
          backend: EmbeddingBackend | None = None,
          batch_size: int = 0,
+         provider: str = DEFAULT_PROVIDER,
+         base_url: str | None = None,
+         api_key: str | None = None,
      ) -> None:
          self.model_name = model_name
          self.batch_size = max(batch_size, 0)
-         self._backend = backend or GeminiEmbeddingBackend(
-             model_name=model_name, chunk_size=self.batch_size
-         )
-         self._device = f"{self.model_name} via Gemini API"
+         self.provider = (provider or DEFAULT_PROVIDER).lower()
+         self.base_url = base_url
+         self.api_key = resolve_api_key(api_key, self.provider)
+         if backend is not None:
+             self._backend = backend
+             self._device = getattr(backend, "device", "Custom embedding backend")
+         else:
+             self._backend = self._create_backend()

      @property
      def device(self) -> str:
@@ -136,17 +98,24 @@ class VexorSearcher:
          """Return the text representation of a file path for embedding."""
          return path.name.replace("_", " ")

-
- def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
-     if size is None or size <= 0:
-         yield items
-         return
-     for idx in range(0, len(items), size):
-         yield items[idx : idx + size]
-
-
- def _format_genai_error(exc: genai_errors.ClientError) -> str:
-     message = getattr(exc, "message", None) or str(exc)
-     if "API key" in message:
-         return Messages.ERROR_API_KEY_INVALID
-     return f"{Messages.ERROR_GENAI_PREFIX}{message}"
+     def _create_backend(self) -> EmbeddingBackend:
+         if self.provider == "gemini":
+             self._device = f"{self.model_name} via Gemini API"
+             return GeminiEmbeddingBackend(
+                 model_name=self.model_name,
+                 chunk_size=self.batch_size,
+                 base_url=self.base_url,
+                 api_key=self.api_key,
+             )
+         if self.provider == "openai":
+             self._device = f"{self.model_name} via OpenAI API"
+             return OpenAIEmbeddingBackend(
+                 model_name=self.model_name,
+                 chunk_size=self.batch_size,
+                 base_url=self.base_url,
+                 api_key=self.api_key,
+             )
+         allowed = ", ".join(SUPPORTED_PROVIDERS)
+         raise RuntimeError(
+             Messages.ERROR_PROVIDER_INVALID.format(value=self.provider, allowed=allowed)
+         )
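
A minimal sketch of the provider selection performed by _create_backend above; the values are placeholders, and an unsupported provider raises the new ERROR_PROVIDER_INVALID message.

from vexor.search import VexorSearcher

searcher = VexorSearcher(
    model_name="text-embedding-3-small",   # placeholder model
    provider="openai",                     # "gemini" remains the default
    api_key="sk-example",                  # otherwise resolved via resolve_api_key
)
searcher.device                            # -> "text-embedding-3-small via OpenAI API"

VexorSearcher(model_name="x", provider="cohere")  # raises RuntimeError (unsupported provider)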
vexor/services/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """Higher level service helpers used by the CLI layer."""
+
+ __all__ = [
+     "cache_service",
+     "config_service",
+     "index_service",
+     "search_service",
+ ]
+
vexor/services/cache_service.py ADDED
@@ -0,0 +1,39 @@
+ """Shared helpers for interacting with cached index metadata."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Sequence
+
+ def is_cache_current(
+     root: Path,
+     include_hidden: bool,
+     cached_files: Sequence[dict],
+     *,
+     recursive: bool,
+     current_files=None,
+ ) -> bool:
+     """Return True if cached metadata matches the current directory snapshot."""
+
+     if not cached_files:
+         return False
+     from ..cache import compare_snapshot  # local import avoids eager heavy deps
+
+     return compare_snapshot(
+         root,
+         include_hidden,
+         cached_files,
+         recursive=recursive,
+         current_files=current_files,
+     )
+
+
+ def load_index_metadata_safe(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool):
+     """Load index metadata when present, returning None if missing."""
+
+     from ..cache import load_index  # local import avoids eager heavy deps
+
+     try:
+         return load_index(root, model, include_hidden, mode, recursive)
+     except FileNotFoundError:
+         return None
vexor/services/config_service.py ADDED
@@ -0,0 +1,83 @@
+ """Logic helpers for the `vexor config` command."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+ from ..config import (
+     Config,
+     load_config,
+     set_api_key,
+     set_base_url,
+     set_batch_size,
+     set_model,
+     set_provider,
+ )
+
+
+ @dataclass(slots=True)
+ class ConfigUpdateResult:
+     api_key_set: bool = False
+     api_key_cleared: bool = False
+     model_set: bool = False
+     batch_size_set: bool = False
+     provider_set: bool = False
+     base_url_set: bool = False
+     base_url_cleared: bool = False
+
+     @property
+     def changed(self) -> bool:
+         return any(
+             (
+                 self.api_key_set,
+                 self.api_key_cleared,
+                 self.model_set,
+                 self.batch_size_set,
+                 self.provider_set,
+                 self.base_url_set,
+                 self.base_url_cleared,
+             )
+         )
+
+
+ def apply_config_updates(
+     *,
+     api_key: str | None = None,
+     clear_api_key: bool = False,
+     model: str | None = None,
+     batch_size: int | None = None,
+     provider: str | None = None,
+     base_url: str | None = None,
+     clear_base_url: bool = False,
+ ) -> ConfigUpdateResult:
+     """Apply config mutations and report which fields were updated."""
+
+     result = ConfigUpdateResult()
+     if api_key is not None:
+         set_api_key(api_key)
+         result.api_key_set = True
+     if clear_api_key:
+         set_api_key(None)
+         result.api_key_cleared = True
+     if model is not None:
+         set_model(model)
+         result.model_set = True
+     if batch_size is not None:
+         set_batch_size(batch_size)
+         result.batch_size_set = True
+     if provider is not None:
+         set_provider(provider)
+         result.provider_set = True
+     if base_url is not None:
+         set_base_url(base_url)
+         result.base_url_set = True
+     if clear_base_url:
+         set_base_url(None)
+         result.base_url_cleared = True
+     return result
+
+
+ def get_config_snapshot() -> Config:
+     """Return the current configuration dataclass."""
+
+     return load_config()