vexor-0.2.0-py3-none-any.whl → vexor-0.5.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- vexor/__init__.py +1 -1
- vexor/cache.py +299 -26
- vexor/cli.py +340 -193
- vexor/config.py +45 -1
- vexor/modes.py +81 -0
- vexor/providers/__init__.py +3 -0
- vexor/providers/gemini.py +74 -0
- vexor/providers/openai.py +69 -0
- vexor/search.py +38 -69
- vexor/services/__init__.py +9 -0
- vexor/services/cache_service.py +39 -0
- vexor/services/config_service.py +83 -0
- vexor/services/content_extract_service.py +188 -0
- vexor/services/index_service.py +260 -0
- vexor/services/search_service.py +95 -0
- vexor/services/system_service.py +81 -0
- vexor/text.py +53 -10
- vexor/utils.py +24 -9
- vexor-0.5.0.dist-info/METADATA +139 -0
- vexor-0.5.0.dist-info/RECORD +24 -0
- vexor-0.2.0.dist-info/METADATA +0 -102
- vexor-0.2.0.dist-info/RECORD +0 -13
- {vexor-0.2.0.dist-info → vexor-0.5.0.dist-info}/WHEEL +0 -0
- {vexor-0.2.0.dist-info → vexor-0.5.0.dist-info}/entry_points.txt +0 -0
- {vexor-0.2.0.dist-info → vexor-0.5.0.dist-info}/licenses/LICENSE +0 -0
vexor/config.py
CHANGED
@@ -12,7 +12,11 @@ CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
 CONFIG_FILE = CONFIG_DIR / "config.json"
 DEFAULT_MODEL = "gemini-embedding-001"
 DEFAULT_BATCH_SIZE = 0
-
+DEFAULT_PROVIDER = "gemini"
+SUPPORTED_PROVIDERS: tuple[str, ...] = (DEFAULT_PROVIDER, "openai")
+ENV_API_KEY = "VEXOR_API_KEY"
+LEGACY_GEMINI_ENV = "GOOGLE_GENAI_API_KEY"
+OPENAI_ENV = "OPENAI_API_KEY"


 @dataclass
@@ -20,6 +24,8 @@ class Config:
     api_key: str | None = None
     model: str = DEFAULT_MODEL
     batch_size: int = DEFAULT_BATCH_SIZE
+    provider: str = DEFAULT_PROVIDER
+    base_url: str | None = None


 def load_config() -> Config:
@@ -30,6 +36,8 @@ def load_config() -> Config:
         api_key=raw.get("api_key") or None,
         model=raw.get("model") or DEFAULT_MODEL,
         batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
+        provider=raw.get("provider") or DEFAULT_PROVIDER,
+        base_url=raw.get("base_url") or None,
     )


@@ -41,6 +49,10 @@ def save_config(config: Config) -> None:
     if config.model:
         data["model"] = config.model
     data["batch_size"] = config.batch_size
+    if config.provider:
+        data["provider"] = config.provider
+    if config.base_url:
+        data["base_url"] = config.base_url
     CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")


@@ -60,3 +72,35 @@ def set_batch_size(value: int) -> None:
     config = load_config()
     config.batch_size = value
     save_config(config)
+
+
+def set_provider(value: str) -> None:
+    config = load_config()
+    config.provider = value
+    save_config(config)
+
+
+def set_base_url(value: str | None) -> None:
+    config = load_config()
+    config.base_url = value
+    save_config(config)
+
+
+def resolve_api_key(configured: str | None, provider: str) -> str | None:
+    """Return the first available API key from config or environment."""
+
+    if configured:
+        return configured
+    general = os.getenv(ENV_API_KEY)
+    if general:
+        return general
+    normalized = (provider or DEFAULT_PROVIDER).lower()
+    if normalized == "gemini":
+        legacy = os.getenv(LEGACY_GEMINI_ENV)
+        if legacy:
+            return legacy
+    if normalized == "openai":
+        openai_key = os.getenv(OPENAI_ENV)
+        if openai_key:
+            return openai_key
+    return None
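For reference, the new resolve_api_key helper falls back from the explicitly configured value to the generic VEXOR_API_KEY variable and only then to a provider-specific variable. A minimal sketch of that lookup order, assuming vexor 0.5.0 is importable and using placeholder key values:

import os

from vexor.config import resolve_api_key

# An explicitly configured key always wins.
assert resolve_api_key("cfg-key", "openai") == "cfg-key"

# Otherwise the generic VEXOR_API_KEY is consulted first...
os.environ["VEXOR_API_KEY"] = "generic-key"
assert resolve_api_key(None, "gemini") == "generic-key"

# ...and only then the provider-specific variable (legacy GOOGLE_GENAI_API_KEY
# for gemini, OPENAI_API_KEY for openai); with nothing set it returns None.
del os.environ["VEXOR_API_KEY"]
os.environ["OPENAI_API_KEY"] = "openai-key"
assert resolve_api_key(None, "openai") == "openai-key"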
vexor/modes.py
ADDED
@@ -0,0 +1,81 @@
+"""Index mode registry and strategy helpers."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Protocol, Sequence
+
+from .services.content_extract_service import extract_head
+
+PREVIEW_CHAR_LIMIT = 160
+
+
+@dataclass(slots=True)
+class ModePayload:
+    label: str
+    preview: str | None
+
+
+class IndexModeStrategy(Protocol):
+    name: str
+
+    def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+        raise NotImplementedError
+
+    def payload_for_file(self, file: Path) -> ModePayload:
+        raise NotImplementedError
+
+
+@dataclass(frozen=True, slots=True)
+class NameStrategy(IndexModeStrategy):
+    name: str = "name"
+
+    def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+        return [self.payload_for_file(file) for file in files]
+
+    def payload_for_file(self, file: Path) -> ModePayload:
+        label = file.name.replace("_", " ")
+        preview = file.name
+        return ModePayload(label=label, preview=preview)
+
+
+@dataclass(frozen=True, slots=True)
+class HeadStrategy(IndexModeStrategy):
+    name: str = "head"
+    fallback: NameStrategy = NameStrategy()
+
+    def payloads_for_files(self, files: Sequence[Path]) -> list[ModePayload]:
+        return [self.payload_for_file(file) for file in files]
+
+    def payload_for_file(self, file: Path) -> ModePayload:
+        snippet = extract_head(file)
+        if snippet:
+            label = f"{file.name} :: {snippet}"
+            preview = _trim_preview(snippet)
+            return ModePayload(label=label, preview=preview)
+        return self.fallback.payload_for_file(file)
+
+
+_STRATEGIES: Dict[str, IndexModeStrategy] = {
+    "name": NameStrategy(),
+    "head": HeadStrategy(),
+}
+
+
+def get_strategy(mode: str) -> IndexModeStrategy:
+    try:
+        return _STRATEGIES[mode]
+    except KeyError as exc:
+        raise ValueError(f"Unsupported mode: {mode}") from exc
+
+
+def available_modes() -> list[str]:
+    return sorted(_STRATEGIES.keys())
+
+
+def _trim_preview(text: str, limit: int = PREVIEW_CHAR_LIMIT) -> str:
+    stripped = text.strip()
+    if len(stripped) <= limit:
+        return stripped
+    return stripped[: limit - 1].rstrip() + "…"
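The new module is a small registry: get_strategy resolves a mode name to a strategy whose payload_for_file produces the label that gets embedded and the preview shown in results. A usage sketch, assuming vexor 0.5.0 and its content-extraction dependencies are installed (the file path is hypothetical; NameStrategy never touches the filesystem):

from pathlib import Path

from vexor.modes import available_modes, get_strategy

print(available_modes())  # ['head', 'name']

strategy = get_strategy("name")
payload = strategy.payload_for_file(Path("release_notes.md"))
print(payload.label)    # "release notes.md"  (underscores become spaces)
print(payload.preview)  # "release_notes.md"

# get_strategy("body") would raise ValueError("Unsupported mode: body")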
vexor/providers/gemini.py
ADDED
@@ -0,0 +1,74 @@
+"""Gemini-backed embedding backend for Vexor."""
+
+from __future__ import annotations
+
+from typing import Iterator, Sequence
+
+import numpy as np
+from dotenv import load_dotenv
+from google import genai
+from google.genai import errors as genai_errors
+from google.genai import types as genai_types
+
+from ..config import DEFAULT_MODEL
+from ..text import Messages
+
+
+class GeminiEmbeddingBackend:
+    """Embedding backend that calls the Gemini API via google-genai."""
+
+    def __init__(
+        self,
+        *,
+        model_name: str = DEFAULT_MODEL,
+        api_key: str | None = None,
+        chunk_size: int | None = None,
+        base_url: str | None = None,
+    ) -> None:
+        load_dotenv()
+        self.model_name = model_name
+        self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+        self.api_key = api_key
+        if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
+            raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+        client_kwargs: dict[str, object] = {"api_key": self.api_key}
+        if base_url:
+            client_kwargs["http_options"] = genai_types.HttpOptions(base_url=base_url)
+        self._client = genai.Client(**client_kwargs)
+
+    def embed(self, texts: Sequence[str]) -> np.ndarray:
+        if not texts:
+            return np.empty((0, 0), dtype=np.float32)
+        vectors: list[np.ndarray] = []
+        for chunk in _chunk(texts, self.chunk_size):
+            try:
+                response = self._client.models.embed_content(
+                    model=self.model_name,
+                    contents=list(chunk),
+                )
+            except genai_errors.ClientError as exc:
+                raise RuntimeError(_format_genai_error(exc)) from exc
+            embeddings = getattr(response, "embeddings", None)
+            if not embeddings:
+                raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+            for embedding in embeddings:
+                values = getattr(embedding, "values", None) or getattr(
+                    embedding, "value", None
+                )
+                vectors.append(np.asarray(values, dtype=np.float32))
+        return np.vstack(vectors)
+
+
+def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+    if size is None or size <= 0:
+        yield items
+        return
+    for idx in range(0, len(items), size):
+        yield items[idx : idx + size]
+
+
+def _format_genai_error(exc: genai_errors.ClientError) -> str:
+    message = getattr(exc, "message", None) or str(exc)
+    if "API key" in message:
+        return Messages.ERROR_API_KEY_INVALID
+    return f"{Messages.ERROR_GENAI_PREFIX}{message}"
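Construction mirrors the backend that previously lived in search.py, except the key is now injected by the caller and an optional base_url is routed through google-genai's HttpOptions. A hedged sketch of direct use (the key, proxy URL, and chunk size are placeholders; a real request still needs a valid key and network access):

from vexor.providers.gemini import GeminiEmbeddingBackend

backend = GeminiEmbeddingBackend(
    model_name="gemini-embedding-001",
    api_key="your-real-key",  # RuntimeError at construction if missing or a placeholder
    chunk_size=16,            # texts are sent to the API in batches of 16
    base_url="https://gemini-proxy.example.com",  # optional; forwarded via HttpOptions
)
vectors = backend.embed(["semantic file search", "vector index"])
print(vectors.shape)  # (2, embedding_dim)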
vexor/providers/openai.py
ADDED
@@ -0,0 +1,69 @@
+"""OpenAI-backed embedding backend for Vexor."""
+
+from __future__ import annotations
+
+from typing import Iterator, Sequence
+
+import numpy as np
+from dotenv import load_dotenv
+from openai import OpenAI
+
+from ..text import Messages
+
+
+class OpenAIEmbeddingBackend:
+    """Embedding backend that calls OpenAI's embeddings API."""
+
+    def __init__(
+        self,
+        *,
+        model_name: str,
+        api_key: str | None,
+        chunk_size: int | None = None,
+        base_url: str | None = None,
+    ) -> None:
+        load_dotenv()
+        self.model_name = model_name
+        self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+        self.api_key = api_key
+        if not self.api_key:
+            raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+        client_kwargs: dict[str, object] = {"api_key": self.api_key}
+        if base_url:
+            client_kwargs["base_url"] = base_url.rstrip("/")
+        self._client = OpenAI(**client_kwargs)
+
+    def embed(self, texts: Sequence[str]) -> np.ndarray:
+        if not texts:
+            return np.empty((0, 0), dtype=np.float32)
+        vectors: list[np.ndarray] = []
+        for chunk in _chunk(texts, self.chunk_size):
+            try:
+                response = self._client.embeddings.create(
+                    model=self.model_name,
+                    input=list(chunk),
+                )
+            except Exception as exc:  # pragma: no cover - API client variations
+                raise RuntimeError(_format_openai_error(exc)) from exc
+            data = getattr(response, "data", None) or []
+            if not data:
+                raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+            for item in data:
+                embedding = getattr(item, "embedding", None)
+                if embedding is None:
+                    continue
+                vectors.append(np.asarray(embedding, dtype=np.float32))
+        return np.vstack(vectors)
+
+
+def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+    if size is None or size <= 0:
+        yield items
+        return
+    for idx in range(0, len(items), size):
+        yield items[idx : idx + size]
+
+
+def _format_openai_error(exc: Exception) -> str:
+    message = getattr(exc, "message", None) or str(exc)
+    return f"{Messages.ERROR_OPENAI_PREFIX}{message}"
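Both providers share the same private batching helper: with no chunk_size the whole input goes out in one request, otherwise it is sliced into fixed-size chunks. For illustration only, calling the module-private helper directly (normal callers would not), assuming the openai client library is installed so the module imports:

from vexor.providers.openai import _chunk

texts = ["a", "b", "c", "d", "e"]

# No chunk size: a single batch containing every text.
print([list(batch) for batch in _chunk(texts, None)])  # [['a', 'b', 'c', 'd', 'e']]

# chunk_size=2: batches of at most two texts each.
print([list(batch) for batch in _chunk(texts, 2)])     # [['a', 'b'], ['c', 'd'], ['e']]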
vexor/search.py
CHANGED
@@ -1,19 +1,17 @@
-"""Semantic search helpers backed by
+"""Semantic search helpers backed by pluggable embedding backends."""

 from __future__ import annotations

-import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import
+from typing import List, Protocol, Sequence

 import numpy as np
-from dotenv import load_dotenv
-from google import genai
-from google.genai import errors as genai_errors
 from sklearn.metrics.pairwise import cosine_similarity

-from .config import DEFAULT_MODEL,
+from .config import DEFAULT_MODEL, DEFAULT_PROVIDER, SUPPORTED_PROVIDERS, resolve_api_key
+from .providers.gemini import GeminiEmbeddingBackend
+from .providers.openai import OpenAIEmbeddingBackend
 from .text import Messages


@@ -23,6 +21,7 @@ class SearchResult:

     path: Path
     score: float
+    preview: str | None = None


 class EmbeddingBackend(Protocol):
@@ -33,50 +32,6 @@ class EmbeddingBackend(Protocol):
         raise NotImplementedError  # pragma: no cover


-class GeminiEmbeddingBackend:
-    """Embedding backend that calls the Gemini API via google-genai."""
-
-    def __init__(
-        self,
-        *,
-        model_name: str = DEFAULT_MODEL,
-        api_key: str | None = None,
-        chunk_size: int | None = None,
-    ) -> None:
-        load_dotenv()
-        config = load_config()
-        self.model_name = model_name
-        self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
-        env_key = os.getenv(ENV_API_KEY)
-        configured_key = getattr(config, "api_key", None)
-        self.api_key = api_key or configured_key or env_key
-        if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
-            raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
-        self._client = genai.Client(api_key=self.api_key)
-
-    def embed(self, texts: Sequence[str]) -> np.ndarray:
-        if not texts:
-            return np.empty((0, 0), dtype=np.float32)
-        vectors: list[np.ndarray] = []
-        for chunk in _chunk(texts, self.chunk_size):
-            try:
-                response = self._client.models.embed_content(
-                    model=self.model_name,
-                    contents=list(chunk),
-                )
-            except genai_errors.ClientError as exc:
-                raise RuntimeError(_format_genai_error(exc)) from exc
-            embeddings = getattr(response, "embeddings", None)
-            if not embeddings:
-                raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
-            for embedding in embeddings:
-                values = getattr(embedding, "values", None) or getattr(
-                    embedding, "value", None
-                )
-                vectors.append(np.asarray(values, dtype=np.float32))
-        return np.vstack(vectors)
-
-
 class VexorSearcher:
     """Encapsulates embedding generation and similarity computation."""

@@ -86,13 +41,20 @@ class VexorSearcher:
         *,
         backend: EmbeddingBackend | None = None,
         batch_size: int = 0,
+        provider: str = DEFAULT_PROVIDER,
+        base_url: str | None = None,
+        api_key: str | None = None,
     ) -> None:
         self.model_name = model_name
         self.batch_size = max(batch_size, 0)
-        self.
-
-        )
-
+        self.provider = (provider or DEFAULT_PROVIDER).lower()
+        self.base_url = base_url
+        self.api_key = resolve_api_key(api_key, self.provider)
+        if backend is not None:
+            self._backend = backend
+            self._device = getattr(backend, "device", "Custom embedding backend")
+        else:
+            self._backend = self._create_backend()

     @property
     def device(self) -> str:
@@ -136,17 +98,24 @@
         """Return the text representation of a file path for embedding."""
         return path.name.replace("_", " ")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def _create_backend(self) -> EmbeddingBackend:
+        if self.provider == "gemini":
+            self._device = f"{self.model_name} via Gemini API"
+            return GeminiEmbeddingBackend(
+                model_name=self.model_name,
+                chunk_size=self.batch_size,
+                base_url=self.base_url,
+                api_key=self.api_key,
+            )
+        if self.provider == "openai":
+            self._device = f"{self.model_name} via OpenAI API"
+            return OpenAIEmbeddingBackend(
+                model_name=self.model_name,
+                chunk_size=self.batch_size,
+                base_url=self.base_url,
+                api_key=self.api_key,
+            )
+        allowed = ", ".join(SUPPORTED_PROVIDERS)
+        raise RuntimeError(
+            Messages.ERROR_PROVIDER_INVALID.format(value=self.provider, allowed=allowed)
+        )
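VexorSearcher now only resolves a provider when no backend is supplied; anything satisfying the EmbeddingBackend protocol can be passed in, which is handy for tests or alternative providers. A minimal sketch with the package's dependencies installed (the stub class is hypothetical, not part of the package; no API key is needed because _create_backend is never called):

import numpy as np

from vexor.search import VexorSearcher

class StubBackend:
    """Deterministic fake embeddings with shape (len(texts), 4)."""

    def embed(self, texts):
        return np.ones((len(texts), 4), dtype=np.float32)

searcher = VexorSearcher(model_name="stub-model", backend=StubBackend())
print(searcher.device)  # expected: "Custom embedding backend" (stub has no .device)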
vexor/services/cache_service.py
ADDED
@@ -0,0 +1,39 @@
+"""Shared helpers for interacting with cached index metadata."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Sequence
+
+def is_cache_current(
+    root: Path,
+    include_hidden: bool,
+    cached_files: Sequence[dict],
+    *,
+    recursive: bool,
+    current_files=None,
+) -> bool:
+    """Return True if cached metadata matches the current directory snapshot."""
+
+    if not cached_files:
+        return False
+    from ..cache import compare_snapshot  # local import avoids eager heavy deps
+
+    return compare_snapshot(
+        root,
+        include_hidden,
+        cached_files,
+        recursive=recursive,
+        current_files=current_files,
+    )
+
+
+def load_index_metadata_safe(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool):
+    """Load index metadata when present, returning None if missing."""
+
+    from ..cache import load_index  # local import avoids eager heavy deps
+
+    try:
+        return load_index(root, model, include_hidden, mode, recursive)
+    except FileNotFoundError:
+        return None
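These wrappers keep the heavy cache module off the import path until it is needed and turn a missing index into None rather than an exception. A hedged usage sketch (arguments follow the signature above; the directory is hypothetical):

from pathlib import Path

from vexor.services.cache_service import load_index_metadata_safe

meta = load_index_metadata_safe(
    Path("~/projects/demo").expanduser(),
    "gemini-embedding-001",
    False,   # include_hidden
    "name",  # mode
    True,    # recursive
)
if meta is None:
    print("No cached index yet for these settings.")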
vexor/services/config_service.py
ADDED
@@ -0,0 +1,83 @@
+"""Logic helpers for the `vexor config` command."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from ..config import (
+    Config,
+    load_config,
+    set_api_key,
+    set_base_url,
+    set_batch_size,
+    set_model,
+    set_provider,
+)
+
+
+@dataclass(slots=True)
+class ConfigUpdateResult:
+    api_key_set: bool = False
+    api_key_cleared: bool = False
+    model_set: bool = False
+    batch_size_set: bool = False
+    provider_set: bool = False
+    base_url_set: bool = False
+    base_url_cleared: bool = False
+
+    @property
+    def changed(self) -> bool:
+        return any(
+            (
+                self.api_key_set,
+                self.api_key_cleared,
+                self.model_set,
+                self.batch_size_set,
+                self.provider_set,
+                self.base_url_set,
+                self.base_url_cleared,
+            )
+        )
+
+
+def apply_config_updates(
+    *,
+    api_key: str | None = None,
+    clear_api_key: bool = False,
+    model: str | None = None,
+    batch_size: int | None = None,
+    provider: str | None = None,
+    base_url: str | None = None,
+    clear_base_url: bool = False,
+) -> ConfigUpdateResult:
+    """Apply config mutations and report which fields were updated."""
+
+    result = ConfigUpdateResult()
+    if api_key is not None:
+        set_api_key(api_key)
+        result.api_key_set = True
+    if clear_api_key:
+        set_api_key(None)
+        result.api_key_cleared = True
+    if model is not None:
+        set_model(model)
+        result.model_set = True
+    if batch_size is not None:
+        set_batch_size(batch_size)
+        result.batch_size_set = True
+    if provider is not None:
+        set_provider(provider)
+        result.provider_set = True
+    if base_url is not None:
+        set_base_url(base_url)
+        result.base_url_set = True
+    if clear_base_url:
+        set_base_url(None)
+        result.base_url_cleared = True
+    return result
+
+
+def get_config_snapshot() -> Config:
+    """Return the current configuration dataclass."""
+
+    return load_config()
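The service returns a ConfigUpdateResult so the CLI can tell whether anything changed and report per-field updates. A short sketch of the flow (values are placeholders; note this writes to ~/.vexor/config.json):

from vexor.services.config_service import apply_config_updates, get_config_snapshot

result = apply_config_updates(provider="openai", model="text-embedding-3-small")
print(result.provider_set, result.model_set, result.changed)  # True True True

snapshot = get_config_snapshot()
print(snapshot.provider, snapshot.model)  # openai text-embedding-3-small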