vexor 0.21.1__py3-none-any.whl → 0.23.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/cache.py CHANGED
@@ -5,9 +5,13 @@ from __future__ import annotations
5
5
  import hashlib
6
6
  import os
7
7
  import sqlite3
8
+ from collections import OrderedDict
8
9
  from dataclasses import dataclass
10
+ from contextlib import contextmanager
11
+ from contextvars import ContextVar
9
12
  from datetime import datetime, timezone, timedelta
10
13
  from pathlib import Path
14
+ from threading import Lock
11
15
  from typing import Iterable, Mapping, Sequence
12
16
 
13
17
  import numpy as np
@@ -16,10 +20,18 @@ from .utils import collect_files
16
20
 
17
21
  DEFAULT_CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
18
22
  CACHE_DIR = DEFAULT_CACHE_DIR
23
+ _CACHE_DIR_OVERRIDE: ContextVar[Path | None] = ContextVar(
24
+ "vexor_cache_dir_override",
25
+ default=None,
26
+ )
19
27
  CACHE_VERSION = 6
20
28
  DB_FILENAME = "index.db"
21
29
  EMBED_CACHE_TTL_DAYS = 30
22
30
  EMBED_CACHE_MAX_ENTRIES = 50_000
31
+ EMBED_MEMORY_CACHE_MAX_ENTRIES = 2_048
32
+
33
+ _EMBED_MEMORY_CACHE: "OrderedDict[tuple[str, int | None, str], np.ndarray]" = OrderedDict()
34
+ _EMBED_MEMORY_LOCK = Lock()
23
35
 
24
36
 
25
37
  @dataclass(slots=True)
@@ -77,11 +89,73 @@ def query_cache_key(query: str, model: str) -> str:
77
89
  return hashlib.sha1(base.encode("utf-8")).hexdigest()
78
90
 
79
91
 
80
- def embedding_cache_key(text: str) -> str:
81
- """Return a stable hash for embedding cache lookups."""
92
+ def embedding_cache_key(text: str, dimension: int | None = None) -> str:
93
+ """Return a stable hash for embedding cache lookups.
82
94
 
95
+ Args:
96
+ text: The text to hash
97
+ dimension: Optional embedding dimension (included in hash for dimension-aware caching)
98
+ """
83
99
  clean_text = text or ""
84
- return hashlib.sha1(clean_text.encode("utf-8")).hexdigest()
100
+ # Include dimension in hash to prevent cross-dimension cache pollution
101
+ if dimension is not None:
102
+ base = f"{clean_text}|dim={dimension}"
103
+ else:
104
+ base = clean_text
105
+ return hashlib.sha1(base.encode("utf-8")).hexdigest()
106
+
107
+
108
def _clear_embedding_memory_cache() -> None:
    """Remove every entry from the in-process embedding cache.

    The cache is emptied unconditionally while holding the cache lock.
    Skipping the clear when ``EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0`` would
    leave previously stored vectors in memory if the cache was populated
    before being disabled; clearing an already-empty cache is harmless.
    """
    with _EMBED_MEMORY_LOCK:
        _EMBED_MEMORY_CACHE.clear()
113
+
114
+
115
def _load_embedding_memory_cache(
    model: str,
    text_hashes: Sequence[str],
    dimension: int | None = None,
) -> dict[str, np.ndarray]:
    """Look up vectors in the in-process embedding cache.

    Args:
        model: Model name; first component of the cache key.
        text_hashes: Hashes to look up; falsy entries are skipped.
        dimension: Embedding dimension; second component of the cache key,
            so entries stored under another dimension are not returned.

    Returns:
        Mapping of text hash to cached vector for every hit. Empty when the
        memory cache is disabled (``EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0``).
    """
    found: dict[str, np.ndarray] = {}
    if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0:
        return found
    with _EMBED_MEMORY_LOCK:
        for digest in text_hashes:
            if not digest:
                continue
            cache_key = (model, dimension, digest)
            cached = _EMBED_MEMORY_CACHE.get(cache_key)
            if cached is None:
                continue
            # A hit moves the entry to the most-recently-used end so it is
            # the last candidate for eviction.
            _EMBED_MEMORY_CACHE.move_to_end(cache_key)
            found[digest] = cached
    return found
135
+
136
+
137
def _store_embedding_memory_cache(
    *,
    model: str,
    embeddings: Mapping[str, np.ndarray],
    dimension: int | None = None,
) -> None:
    """Insert vectors into the in-process embedding cache.

    Args:
        model: Model name; first component of the cache key.
        embeddings: Mapping of text hash to vector. Entries with a falsy hash
            or an empty vector are ignored.
        dimension: Embedding dimension; second component of the cache key.

    Each vector is converted with ``np.asarray(..., dtype=np.float32)`` and
    placed at the most-recently-used end. After every insert the oldest
    entries are evicted until the cache holds at most
    ``EMBED_MEMORY_CACHE_MAX_ENTRIES`` items. Does nothing when the cache is
    disabled or ``embeddings`` is empty.
    """
    if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0 or not embeddings:
        return
    with _EMBED_MEMORY_LOCK:
        for digest, raw_vector in embeddings.items():
            if not digest:
                continue
            stored = np.asarray(raw_vector, dtype=np.float32)
            if stored.size == 0:
                continue
            cache_key = (model, dimension, digest)
            # Assigning to an existing key keeps its old position, so move it
            # explicitly to the most-recently-used end.
            _EMBED_MEMORY_CACHE[cache_key] = stored
            _EMBED_MEMORY_CACHE.move_to_end(cache_key)
            # Trim from the least-recently-used end until within the limit.
            while len(_EMBED_MEMORY_CACHE) > EMBED_MEMORY_CACHE_MAX_ENTRIES:
                _EMBED_MEMORY_CACHE.popitem(last=False)
85
159
 
86
160
 
87
161
  def _serialize_extensions(extensions: Sequence[str] | None) -> str:
@@ -115,9 +189,32 @@ def _chunk_values(values: Sequence[object], size: int) -> Iterable[Sequence[obje
115
189
  yield values[idx : idx + size]
116
190
 
117
191
 
192
def _resolve_cache_dir() -> Path:
    """Return the cache directory in effect for the current context.

    The context-local override stored in ``_CACHE_DIR_OVERRIDE`` wins when it
    is set; otherwise the module-level ``CACHE_DIR`` is returned.
    """
    active_override = _CACHE_DIR_OVERRIDE.get()
    if active_override is None:
        return CACHE_DIR
    return active_override
195
+
196
+
197
+ @contextmanager
198
+ def cache_dir_context(path: Path | str | None):
199
+ """Temporarily override the cache directory for the current context."""
200
+
201
+ if path is None:
202
+ yield
203
+ return
204
+ dir_path = Path(path).expanduser().resolve()
205
+ if dir_path.exists() and not dir_path.is_dir():
206
+ raise NotADirectoryError(f"Path is not a directory: {dir_path}")
207
+ token = _CACHE_DIR_OVERRIDE.set(dir_path)
208
+ try:
209
+ yield
210
+ finally:
211
+ _CACHE_DIR_OVERRIDE.reset(token)
212
+
213
+
118
214
  def ensure_cache_dir() -> Path:
119
- CACHE_DIR.mkdir(parents=True, exist_ok=True)
120
- return CACHE_DIR
215
+ cache_dir = _resolve_cache_dir()
216
+ cache_dir.mkdir(parents=True, exist_ok=True)
217
+ return cache_dir
121
218
 
122
219
 
123
220
  def set_cache_dir(path: Path | str | None) -> None:
@@ -134,8 +231,8 @@ def set_cache_dir(path: Path | str | None) -> None:
134
231
  def cache_db_path() -> Path:
135
232
  """Return the absolute path to the shared SQLite cache database."""
136
233
 
137
- ensure_cache_dir()
138
- return CACHE_DIR / DB_FILENAME
234
+ cache_dir = ensure_cache_dir()
235
+ return cache_dir / DB_FILENAME
139
236
 
140
237
 
141
238
  def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
@@ -1304,25 +1401,38 @@ def load_embedding_cache(
1304
1401
  model: str,
1305
1402
  text_hashes: Sequence[str],
1306
1403
  conn: sqlite3.Connection | None = None,
1404
+ *,
1405
+ dimension: int | None = None,
1307
1406
  ) -> dict[str, np.ndarray]:
1308
- """Load cached embeddings keyed by (model, text_hash)."""
1309
-
1407
+ """Load cached embeddings keyed by (model, text_hash).
1408
+
1409
+ Args:
1410
+ model: The embedding model name
1411
+ text_hashes: Sequence of text hashes to look up (should be generated with
1412
+ embedding_cache_key() using the same dimension parameter)
1413
+ conn: Optional database connection
1414
+ dimension: Embedding dimension (used for memory cache segmentation)
1415
+ """
1310
1416
  unique_hashes = list(dict.fromkeys([value for value in text_hashes if value]))
1311
1417
  if not unique_hashes:
1312
1418
  return {}
1419
+ results = _load_embedding_memory_cache(model, unique_hashes, dimension=dimension)
1420
+ missing = [value for value in unique_hashes if value not in results]
1421
+ if not missing:
1422
+ return results
1313
1423
  db_path = cache_db_path()
1314
1424
  owns_connection = conn is None
1315
1425
  try:
1316
1426
  connection = conn or _connect(db_path, readonly=True)
1317
1427
  except sqlite3.OperationalError:
1318
- return {}
1428
+ return results
1319
1429
  try:
1320
1430
  try:
1321
1431
  _ensure_schema_readonly(connection, tables=("embedding_cache",))
1322
1432
  except sqlite3.OperationalError:
1323
- return {}
1324
- results: dict[str, np.ndarray] = {}
1325
- for chunk in _chunk_values(unique_hashes, 900):
1433
+ return results
1434
+ disk_results: dict[str, np.ndarray] = {}
1435
+ for chunk in _chunk_values(missing, 900):
1326
1436
  placeholders = ", ".join("?" for _ in chunk)
1327
1437
  rows = connection.execute(
1328
1438
  f"""
@@ -1339,7 +1449,12 @@ def load_embedding_cache(
1339
1449
  vector = np.frombuffer(blob, dtype=np.float32)
1340
1450
  if vector.size == 0:
1341
1451
  continue
1342
- results[row["text_hash"]] = vector
1452
+ disk_results[row["text_hash"]] = vector
1453
+ if disk_results:
1454
+ _store_embedding_memory_cache(
1455
+ model=model, embeddings=disk_results, dimension=dimension
1456
+ )
1457
+ results.update(disk_results)
1343
1458
  return results
1344
1459
  finally:
1345
1460
  if owns_connection:
@@ -1351,11 +1466,20 @@ def store_embedding_cache(
1351
1466
  model: str,
1352
1467
  embeddings: Mapping[str, np.ndarray],
1353
1468
  conn: sqlite3.Connection | None = None,
1469
+ dimension: int | None = None,
1354
1470
  ) -> None:
1355
- """Store embedding vectors keyed by (model, text_hash)."""
1356
-
1471
+ """Store embedding vectors keyed by (model, text_hash).
1472
+
1473
+ Args:
1474
+ model: The embedding model name
1475
+ embeddings: Dict mapping text_hash -> vector (hashes should be generated with
1476
+ embedding_cache_key() using the same dimension parameter)
1477
+ conn: Optional database connection
1478
+ dimension: Embedding dimension (used for memory cache segmentation)
1479
+ """
1357
1480
  if not embeddings:
1358
1481
  return
1482
+ _store_embedding_memory_cache(model=model, embeddings=embeddings, dimension=dimension)
1359
1483
  db_path = cache_db_path()
1360
1484
  owns_connection = conn is None
1361
1485
  connection = conn or _connect(db_path)
vexor/cli.py CHANGED
@@ -31,14 +31,18 @@ from .config import (
31
31
  DEFAULT_MODEL,
32
32
  DEFAULT_PROVIDER,
33
33
  DEFAULT_RERANK,
34
+ DEFAULT_VOYAGE_MODEL,
35
+ DIMENSION_SUPPORTED_MODELS,
34
36
  SUPPORTED_EXTRACT_BACKENDS,
35
37
  SUPPORTED_PROVIDERS,
36
38
  SUPPORTED_RERANKERS,
37
39
  flashrank_cache_dir,
40
+ get_supported_dimensions,
38
41
  load_config,
39
42
  normalize_remote_rerank_url,
40
43
  resolve_remote_rerank_api_key,
41
44
  resolve_default_model,
45
+ supports_dimensions,
42
46
  )
43
47
  from .modes import available_modes, get_strategy
44
48
  from .services.cache_service import is_cache_current, load_index_metadata_safe
@@ -454,6 +458,7 @@ def search(
454
458
  rerank=rerank,
455
459
  flashrank_model=flashrank_model,
456
460
  remote_rerank=remote_rerank,
461
+ embedding_dimensions=config.embedding_dimensions,
457
462
  )
458
463
  if output_format == SearchOutputFormat.rich:
459
464
  if no_cache:
@@ -488,7 +493,7 @@ def search(
488
493
  else:
489
494
  typer.echo(message, err=True)
490
495
  raise typer.Exit(code=1)
491
- except RuntimeError as exc:
496
+ except (RuntimeError, ValueError) as exc:
492
497
  if output_format == SearchOutputFormat.rich:
493
498
  console.print(_styled(str(exc), Styles.ERROR))
494
499
  else:
@@ -688,8 +693,9 @@ def index(
688
693
  local_cuda=bool(config.local_cuda),
689
694
  exclude_patterns=normalized_excludes,
690
695
  extensions=normalized_exts,
696
+ embedding_dimensions=config.embedding_dimensions,
691
697
  )
692
- except RuntimeError as exc:
698
+ except (RuntimeError, ValueError) as exc:
693
699
  console.print(_styled(str(exc), Styles.ERROR))
694
700
  raise typer.Exit(code=1)
695
701
  if result.status == IndexStatus.EMPTY:
@@ -768,6 +774,16 @@ def config(
768
774
  "--clear-base-url",
769
775
  help=Messages.HELP_CLEAR_BASE_URL,
770
776
  ),
777
+ set_embedding_dimensions_option: int | None = typer.Option(
778
+ None,
779
+ "--set-embedding-dimensions",
780
+ help=Messages.HELP_SET_EMBEDDING_DIMENSIONS,
781
+ ),
782
+ clear_embedding_dimensions: bool = typer.Option(
783
+ False,
784
+ "--clear-embedding-dimensions",
785
+ help=Messages.HELP_CLEAR_EMBEDDING_DIMENSIONS,
786
+ ),
771
787
  set_auto_index_option: str | None = typer.Option(
772
788
  None,
773
789
  "--set-auto-index",
@@ -989,6 +1005,33 @@ def config(
989
1005
  except ValueError as exc:
990
1006
  raise typer.BadParameter(str(exc)) from exc
991
1007
 
1008
+ effective_embedding_dimensions = set_embedding_dimensions_option
1009
+ effective_clear_embedding_dimensions = clear_embedding_dimensions
1010
+ if effective_embedding_dimensions == 0:
1011
+ effective_embedding_dimensions = None
1012
+ effective_clear_embedding_dimensions = True
1013
+
1014
+ # Validate embedding dimensions if set
1015
+ if effective_embedding_dimensions is not None:
1016
+ if effective_embedding_dimensions < 0:
1017
+ raise typer.BadParameter(
1018
+ f"--set-embedding-dimensions must be non-negative, got {effective_embedding_dimensions}"
1019
+ )
1020
+ if effective_embedding_dimensions > 0:
1021
+ # Resolve effective model from provider + model to account for provider defaults
1022
+ effective_model = resolve_default_model(pending_provider, pending_model)
1023
+ if not supports_dimensions(effective_model):
1024
+ raise typer.BadParameter(
1025
+ f"Model '{effective_model}' does not support custom dimensions. "
1026
+ f"Supported model names/prefixes: {', '.join(DIMENSION_SUPPORTED_MODELS.keys())}"
1027
+ )
1028
+ supported = get_supported_dimensions(effective_model)
1029
+ if supported and effective_embedding_dimensions not in supported:
1030
+ raise typer.BadParameter(
1031
+ f"Dimension {effective_embedding_dimensions} is not supported for model '{effective_model}'. "
1032
+ f"Supported dimensions: {supported}"
1033
+ )
1034
+
992
1035
  updates = apply_config_updates(
993
1036
  api_key=set_api_key_option,
994
1037
  clear_api_key=clear_api_key,
@@ -1007,6 +1050,8 @@ def config(
1007
1050
  remote_rerank_model=set_remote_rerank_model_option,
1008
1051
  remote_rerank_api_key=set_remote_rerank_api_key_option,
1009
1052
  clear_remote_rerank=clear_remote_rerank,
1053
+ embedding_dimensions=effective_embedding_dimensions,
1054
+ clear_embedding_dimensions=effective_clear_embedding_dimensions,
1010
1055
  )
1011
1056
 
1012
1057
  if updates.api_key_set:
@@ -1109,6 +1154,17 @@ def config(
1109
1154
  console.print(_styled(Messages.INFO_REMOTE_RERANK_API_KEY_SET, Styles.SUCCESS))
1110
1155
  if updates.remote_rerank_cleared and clear_remote_rerank:
1111
1156
  console.print(_styled(Messages.INFO_REMOTE_RERANK_CLEARED, Styles.SUCCESS))
1157
+ if updates.embedding_dimensions_set and effective_embedding_dimensions is not None:
1158
+ console.print(
1159
+ _styled(
1160
+ Messages.INFO_EMBEDDING_DIMENSIONS_SET.format(
1161
+ value=effective_embedding_dimensions
1162
+ ),
1163
+ Styles.SUCCESS,
1164
+ )
1165
+ )
1166
+ if updates.embedding_dimensions_cleared:
1167
+ console.print(_styled(Messages.INFO_EMBEDDING_DIMENSIONS_CLEARED, Styles.SUCCESS))
1112
1168
 
1113
1169
  if clear_flashrank:
1114
1170
  cache_dir = flashrank_cache_dir(create=False)
@@ -1188,6 +1244,7 @@ def config(
1188
1244
  api="yes" if cfg.api_key else "no",
1189
1245
  provider=provider,
1190
1246
  model=resolve_default_model(provider, cfg.model),
1247
+ embedding_dimensions=cfg.embedding_dimensions if cfg.embedding_dimensions else "default",
1191
1248
  batch=cfg.batch_size if cfg.batch_size is not None else DEFAULT_BATCH_SIZE,
1192
1249
  concurrency=cfg.embed_concurrency,
1193
1250
  extract_concurrency=cfg.extract_concurrency,