vexor 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/cache.py CHANGED
@@ -5,9 +5,13 @@ from __future__ import annotations
5
5
  import hashlib
6
6
  import os
7
7
  import sqlite3
8
+ from collections import OrderedDict
8
9
  from dataclasses import dataclass
10
+ from contextlib import contextmanager
11
+ from contextvars import ContextVar
9
12
  from datetime import datetime, timezone, timedelta
10
13
  from pathlib import Path
14
+ from threading import Lock
11
15
  from typing import Iterable, Mapping, Sequence
12
16
 
13
17
  import numpy as np
@@ -16,10 +20,18 @@ from .utils import collect_files
16
20
 
17
21
  DEFAULT_CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
18
22
  CACHE_DIR = DEFAULT_CACHE_DIR
23
+ _CACHE_DIR_OVERRIDE: ContextVar[Path | None] = ContextVar(
24
+ "vexor_cache_dir_override",
25
+ default=None,
26
+ )
19
27
  CACHE_VERSION = 6
20
28
  DB_FILENAME = "index.db"
21
29
  EMBED_CACHE_TTL_DAYS = 30
22
30
  EMBED_CACHE_MAX_ENTRIES = 50_000
31
+ EMBED_MEMORY_CACHE_MAX_ENTRIES = 2_048
32
+
33
+ _EMBED_MEMORY_CACHE: "OrderedDict[tuple[str, str], np.ndarray]" = OrderedDict()
34
+ _EMBED_MEMORY_LOCK = Lock()
23
35
 
24
36
 
25
37
  @dataclass(slots=True)
@@ -84,6 +96,55 @@ def embedding_cache_key(text: str) -> str:
84
96
  return hashlib.sha1(clean_text.encode("utf-8")).hexdigest()
85
97
 
86
98
 
99
+ def _clear_embedding_memory_cache() -> None:
100
+ if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0:
101
+ return
102
+ with _EMBED_MEMORY_LOCK:
103
+ _EMBED_MEMORY_CACHE.clear()
104
+
105
+
106
+ def _load_embedding_memory_cache(
107
+ model: str,
108
+ text_hashes: Sequence[str],
109
+ ) -> dict[str, np.ndarray]:
110
+ if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0:
111
+ return {}
112
+ results: dict[str, np.ndarray] = {}
113
+ with _EMBED_MEMORY_LOCK:
114
+ for text_hash in text_hashes:
115
+ if not text_hash:
116
+ continue
117
+ key = (model, text_hash)
118
+ vector = _EMBED_MEMORY_CACHE.pop(key, None)
119
+ if vector is None:
120
+ continue
121
+ _EMBED_MEMORY_CACHE[key] = vector
122
+ results[text_hash] = vector
123
+ return results
124
+
125
+
126
+ def _store_embedding_memory_cache(
127
+ *,
128
+ model: str,
129
+ embeddings: Mapping[str, np.ndarray],
130
+ ) -> None:
131
+ if EMBED_MEMORY_CACHE_MAX_ENTRIES <= 0 or not embeddings:
132
+ return
133
+ with _EMBED_MEMORY_LOCK:
134
+ for text_hash, vector in embeddings.items():
135
+ if not text_hash:
136
+ continue
137
+ array = np.asarray(vector, dtype=np.float32)
138
+ if array.size == 0:
139
+ continue
140
+ key = (model, text_hash)
141
+ if key in _EMBED_MEMORY_CACHE:
142
+ _EMBED_MEMORY_CACHE.pop(key, None)
143
+ _EMBED_MEMORY_CACHE[key] = array
144
+ while len(_EMBED_MEMORY_CACHE) > EMBED_MEMORY_CACHE_MAX_ENTRIES:
145
+ _EMBED_MEMORY_CACHE.popitem(last=False)
146
+
147
+
87
148
  def _serialize_extensions(extensions: Sequence[str] | None) -> str:
88
149
  if not extensions:
89
150
  return ""
@@ -115,9 +176,32 @@ def _chunk_values(values: Sequence[object], size: int) -> Iterable[Sequence[obje
115
176
  yield values[idx : idx + size]
116
177
 
117
178
 
179
+ def _resolve_cache_dir() -> Path:
180
+ override = _CACHE_DIR_OVERRIDE.get()
181
+ return override if override is not None else CACHE_DIR
182
+
183
+
184
+ @contextmanager
185
+ def cache_dir_context(path: Path | str | None):
186
+ """Temporarily override the cache directory for the current context."""
187
+
188
+ if path is None:
189
+ yield
190
+ return
191
+ dir_path = Path(path).expanduser().resolve()
192
+ if dir_path.exists() and not dir_path.is_dir():
193
+ raise NotADirectoryError(f"Path is not a directory: {dir_path}")
194
+ token = _CACHE_DIR_OVERRIDE.set(dir_path)
195
+ try:
196
+ yield
197
+ finally:
198
+ _CACHE_DIR_OVERRIDE.reset(token)
199
+
200
+
118
201
  def ensure_cache_dir() -> Path:
119
- CACHE_DIR.mkdir(parents=True, exist_ok=True)
120
- return CACHE_DIR
202
+ cache_dir = _resolve_cache_dir()
203
+ cache_dir.mkdir(parents=True, exist_ok=True)
204
+ return cache_dir
121
205
 
122
206
 
123
207
  def set_cache_dir(path: Path | str | None) -> None:
@@ -134,8 +218,8 @@ def set_cache_dir(path: Path | str | None) -> None:
134
218
  def cache_db_path() -> Path:
135
219
  """Return the absolute path to the shared SQLite cache database."""
136
220
 
137
- ensure_cache_dir()
138
- return CACHE_DIR / DB_FILENAME
221
+ cache_dir = ensure_cache_dir()
222
+ return cache_dir / DB_FILENAME
139
223
 
140
224
 
141
225
  def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
@@ -1310,19 +1394,23 @@ def load_embedding_cache(
1310
1394
  unique_hashes = list(dict.fromkeys([value for value in text_hashes if value]))
1311
1395
  if not unique_hashes:
1312
1396
  return {}
1397
+ results = _load_embedding_memory_cache(model, unique_hashes)
1398
+ missing = [value for value in unique_hashes if value not in results]
1399
+ if not missing:
1400
+ return results
1313
1401
  db_path = cache_db_path()
1314
1402
  owns_connection = conn is None
1315
1403
  try:
1316
1404
  connection = conn or _connect(db_path, readonly=True)
1317
1405
  except sqlite3.OperationalError:
1318
- return {}
1406
+ return results
1319
1407
  try:
1320
1408
  try:
1321
1409
  _ensure_schema_readonly(connection, tables=("embedding_cache",))
1322
1410
  except sqlite3.OperationalError:
1323
- return {}
1324
- results: dict[str, np.ndarray] = {}
1325
- for chunk in _chunk_values(unique_hashes, 900):
1411
+ return results
1412
+ disk_results: dict[str, np.ndarray] = {}
1413
+ for chunk in _chunk_values(missing, 900):
1326
1414
  placeholders = ", ".join("?" for _ in chunk)
1327
1415
  rows = connection.execute(
1328
1416
  f"""
@@ -1339,7 +1427,10 @@ def load_embedding_cache(
1339
1427
  vector = np.frombuffer(blob, dtype=np.float32)
1340
1428
  if vector.size == 0:
1341
1429
  continue
1342
- results[row["text_hash"]] = vector
1430
+ disk_results[row["text_hash"]] = vector
1431
+ if disk_results:
1432
+ _store_embedding_memory_cache(model=model, embeddings=disk_results)
1433
+ results.update(disk_results)
1343
1434
  return results
1344
1435
  finally:
1345
1436
  if owns_connection:
@@ -1356,6 +1447,7 @@ def store_embedding_cache(
1356
1447
 
1357
1448
  if not embeddings:
1358
1449
  return
1450
+ _store_embedding_memory_cache(model=model, embeddings=embeddings)
1359
1451
  db_path = cache_db_path()
1360
1452
  owns_connection = conn is None
1361
1453
  connection = conn or _connect(db_path)
vexor/config.py CHANGED
@@ -5,6 +5,8 @@ from __future__ import annotations
5
5
  import json
6
6
  import os
7
7
  from dataclasses import dataclass
8
+ from contextlib import contextmanager
9
+ from contextvars import ContextVar
8
10
  from collections.abc import Mapping
9
11
  from pathlib import Path
10
12
  from typing import Any, Dict
@@ -15,6 +17,10 @@ from .text import Messages
15
17
  DEFAULT_CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
16
18
  CONFIG_DIR = DEFAULT_CONFIG_DIR
17
19
  CONFIG_FILE = CONFIG_DIR / "config.json"
20
+ _CONFIG_DIR_OVERRIDE: ContextVar[Path | None] = ContextVar(
21
+ "vexor_config_dir_override",
22
+ default=None,
23
+ )
18
24
  DEFAULT_MODEL = "text-embedding-3-small"
19
25
  DEFAULT_GEMINI_MODEL = "gemini-embedding-001"
20
26
  DEFAULT_LOCAL_MODEL = "intfloat/multilingual-e5-small"
@@ -74,10 +80,40 @@ def _parse_remote_rerank(raw: object) -> RemoteRerankConfig | None:
74
80
  )
75
81
 
76
82
 
83
+ def _resolve_config_dir() -> Path:
84
+ override = _CONFIG_DIR_OVERRIDE.get()
85
+ return override if override is not None else CONFIG_DIR
86
+
87
+
88
+ def _resolve_config_file() -> Path:
89
+ override = _CONFIG_DIR_OVERRIDE.get()
90
+ if override is not None:
91
+ return override / "config.json"
92
+ return CONFIG_FILE
93
+
94
+
95
+ @contextmanager
96
+ def config_dir_context(path: Path | str | None):
97
+ """Temporarily override the config directory for the current context."""
98
+
99
+ if path is None:
100
+ yield
101
+ return
102
+ dir_path = Path(path).expanduser().resolve()
103
+ if dir_path.exists() and not dir_path.is_dir():
104
+ raise NotADirectoryError(f"Path is not a directory: {dir_path}")
105
+ token = _CONFIG_DIR_OVERRIDE.set(dir_path)
106
+ try:
107
+ yield
108
+ finally:
109
+ _CONFIG_DIR_OVERRIDE.reset(token)
110
+
111
+
77
112
  def load_config() -> Config:
78
- if not CONFIG_FILE.exists():
113
+ config_file = _resolve_config_file()
114
+ if not config_file.exists():
79
115
  return Config()
80
- raw = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
116
+ raw = json.loads(config_file.read_text(encoding="utf-8"))
81
117
  rerank = (raw.get("rerank") or DEFAULT_RERANK).strip().lower()
82
118
  if rerank not in SUPPORTED_RERANKERS:
83
119
  rerank = DEFAULT_RERANK
@@ -101,7 +137,8 @@ def load_config() -> Config:
101
137
 
102
138
 
103
139
  def save_config(config: Config) -> None:
104
- CONFIG_DIR.mkdir(parents=True, exist_ok=True)
140
+ config_dir = _resolve_config_dir()
141
+ config_dir.mkdir(parents=True, exist_ok=True)
105
142
  data: Dict[str, Any] = {}
106
143
  if config.api_key:
107
144
  data["api_key"] = config.api_key
@@ -130,15 +167,19 @@ def save_config(config: Config) -> None:
130
167
  remote_data["model"] = config.remote_rerank.model
131
168
  if remote_data:
132
169
  data["remote_rerank"] = remote_data
133
- CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
170
+ config_file = _resolve_config_file()
171
+ config_file.write_text(
172
+ json.dumps(data, ensure_ascii=False, indent=2),
173
+ encoding="utf-8",
174
+ )
134
175
 
135
176
 
136
177
  def local_model_dir() -> Path:
137
- return CONFIG_DIR / "models"
178
+ return _resolve_config_dir() / "models"
138
179
 
139
180
 
140
181
  def flashrank_cache_dir(*, create: bool = True) -> Path:
141
- cache_dir = CONFIG_DIR / "flashrank"
182
+ cache_dir = _resolve_config_dir() / "flashrank"
142
183
  if create:
143
184
  cache_dir.mkdir(parents=True, exist_ok=True)
144
185
  return cache_dir
@@ -108,6 +108,12 @@ TEXT_EXTENSIONS = (
108
108
  ".vb",
109
109
  ".ps1",
110
110
  ".bash",
111
+ ".zsh",
112
+ ".fish",
113
+ ".vue",
114
+ ".jsx",
115
+ ".tsx",
116
+ ".scss",
111
117
  )
112
118
 
113
119
 
@@ -689,35 +689,22 @@ def perform_search(request: SearchRequest) -> SearchResponse:
689
689
  )
690
690
 
691
691
 
692
- def _perform_search_with_temporary_index(request: SearchRequest) -> SearchResponse:
693
- from .index_service import build_index_in_memory # local import
694
-
695
- paths, file_vectors, metadata = build_index_in_memory(
696
- request.directory,
697
- include_hidden=request.include_hidden,
698
- respect_gitignore=request.respect_gitignore,
699
- mode=request.mode,
700
- recursive=request.recursive,
701
- model_name=request.model_name,
702
- batch_size=request.batch_size,
703
- embed_concurrency=request.embed_concurrency,
704
- extract_concurrency=request.extract_concurrency,
705
- extract_backend=request.extract_backend,
706
- provider=request.provider,
707
- base_url=request.base_url,
708
- api_key=request.api_key,
709
- local_cuda=request.local_cuda,
710
- exclude_patterns=request.exclude_patterns,
711
- extensions=request.extensions,
712
- no_cache=request.no_cache,
713
- )
692
+ def search_from_vectors(
693
+ request: SearchRequest,
694
+ *,
695
+ paths: Sequence[Path],
696
+ file_vectors: np.ndarray,
697
+ metadata: dict,
698
+ is_stale: bool = False,
699
+ ) -> SearchResponse:
700
+ """Return ranked results from an in-memory index."""
714
701
 
715
702
  if not len(paths):
716
703
  return SearchResponse(
717
704
  base_path=request.directory,
718
705
  backend=None,
719
706
  results=[],
720
- is_stale=False,
707
+ is_stale=is_stale,
721
708
  index_empty=True,
722
709
  )
723
710
 
@@ -813,12 +800,43 @@ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchRespon
813
800
  base_path=request.directory,
814
801
  backend=searcher.device,
815
802
  results=results,
816
- is_stale=False,
803
+ is_stale=is_stale,
817
804
  index_empty=False,
818
805
  reranker=reranker,
819
806
  )
820
807
 
821
808
 
809
+ def _perform_search_with_temporary_index(request: SearchRequest) -> SearchResponse:
810
+ from .index_service import build_index_in_memory # local import
811
+
812
+ paths, file_vectors, metadata = build_index_in_memory(
813
+ request.directory,
814
+ include_hidden=request.include_hidden,
815
+ respect_gitignore=request.respect_gitignore,
816
+ mode=request.mode,
817
+ recursive=request.recursive,
818
+ model_name=request.model_name,
819
+ batch_size=request.batch_size,
820
+ embed_concurrency=request.embed_concurrency,
821
+ extract_concurrency=request.extract_concurrency,
822
+ extract_backend=request.extract_backend,
823
+ provider=request.provider,
824
+ base_url=request.base_url,
825
+ api_key=request.api_key,
826
+ local_cuda=request.local_cuda,
827
+ exclude_patterns=request.exclude_patterns,
828
+ extensions=request.extensions,
829
+ no_cache=request.no_cache,
830
+ )
831
+ return search_from_vectors(
832
+ request,
833
+ paths=paths,
834
+ file_vectors=file_vectors,
835
+ metadata=metadata,
836
+ is_stale=False,
837
+ )
838
+
839
+
822
840
  def _load_index_vectors_for_request(
823
841
  request: SearchRequest,
824
842
  *,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vexor
3
- Version: 0.21.1
3
+ Version: 0.22.0
4
4
  Summary: A vector-powered CLI for semantic search over files.
5
5
  Project-URL: Repository, https://github.com/scarletkc/vexor
6
6
  Author: scarletkc
@@ -76,6 +76,13 @@ It supports configurable embedding and reranking providers, and exposes the same
76
76
  Vexor Demo Video
77
77
  </video>
78
78
 
79
+ ## Featured In
80
+
81
+ Vexor has been recognized and featured by the community:
82
+
83
+ - **[Ruan Yifeng's Weekly (Issue #379)](https://github.com/ruanyf/weekly/blob/master/docs/issue-379.md#ai-%E7%9B%B8%E5%85%B3)** - A leading tech newsletter in the Chinese developer community.
84
+ - **[Awesome Claude Skills](https://github.com/VoltAgent/awesome-claude-skills?tab=readme-ov-file#development-and-testing)** - Curated list of best-in-class skills for AI agents.
85
+
79
86
  ## Why Vexor?
80
87
 
81
88
  When you remember what a file *does* but forget its name or location, Vexor finds it instantly—no grep patterns or directory traversal needed.
@@ -315,8 +322,14 @@ Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_lin
315
322
 
316
323
  See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.
317
324
 
325
+ ## Contributing
326
+
318
327
  Contributions, issues, and PRs welcome! Star if you find it helpful.
319
328
 
329
+ ## Star History
330
+
331
+ [![Star History Chart](https://api.star-history.com/svg?repos=scarletkc/vexor&type=date&legend=top-left)](https://www.star-history.com/#scarletkc/vexor&type=date&legend=top-left)
332
+
320
333
  ## License
321
334
 
322
335
  [MIT](http://github.com/scarletkc/vexor/blob/main/LICENSE)
@@ -1,9 +1,9 @@
1
- vexor/__init__.py,sha256=Ab63nROf2nbDW-xY4wuNU_DS0K8hsqfPa1KjvCaKJzA,441
1
+ vexor/__init__.py,sha256=EQXPbwsXfHJAK3mNlhYfc4UwVqOMqL52cO46xg1GNCo,632
2
2
  vexor/__main__.py,sha256=ZFzom1wCfP6TPXe3aoDFpNcUgjbCZ7Quy_vfzNsH5Fw,426
3
- vexor/api.py,sha256=YCHpiydbPbRJUqdQYrpwe1JrRI-w_7LRuyZDGBP1_d4,11506
4
- vexor/cache.py,sha256=20SaiBKkPJIDXHtflX6uHiQXI4DtD6wx7RtWbz2l6LU,54339
3
+ vexor/api.py,sha256=W6eJLNbg5uBC2gcE8kq34iUciJCYZjY9Zsytxunl-vs,35860
4
+ vexor/cache.py,sha256=irCGy5XIcRKX5EFk7plKDVqGHrTjRpaWgXnFDiVChXk,57323
5
5
  vexor/cli.py,sha256=M9GKdD_mJ068Zpm62znTp0KhhKp1dkh_WHmfJHR9hwU,68094
6
- vexor/config.py,sha256=CiPfEH7Ilt6XepEx4p02qfW5HfkpNDBjhEMyckbSWaA,17413
6
+ vexor/config.py,sha256=CEL5u7afZV81Y0i9FVsj8GVzZlb2C2gC17r5lNKu-aM,18570
7
7
  vexor/modes.py,sha256=N_wAWoqbxmCfko-v520p59tpAYvUwraCSSQRtMaF4ac,11549
8
8
  vexor/output.py,sha256=iooZgLlK8dh7ajJ4XMHUNNx0qyTVtD_OAAwrBx5MeqE,864
9
9
  vexor/search.py,sha256=MSU4RmH6waFYOofkIdo8_ElTiz1oNaKuvr-3umif7Bs,6826
@@ -16,18 +16,18 @@ vexor/providers/openai.py,sha256=YnJDY9gJW7RfGGdkgswVHvmOKNvgLRQUsbpA1MUuLPg,535
16
16
  vexor/services/__init__.py,sha256=dA_i2N03vlYmbZbEK2knzJLWviunkNWbzN2LWPNvMk0,160
17
17
  vexor/services/cache_service.py,sha256=ywt6AgupCJ7_wC3je4znCMw5_VBouw3skbDTAt8xw6o,1639
18
18
  vexor/services/config_service.py,sha256=PojolfbSKh9pW8slF4qxCOs9hz5L6xvjf_nB7vfVlsU,5039
19
- vexor/services/content_extract_service.py,sha256=zdhLxpNv70BU7irLf3Uc0ou9rKSvdjtrDcHkgRKlMn4,26421
19
+ vexor/services/content_extract_service.py,sha256=oO7Hbadwp3uiyqCbr_4MRXQsUeMix2D98i-Yp94PwFk,26495
20
20
  vexor/services/index_service.py,sha256=FXf1bBoqj4-K1l38ItxHf6Oh7QHVIdNAdVY2kg_Zoq8,32265
21
21
  vexor/services/init_service.py,sha256=3D04hylGA9FRQhLHCfR95nMko3vb5MNBcRb9nWWaUE8,26863
22
22
  vexor/services/js_parser.py,sha256=eRtW6KlK4JBYDGbyoecHVqLZ0hcx-Cc0kx6bOujHPAQ,16254
23
23
  vexor/services/keyword_service.py,sha256=vmke8tII9kTwRDdBaLHBc6Hpy_B3p98L65iGkCQgtMU,2211
24
- vexor/services/search_service.py,sha256=K7SiAuMA7bGeyPWOHPMKpFFvzzkj5kHWwa3p94NakJs,38663
24
+ vexor/services/search_service.py,sha256=-7qHfebMOmXWiVBVYoBji6eaZr8OOY3q1vbtJaY3I6E,39076
25
25
  vexor/services/skill_service.py,sha256=Rrgt3OMsKPPiXOiRhSNAWjBM9UNz9qmSWQe3uYGzq4M,4863
26
26
  vexor/services/system_service.py,sha256=KPlv83v3rTvBiNiH7vrp6tDmt_AqHxuUd-5RI0TfvWs,24638
27
27
  vexor/_bundled_skills/vexor-cli/SKILL.md,sha256=m3FlyqgHBdRwyGPEp8PrUS21K0G2jEl88tRvhSPta08,2798
28
28
  vexor/_bundled_skills/vexor-cli/references/install-vexor.md,sha256=IUBShLI1mAxugwUIMAJQ5_j6KcaPWfobe0gSd6MWU7w,1245
29
- vexor-0.21.1.dist-info/METADATA,sha256=jS_xdqPXD8WsDNKd684w5eHmj_f1CHvNMR-DY-MvBQg,13494
30
- vexor-0.21.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
31
- vexor-0.21.1.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
32
- vexor-0.21.1.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
33
- vexor-0.21.1.dist-info/RECORD,,
29
+ vexor-0.22.0.dist-info/METADATA,sha256=UAqD6ciQCaP3eBrOGA3unO_XLX0eArcYHfcXuRhjN8c,14154
30
+ vexor-0.22.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
31
+ vexor-0.22.0.dist-info/entry_points.txt,sha256=dvxp6Q1R1d6bozR7TwmpdJ0X_v83MkzsLPagGY_lfr0,40
32
+ vexor-0.22.0.dist-info/licenses/LICENSE,sha256=wP7TAKRll1t9LoYGxWS9NikPM_0hCc00LmlLyvQBsL8,1066
33
+ vexor-0.22.0.dist-info/RECORD,,
File without changes