vexor-0.2.0-py3-none-any.whl → vexor-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/__init__.py CHANGED
@@ -4,7 +4,7 @@ from __future__ import annotations
 
 __all__ = ["__version__", "get_version"]
 
-__version__ = "0.2.0"
+__version__ = "0.5.0"
 
 
 def get_version() -> str:
vexor/cache.py CHANGED
@@ -7,20 +7,20 @@ import os
 import sqlite3
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Sequence
+from typing import Mapping, Sequence
 
 import numpy as np
 
 from .utils import collect_files
 
 CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
-CACHE_VERSION = 1
+CACHE_VERSION = 3
 DB_FILENAME = "index.db"
 
 
-def _cache_key(root: Path, include_hidden: bool) -> str:
+def _cache_key(root: Path, include_hidden: bool, recursive: bool, mode: str) -> str:
     digest = hashlib.sha1(
-        f"{root.resolve()}|hidden={include_hidden}".encode("utf-8")
+        f"{root.resolve()}|hidden={include_hidden}|recursive={recursive}|mode={mode}".encode("utf-8")
     ).hexdigest()
     return digest
 
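The cache key now folds the traversal settings into the SHA-1 digest, so indexes built with different `recursive` or `mode` settings no longer collide. A minimal sketch of the equivalent derivation (the root path is hypothetical):

```python
import hashlib
from pathlib import Path

# Same recipe as _cache_key: resolved root plus the traversal flags.
root = Path("~/projects/demo").expanduser()  # hypothetical root
key = hashlib.sha1(
    f"{root.resolve()}|hidden=False|recursive=True|mode=name".encode("utf-8")
).hexdigest()
print(key)  # 40-char hex digest identifying this (root, flags) combination
```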
@@ -30,12 +30,18 @@ def ensure_cache_dir() -> Path:
     return CACHE_DIR
 
 
-def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
-    """Return the on-disk cache artifact path (single SQLite DB)."""
+def cache_db_path() -> Path:
+    """Return the absolute path to the shared SQLite cache database."""
+
     ensure_cache_dir()
     return CACHE_DIR / DB_FILENAME
 
 
+def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
+    """Return the on-disk cache artifact path (single SQLite DB)."""
+    return cache_db_path()
+
+
 def _connect(db_path: Path) -> sqlite3.Connection:
     conn = sqlite3.connect(db_path)
     conn.row_factory = sqlite3.Row
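All roots now share a single SQLite database under `~/.vexor`; `cache_file` survives only as a compatibility shim that ignores its arguments. A minimal sketch of the relationship:

```python
from pathlib import Path
from vexor.cache import cache_db_path, cache_file

# Both calls resolve to the same shared database file (~/.vexor/index.db).
assert cache_db_path() == cache_file(Path("."), model="any", include_hidden=False)
print(cache_db_path())
```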
@@ -52,6 +58,8 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
             root_path TEXT NOT NULL,
             model TEXT NOT NULL,
             include_hidden INTEGER NOT NULL,
+            recursive INTEGER NOT NULL DEFAULT 1,
+            mode TEXT NOT NULL,
             dimension INTEGER NOT NULL,
             version INTEGER NOT NULL,
             generated_at TEXT NOT NULL,
@@ -66,6 +74,7 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
             size_bytes INTEGER NOT NULL,
             mtime REAL NOT NULL,
             position INTEGER NOT NULL,
+            preview TEXT DEFAULT '',
             UNIQUE(index_id, rel_path)
         );
 
@@ -78,6 +87,25 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
         ON indexed_file(index_id, position);
         """
     )
+    try:
+        conn.execute(
+            "ALTER TABLE index_metadata ADD COLUMN recursive INTEGER NOT NULL DEFAULT 1"
+        )
+    except sqlite3.OperationalError:
+        # Column already exists; ignore error.
+        pass
+    try:
+        conn.execute(
+            "ALTER TABLE index_metadata ADD COLUMN mode TEXT NOT NULL DEFAULT 'name'"
+        )
+    except sqlite3.OperationalError:
+        pass
+    try:
+        conn.execute(
+            "ALTER TABLE indexed_file ADD COLUMN preview TEXT DEFAULT ''"
+        )
+    except sqlite3.OperationalError:
+        pass
 
 
 def store_index(
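Because SQLite's ALTER TABLE has no IF NOT EXISTS for columns, the schema upgrade simply attempts each ADD COLUMN and swallows the "duplicate column" OperationalError, which keeps `_ensure_schema` idempotent across cache versions. The same pattern in isolation (table and column names here are illustrative, not from the package):

```python
import sqlite3

def add_column_if_missing(conn: sqlite3.Connection, ddl: str) -> None:
    """Run an ALTER TABLE ... ADD COLUMN, ignoring 'duplicate column' errors."""
    try:
        conn.execute(ddl)
    except sqlite3.OperationalError:
        pass  # column already exists; the migration has already run

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
add_column_if_missing(conn, "ALTER TABLE t ADD COLUMN note TEXT DEFAULT ''")
add_column_if_missing(conn, "ALTER TABLE t ADD COLUMN note TEXT DEFAULT ''")  # no-op
```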
@@ -85,19 +113,24 @@ def store_index(
     root: Path,
     model: str,
     include_hidden: bool,
+    mode: str,
+    recursive: bool,
     files: Sequence[Path],
+    previews: Sequence[str],
     embeddings: np.ndarray,
 ) -> Path:
     db_path = cache_file(root, model, include_hidden)
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
         generated_at = datetime.now(timezone.utc).isoformat()
         dimension = int(embeddings.shape[1] if embeddings.size else 0)
         include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
 
         with conn:
+            conn.execute("BEGIN IMMEDIATE;")
             conn.execute(
                 "DELETE FROM index_metadata WHERE cache_key = ? AND model = ?",
                 (key, model),
@@ -109,12 +142,24 @@ def store_index(
                     root_path,
                     model,
                     include_hidden,
+                    recursive,
+                    mode,
                     dimension,
                     version,
                     generated_at
-                ) VALUES (?, ?, ?, ?, ?, ?, ?)
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
-                (key, str(root), model, include_flag, dimension, CACHE_VERSION, generated_at),
+                (
+                    key,
+                    str(root),
+                    model,
+                    include_flag,
+                    recursive_flag,
+                    mode,
+                    dimension,
+                    CACHE_VERSION,
+                    generated_at,
+                ),
             )
             index_id = cursor.lastrowid
 
@@ -132,8 +177,9 @@ def store_index(
                         abs_path,
                         size_bytes,
                         mtime,
-                        position
-                    ) VALUES (?, ?, ?, ?, ?, ?)
+                        position,
+                        preview
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                     """,
                     (
                         index_id,
@@ -142,6 +188,7 @@ def store_index(
                         stat.st_size,
                         stat.st_mtime,
                         position,
+                        previews[position] if position < len(previews) else "",
                     ),
                 )
                 vector_blob = embeddings[position].astype(np.float32).tobytes()
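Taken together, `store_index` now persists the traversal mode, recursion flag, and a per-file text preview in one immediate-mode transaction: `BEGIN IMMEDIATE;` takes the write lock up front, so a concurrent indexer fails fast instead of blocking mid-write. A hedged sketch of a call with the new signature; the root, file paths, previews, model name, and random embeddings are all placeholders (the files must exist on disk, since `store_index` calls `stat()` on each):

```python
import numpy as np
from pathlib import Path
from vexor.cache import store_index

root = Path("~/projects/demo").expanduser()        # hypothetical project root
files = [root / "README.md", root / "src/app.py"]  # placeholder file list
embeddings = np.random.rand(len(files), 384).astype(np.float32)  # fake vectors

store_index(
    root=root,
    model="text-embedding-3-small",  # hypothetical model name
    include_hidden=False,
    mode="name",        # new: indexing mode is part of the cache identity
    recursive=True,     # new: recursion flag is part of the cache identity
    files=files,
    previews=["Project readme", "Application entry point"],  # new preview column
    embeddings=embeddings,
)
```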
@@ -155,7 +202,21 @@ def store_index(
         conn.close()
 
 
-def load_index(root: Path, model: str, include_hidden: bool) -> dict:
+def apply_index_updates(
+    *,
+    root: Path,
+    model: str,
+    include_hidden: bool,
+    mode: str,
+    recursive: bool,
+    current_files: Sequence[Path],
+    changed_files: Sequence[Path],
+    removed_rel_paths: Sequence[str],
+    embeddings: Mapping[str, np.ndarray],
+    previews: Mapping[str, str],
+) -> Path:
+    """Apply incremental updates to an existing cached index."""
+
     db_path = cache_file(root, model, include_hidden)
     if not db_path.exists():
         raise FileNotFoundError(db_path)
@@ -163,22 +224,142 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
         include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
+
+        with conn:
+            conn.execute("BEGIN IMMEDIATE;")
+            meta = conn.execute(
+                """
+                SELECT id, dimension
+                FROM index_metadata
+                WHERE cache_key = ? AND model = ? AND include_hidden = ? AND recursive = ? AND mode = ?
+                """,
+                (key, model, include_flag, recursive_flag, mode),
+            ).fetchone()
+            if meta is None:
+                raise FileNotFoundError(db_path)
+            index_id = meta["id"]
+            existing_dimension = int(meta["dimension"])
+
+            if removed_rel_paths:
+                conn.executemany(
+                    "DELETE FROM indexed_file WHERE index_id = ? AND rel_path = ?",
+                    ((index_id, rel) for rel in removed_rel_paths),
+                )
+
+            vector_dimension = None
+            for path in changed_files:
+                rel_path = _relative_path(path, root)
+                vector = embeddings.get(rel_path)
+                if vector is None:
+                    raise ValueError(f"Missing embedding for updated file: {rel_path}")
+                vector = np.asarray(vector, dtype=np.float32)
+                if vector_dimension is None:
+                    vector_dimension = vector.shape[0]
+                stat = path.stat()
+                record = conn.execute(
+                    "SELECT id FROM indexed_file WHERE index_id = ? AND rel_path = ?",
+                    (index_id, rel_path),
+                ).fetchone()
+                if record is None:
+                    cursor = conn.execute(
+                        """
+                        INSERT INTO indexed_file (
+                            index_id,
+                            rel_path,
+                            abs_path,
+                            size_bytes,
+                            mtime,
+                            position,
+                            preview
+                        ) VALUES (?, ?, ?, ?, ?, ?, ?)
+                        """,
+                        (
+                            index_id,
+                            rel_path,
+                            str(path),
+                            stat.st_size,
+                            stat.st_mtime,
+                            0,
+                            previews.get(rel_path, ""),
+                        ),
+                    )
+                    file_id = cursor.lastrowid
+                    conn.execute(
+                        "INSERT INTO file_embedding (file_id, vector_blob) VALUES (?, ?)",
+                        (file_id, vector.tobytes()),
+                    )
+                else:
+                    file_id = record["id"]
+                    conn.execute(
+                        """
+                        UPDATE indexed_file
+                        SET abs_path = ?, size_bytes = ?, mtime = ?, preview = ?
+                        WHERE id = ?
+                        """,
+                        (
+                            str(path),
+                            stat.st_size,
+                            stat.st_mtime,
+                            previews.get(rel_path, ""),
+                            file_id,
+                        ),
+                    )
+                    conn.execute(
+                        "UPDATE file_embedding SET vector_blob = ? WHERE file_id = ?",
+                        (vector.tobytes(), file_id),
+                    )
+
+            for position, file in enumerate(current_files):
+                rel_path = _relative_path(file, root)
+                conn.execute(
+                    "UPDATE indexed_file SET position = ? WHERE index_id = ? AND rel_path = ?",
+                    (position, index_id, rel_path),
+                )
+
+            generated_at = datetime.now(timezone.utc).isoformat()
+            new_dimension = vector_dimension or existing_dimension
+            conn.execute(
+                """
+                UPDATE index_metadata
+                SET generated_at = ?, dimension = ?
+                WHERE id = ?
+                """,
+                (generated_at, new_dimension, index_id),
+            )
+
+        return db_path
+    finally:
+        conn.close()
+
+
+def load_index(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool) -> dict:
+    db_path = cache_file(root, model, include_hidden)
+    if not db_path.exists():
+        raise FileNotFoundError(db_path)
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        key = _cache_key(root, include_hidden, recursive, mode)
+        include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
         meta = conn.execute(
             """
-            SELECT id, root_path, model, include_hidden, dimension, version, generated_at
+            SELECT id, root_path, model, include_hidden, recursive, mode, dimension, version, generated_at
             FROM index_metadata
-            WHERE cache_key = ? AND model = ? AND include_hidden = ?
+            WHERE cache_key = ? AND model = ? AND include_hidden = ? AND recursive = ? AND mode = ?
             """,
-            (key, model, include_flag),
+            (key, model, include_flag, recursive_flag, mode),
         ).fetchone()
         if meta is None:
             raise FileNotFoundError(db_path)
 
         files = conn.execute(
             """
-            SELECT f.rel_path, f.abs_path, f.size_bytes, f.mtime, e.vector_blob
+            SELECT f.rel_path, f.abs_path, f.size_bytes, f.mtime, f.preview, e.vector_blob
             FROM indexed_file AS f
             JOIN file_embedding AS e ON e.file_id = f.id
             WHERE f.index_id = ?
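`apply_index_updates` is the new incremental path: instead of re-embedding the whole tree, callers pass only the changed files (with embeddings and previews keyed by relative path) plus the relative paths that vanished, and the function rewrites just those rows before refreshing positions and metadata. A hedged sketch; the root, paths, model name, and the random embedding are placeholders:

```python
import numpy as np
from pathlib import Path
from vexor.cache import apply_index_updates

root = Path("~/projects/demo").expanduser()  # hypothetical root
changed = [root / "src/app.py"]              # files whose content changed

apply_index_updates(
    root=root,
    model="text-embedding-3-small",  # hypothetical model name
    include_hidden=False,
    mode="name",
    recursive=True,
    current_files=[root / "README.md", root / "src/app.py"],  # full ordered listing
    changed_files=changed,
    removed_rel_paths=["old/notes.txt"],  # rows to delete from the index
    embeddings={"src/app.py": np.random.rand(384).astype(np.float32)},
    previews={"src/app.py": "Application entry point"},
)
```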
@@ -196,6 +377,7 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
                 "absolute": row["abs_path"],
                 "mtime": row["mtime"],
                 "size": row["size_bytes"],
+                "preview": row["preview"],
                 "embedding": vector.tolist(),
             }
         )
@@ -206,6 +388,8 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
         "root": meta["root_path"],
         "model": meta["model"],
         "include_hidden": bool(meta["include_hidden"]),
+        "recursive": bool(meta["recursive"]),
+        "mode": meta["mode"],
         "dimension": meta["dimension"],
         "files": serialized_files,
     }
@@ -213,15 +397,21 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
         conn.close()
 
 
-def load_index_vectors(root: Path, model: str, include_hidden: bool):
-    data = load_index(root, model, include_hidden)
+def load_index_vectors(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool):
+    data = load_index(root, model, include_hidden, mode, recursive)
     files = data.get("files", [])
     paths = [root / Path(entry["path"]) for entry in files]
     embeddings = np.asarray([entry["embedding"] for entry in files], dtype=np.float32)
     return paths, embeddings, data
 
 
-def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> int:
+def clear_index(
+    root: Path,
+    include_hidden: bool,
+    mode: str,
+    recursive: bool,
+    model: str | None = None,
+) -> int:
     """Remove cached index entries for *root* (optionally filtered by *model*)."""
     db_path = cache_file(root, model or "_", include_hidden)
     if not db_path.exists():
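Every read-side helper now takes the same `(mode, recursive)` pair used at indexing time; a different combination hashes to a different cache key, misses the metadata lookup, and raises FileNotFoundError. A hedged example of the query path (root and model name are placeholders):

```python
from pathlib import Path
from vexor.cache import load_index_vectors

root = Path("~/projects/demo").expanduser()  # hypothetical root
paths, embeddings, data = load_index_vectors(
    root,
    "text-embedding-3-small",  # hypothetical model name
    False,                     # include_hidden
    "name",                    # mode: must match the indexed mode
    True,                      # recursive: must match the indexed flag
)
print(len(paths), embeddings.shape, data["mode"])
```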
@@ -230,13 +420,14 @@ def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> i
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
+        # when model is None we still need a mode; reuse provided mode
         if model is None:
-            query = "DELETE FROM index_metadata WHERE cache_key = ?"
-            params = (key,)
+            query = "DELETE FROM index_metadata WHERE cache_key = ? AND mode = ?"
+            params = (key, mode)
         else:
-            query = "DELETE FROM index_metadata WHERE cache_key = ? AND model = ?"
-            params = (key, model)
+            query = "DELETE FROM index_metadata WHERE cache_key = ? AND model = ? AND mode = ?"
+            params = (key, model, mode)
         with conn:
             cursor = conn.execute(query, params)
             return cursor.rowcount
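A hedged call sketch for the widened `clear_index` signature (arguments hypothetical); it returns the number of metadata rows removed:

```python
from pathlib import Path
from vexor.cache import clear_index

removed = clear_index(
    Path("~/projects/demo").expanduser(),  # hypothetical root
    include_hidden=False,
    mode="name",
    recursive=True,
    model=None,  # None clears entries for every model at this root/mode
)
print(f"removed {removed} cached index entries")
```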
@@ -244,15 +435,97 @@ def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> i
         conn.close()
 
 
+def list_cache_entries() -> list[dict[str, object]]:
+    """Return metadata for every cached index currently stored."""
+
+    db_path = cache_db_path()
+    if not db_path.exists():
+        return []
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        rows = conn.execute(
+            """
+            SELECT
+                root_path,
+                model,
+                include_hidden,
+                recursive,
+                mode,
+                dimension,
+                version,
+                generated_at,
+                (
+                    SELECT COUNT(*)
+                    FROM indexed_file
+                    WHERE index_id = index_metadata.id
+                ) AS file_count
+            FROM index_metadata
+            ORDER BY generated_at DESC
+            """
+        ).fetchall()
+
+        entries: list[dict[str, object]] = []
+        for row in rows:
+            entries.append(
+                {
+                    "root_path": row["root_path"],
+                    "model": row["model"],
+                    "include_hidden": bool(row["include_hidden"]),
+                    "recursive": bool(row["recursive"]),
+                    "mode": row["mode"],
+                    "dimension": row["dimension"],
+                    "version": row["version"],
+                    "generated_at": row["generated_at"],
+                    "file_count": int(row["file_count"] or 0),
+                }
+            )
+        return entries
+    finally:
+        conn.close()
+
+
+def clear_all_cache() -> int:
+    """Remove the entire cache database, returning number of entries removed."""
+
+    db_path = cache_db_path()
+    if not db_path.exists():
+        return 0
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        count_row = conn.execute("SELECT COUNT(*) AS total FROM index_metadata").fetchone()
+        total = int(count_row["total"] if count_row is not None else 0)
+    finally:
+        conn.close()
+
+    if db_path.exists():
+        db_path.unlink()
+    for suffix in ("-wal", "-shm"):
+        sidecar = Path(f"{db_path}{suffix}")
+        if sidecar.exists():
+            sidecar.unlink()
+
+    return total
+
+
 def compare_snapshot(
     root: Path,
     include_hidden: bool,
     cached_files: Sequence[dict],
+    *,
+    recursive: bool,
     current_files: Sequence[Path] | None = None,
 ) -> bool:
     """Return True if the current filesystem matches the cached snapshot."""
     if current_files is None:
-        current_files = collect_files(root, include_hidden=include_hidden)
+        current_files = collect_files(
+            root,
+            include_hidden=include_hidden,
+            recursive=recursive,
+        )
     if len(current_files) != len(cached_files):
         return False
     cached_map = {
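The two new maintenance helpers operate on the shared database as a whole: `list_cache_entries` surveys every stored index (a correlated subquery counts files per index), and `clear_all_cache` deletes the database file along with any `-wal`/`-shm` sidecars left by SQLite's write-ahead log. A hedged usage sketch:

```python
from vexor.cache import clear_all_cache, list_cache_entries

for entry in list_cache_entries():
    print(
        f"{entry['root_path']} model={entry['model']} mode={entry['mode']} "
        f"files={entry['file_count']} built={entry['generated_at']}"
    )

# Drop the whole cache, including WAL sidecar files.
removed = clear_all_cache()
print(f"dropped {removed} cached indexes")
```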