vexor-0.2.0-py3-none-any.whl → vexor-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/__init__.py CHANGED
@@ -4,7 +4,7 @@ from __future__ import annotations
 
 __all__ = ["__version__", "get_version"]
 
-__version__ = "0.2.0"
+__version__ = "0.5.0"
 
 
 def get_version() -> str:
vexor/cache.py CHANGED
@@ -7,20 +7,20 @@ import os
 import sqlite3
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Sequence
+from typing import Mapping, Sequence
 
 import numpy as np
 
 from .utils import collect_files
 
 CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
-CACHE_VERSION = 1
+CACHE_VERSION = 3
 DB_FILENAME = "index.db"
 
 
-def _cache_key(root: Path, include_hidden: bool) -> str:
+def _cache_key(root: Path, include_hidden: bool, recursive: bool, mode: str) -> str:
     digest = hashlib.sha1(
-        f"{root.resolve()}|hidden={include_hidden}".encode("utf-8")
+        f"{root.resolve()}|hidden={include_hidden}|recursive={recursive}|mode={mode}".encode("utf-8")
     ).hexdigest()
     return digest
 
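The cache key now folds the traversal settings into the SHA-1 digest, so indexes built with different `recursive` or `mode` settings no longer collide. A minimal sketch of the equivalent derivation (the root path is hypothetical):

```python
import hashlib
from pathlib import Path

# Same recipe as _cache_key: resolved root plus the traversal flags.
root = Path("~/projects/demo").expanduser()  # hypothetical root
key = hashlib.sha1(
    f"{root.resolve()}|hidden=False|recursive=True|mode=name".encode("utf-8")
).hexdigest()
print(key)  # 40-char hex digest identifying this (root, flags) combination
```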
@@ -30,12 +30,18 @@ def ensure_cache_dir() -> Path:
     return CACHE_DIR
 
 
-def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
-    """Return the on-disk cache artifact path (single SQLite DB)."""
+def cache_db_path() -> Path:
+    """Return the absolute path to the shared SQLite cache database."""
+
     ensure_cache_dir()
     return CACHE_DIR / DB_FILENAME
 
 
+def cache_file(root: Path, model: str, include_hidden: bool) -> Path: # pragma: no cover - kept for API parity
+    """Return the on-disk cache artifact path (single SQLite DB)."""
+    return cache_db_path()
+
+
 def _connect(db_path: Path) -> sqlite3.Connection:
     conn = sqlite3.connect(db_path)
     conn.row_factory = sqlite3.Row
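All roots now share a single SQLite database under `~/.vexor`; `cache_file` survives only as a compatibility shim that ignores its arguments. A minimal sketch of the relationship:

```python
from pathlib import Path
from vexor.cache import cache_db_path, cache_file

# Both calls resolve to the same shared database file (~/.vexor/index.db).
assert cache_db_path() == cache_file(Path("."), model="any", include_hidden=False)
print(cache_db_path())
```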
@@ -52,6 +58,8 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
             root_path TEXT NOT NULL,
             model TEXT NOT NULL,
             include_hidden INTEGER NOT NULL,
+            recursive INTEGER NOT NULL DEFAULT 1,
+            mode TEXT NOT NULL,
             dimension INTEGER NOT NULL,
             version INTEGER NOT NULL,
             generated_at TEXT NOT NULL,
@@ -66,6 +74,7 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
             size_bytes INTEGER NOT NULL,
             mtime REAL NOT NULL,
             position INTEGER NOT NULL,
+            preview TEXT DEFAULT '',
             UNIQUE(index_id, rel_path)
         );
 
@@ -78,6 +87,25 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
         ON indexed_file(index_id, position);
         """
     )
+    try:
+        conn.execute(
+            "ALTER TABLE index_metadata ADD COLUMN recursive INTEGER NOT NULL DEFAULT 1"
+        )
+    except sqlite3.OperationalError:
+        # Column already exists; ignore error.
+        pass
+    try:
+        conn.execute(
+            "ALTER TABLE index_metadata ADD COLUMN mode TEXT NOT NULL DEFAULT 'name'"
+        )
+    except sqlite3.OperationalError:
+        pass
+    try:
+        conn.execute(
+            "ALTER TABLE indexed_file ADD COLUMN preview TEXT DEFAULT ''"
+        )
+    except sqlite3.OperationalError:
+        pass
 
 
 def store_index(
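Because SQLite's ALTER TABLE has no IF NOT EXISTS for columns, the schema upgrade simply attempts each ADD COLUMN and swallows the "duplicate column" OperationalError, which keeps `_ensure_schema` idempotent across cache versions. The same pattern in isolation (table and column names here are illustrative, not from the package):

```python
import sqlite3

def add_column_if_missing(conn: sqlite3.Connection, ddl: str) -> None:
    """Run an ALTER TABLE ... ADD COLUMN, ignoring 'duplicate column' errors."""
    try:
        conn.execute(ddl)
    except sqlite3.OperationalError:
        pass  # column already exists; the migration has already run

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
add_column_if_missing(conn, "ALTER TABLE t ADD COLUMN note TEXT DEFAULT ''")
add_column_if_missing(conn, "ALTER TABLE t ADD COLUMN note TEXT DEFAULT ''")  # no-op
```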
@@ -85,19 +113,24 @@ def store_index(
     root: Path,
     model: str,
     include_hidden: bool,
+    mode: str,
+    recursive: bool,
     files: Sequence[Path],
+    previews: Sequence[str],
     embeddings: np.ndarray,
 ) -> Path:
     db_path = cache_file(root, model, include_hidden)
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
         generated_at = datetime.now(timezone.utc).isoformat()
         dimension = int(embeddings.shape[1] if embeddings.size else 0)
         include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
 
         with conn:
+            conn.execute("BEGIN IMMEDIATE;")
             conn.execute(
                 "DELETE FROM index_metadata WHERE cache_key = ? AND model = ?",
                 (key, model),
@@ -109,12 +142,24 @@ def store_index(
                     root_path,
                     model,
                     include_hidden,
+                    recursive,
+                    mode,
                     dimension,
                     version,
                     generated_at
-                ) VALUES (?, ?, ?, ?, ?, ?, ?)
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
-                (key, str(root), model, include_flag, dimension, CACHE_VERSION, generated_at),
+                (
+                    key,
+                    str(root),
+                    model,
+                    include_flag,
+                    recursive_flag,
+                    mode,
+                    dimension,
+                    CACHE_VERSION,
+                    generated_at,
+                ),
             )
             index_id = cursor.lastrowid
 
@@ -132,8 +177,9 @@ def store_index(
                         abs_path,
                         size_bytes,
                         mtime,
-                        position
-                    ) VALUES (?, ?, ?, ?, ?, ?)
+                        position,
+                        preview
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                     """,
                     (
                         index_id,
@@ -142,6 +188,7 @@ def store_index(
                         stat.st_size,
                         stat.st_mtime,
                         position,
+                        previews[position] if position < len(previews) else "",
                     ),
                 )
                 vector_blob = embeddings[position].astype(np.float32).tobytes()
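Taken together, `store_index` now persists the traversal mode, recursion flag, and a per-file text preview in one immediate-mode transaction: `BEGIN IMMEDIATE;` takes the write lock up front, so a concurrent indexer fails fast instead of blocking mid-write. A hedged sketch of a call with the new signature; the root, file paths, previews, model name, and random embeddings are all placeholders (the files must exist on disk, since `store_index` calls `stat()` on each):

```python
import numpy as np
from pathlib import Path
from vexor.cache import store_index

root = Path("~/projects/demo").expanduser()        # hypothetical project root
files = [root / "README.md", root / "src/app.py"]  # placeholder file list
embeddings = np.random.rand(len(files), 384).astype(np.float32)  # fake vectors

store_index(
    root=root,
    model="text-embedding-3-small",  # hypothetical model name
    include_hidden=False,
    mode="name",        # new: indexing mode is part of the cache identity
    recursive=True,     # new: recursion flag is part of the cache identity
    files=files,
    previews=["Project readme", "Application entry point"],  # new preview column
    embeddings=embeddings,
)
```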
@@ -155,7 +202,21 @@ def store_index(
         conn.close()
 
 
-def load_index(root: Path, model: str, include_hidden: bool) -> dict:
+def apply_index_updates(
+    *,
+    root: Path,
+    model: str,
+    include_hidden: bool,
+    mode: str,
+    recursive: bool,
+    current_files: Sequence[Path],
+    changed_files: Sequence[Path],
+    removed_rel_paths: Sequence[str],
+    embeddings: Mapping[str, np.ndarray],
+    previews: Mapping[str, str],
+) -> Path:
+    """Apply incremental updates to an existing cached index."""
+
     db_path = cache_file(root, model, include_hidden)
     if not db_path.exists():
         raise FileNotFoundError(db_path)
@@ -163,22 +224,142 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
         include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
+
+        with conn:
+            conn.execute("BEGIN IMMEDIATE;")
+            meta = conn.execute(
+                """
+                SELECT id, dimension
+                FROM index_metadata
+                WHERE cache_key = ? AND model = ? AND include_hidden = ? AND recursive = ? AND mode = ?
+                """,
+                (key, model, include_flag, recursive_flag, mode),
+            ).fetchone()
+            if meta is None:
+                raise FileNotFoundError(db_path)
+            index_id = meta["id"]
+            existing_dimension = int(meta["dimension"])
+
+            if removed_rel_paths:
+                conn.executemany(
+                    "DELETE FROM indexed_file WHERE index_id = ? AND rel_path = ?",
+                    ((index_id, rel) for rel in removed_rel_paths),
+                )
+
+            vector_dimension = None
+            for path in changed_files:
+                rel_path = _relative_path(path, root)
+                vector = embeddings.get(rel_path)
+                if vector is None:
+                    raise ValueError(f"Missing embedding for updated file: {rel_path}")
+                vector = np.asarray(vector, dtype=np.float32)
+                if vector_dimension is None:
+                    vector_dimension = vector.shape[0]
+                stat = path.stat()
+                record = conn.execute(
+                    "SELECT id FROM indexed_file WHERE index_id = ? AND rel_path = ?",
+                    (index_id, rel_path),
+                ).fetchone()
+                if record is None:
+                    cursor = conn.execute(
+                        """
+                        INSERT INTO indexed_file (
+                            index_id,
+                            rel_path,
+                            abs_path,
+                            size_bytes,
+                            mtime,
+                            position,
+                            preview
+                        ) VALUES (?, ?, ?, ?, ?, ?, ?)
+                        """,
+                        (
+                            index_id,
+                            rel_path,
+                            str(path),
+                            stat.st_size,
+                            stat.st_mtime,
+                            0,
+                            previews.get(rel_path, ""),
+                        ),
+                    )
+                    file_id = cursor.lastrowid
+                    conn.execute(
+                        "INSERT INTO file_embedding (file_id, vector_blob) VALUES (?, ?)",
+                        (file_id, vector.tobytes()),
+                    )
+                else:
+                    file_id = record["id"]
+                    conn.execute(
+                        """
+                        UPDATE indexed_file
+                        SET abs_path = ?, size_bytes = ?, mtime = ?, preview = ?
+                        WHERE id = ?
+                        """,
+                        (
+                            str(path),
+                            stat.st_size,
+                            stat.st_mtime,
+                            previews.get(rel_path, ""),
+                            file_id,
+                        ),
+                    )
+                    conn.execute(
+                        "UPDATE file_embedding SET vector_blob = ? WHERE file_id = ?",
+                        (vector.tobytes(), file_id),
+                    )
+
+            for position, file in enumerate(current_files):
+                rel_path = _relative_path(file, root)
+                conn.execute(
+                    "UPDATE indexed_file SET position = ? WHERE index_id = ? AND rel_path = ?",
+                    (position, index_id, rel_path),
+                )
+
+            generated_at = datetime.now(timezone.utc).isoformat()
+            new_dimension = vector_dimension or existing_dimension
+            conn.execute(
+                """
+                UPDATE index_metadata
+                SET generated_at = ?, dimension = ?
+                WHERE id = ?
+                """,
+                (generated_at, new_dimension, index_id),
+            )
+
+        return db_path
+    finally:
+        conn.close()
+
+
+def load_index(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool) -> dict:
+    db_path = cache_file(root, model, include_hidden)
+    if not db_path.exists():
+        raise FileNotFoundError(db_path)
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        key = _cache_key(root, include_hidden, recursive, mode)
+        include_flag = 1 if include_hidden else 0
+        recursive_flag = 1 if recursive else 0
         meta = conn.execute(
             """
-            SELECT id, root_path, model, include_hidden, dimension, version, generated_at
+            SELECT id, root_path, model, include_hidden, recursive, mode, dimension, version, generated_at
             FROM index_metadata
-            WHERE cache_key = ? AND model = ? AND include_hidden = ?
+            WHERE cache_key = ? AND model = ? AND include_hidden = ? AND recursive = ? AND mode = ?
             """,
-            (key, model, include_flag),
+            (key, model, include_flag, recursive_flag, mode),
         ).fetchone()
         if meta is None:
             raise FileNotFoundError(db_path)
 
         files = conn.execute(
             """
-            SELECT f.rel_path, f.abs_path, f.size_bytes, f.mtime, e.vector_blob
+            SELECT f.rel_path, f.abs_path, f.size_bytes, f.mtime, f.preview, e.vector_blob
             FROM indexed_file AS f
             JOIN file_embedding AS e ON e.file_id = f.id
             WHERE f.index_id = ?
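`apply_index_updates` is the new incremental path: instead of re-embedding the whole tree, callers pass only the changed files (with embeddings and previews keyed by relative path) plus the relative paths that vanished, and the function rewrites just those rows before refreshing positions and metadata. A hedged sketch; the root, paths, model name, and the random embedding are placeholders:

```python
import numpy as np
from pathlib import Path
from vexor.cache import apply_index_updates

root = Path("~/projects/demo").expanduser()  # hypothetical root
changed = [root / "src/app.py"]              # files whose content changed

apply_index_updates(
    root=root,
    model="text-embedding-3-small",  # hypothetical model name
    include_hidden=False,
    mode="name",
    recursive=True,
    current_files=[root / "README.md", root / "src/app.py"],  # full ordered listing
    changed_files=changed,
    removed_rel_paths=["old/notes.txt"],  # rows to delete from the index
    embeddings={"src/app.py": np.random.rand(384).astype(np.float32)},
    previews={"src/app.py": "Application entry point"},
)
```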
@@ -196,6 +377,7 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
                 "absolute": row["abs_path"],
                 "mtime": row["mtime"],
                 "size": row["size_bytes"],
+                "preview": row["preview"],
                 "embedding": vector.tolist(),
             }
         )
@@ -206,6 +388,8 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
         "root": meta["root_path"],
         "model": meta["model"],
         "include_hidden": bool(meta["include_hidden"]),
+        "recursive": bool(meta["recursive"]),
+        "mode": meta["mode"],
         "dimension": meta["dimension"],
         "files": serialized_files,
     }
@@ -213,15 +397,21 @@ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
         conn.close()
 
 
-def load_index_vectors(root: Path, model: str, include_hidden: bool):
-    data = load_index(root, model, include_hidden)
+def load_index_vectors(root: Path, model: str, include_hidden: bool, mode: str, recursive: bool):
+    data = load_index(root, model, include_hidden, mode, recursive)
     files = data.get("files", [])
     paths = [root / Path(entry["path"]) for entry in files]
     embeddings = np.asarray([entry["embedding"] for entry in files], dtype=np.float32)
     return paths, embeddings, data
 
 
-def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> int:
+def clear_index(
+    root: Path,
+    include_hidden: bool,
+    mode: str,
+    recursive: bool,
+    model: str | None = None,
+) -> int:
     """Remove cached index entries for *root* (optionally filtered by *model*)."""
     db_path = cache_file(root, model or "_", include_hidden)
     if not db_path.exists():
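Every read-side helper now takes the same `(mode, recursive)` pair used at indexing time; a different combination hashes to a different cache key, misses the metadata lookup, and raises FileNotFoundError. A hedged example of the query path (root and model name are placeholders):

```python
from pathlib import Path
from vexor.cache import load_index_vectors

root = Path("~/projects/demo").expanduser()  # hypothetical root
paths, embeddings, data = load_index_vectors(
    root,
    "text-embedding-3-small",  # hypothetical model name
    False,                     # include_hidden
    "name",                    # mode: must match the indexed mode
    True,                      # recursive: must match the indexed flag
)
print(len(paths), embeddings.shape, data["mode"])
```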
@@ -230,13 +420,14 @@ def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> i
     conn = _connect(db_path)
     try:
         _ensure_schema(conn)
-        key = _cache_key(root, include_hidden)
+        key = _cache_key(root, include_hidden, recursive, mode)
+        # when model is None we still need a mode; reuse provided mode
         if model is None:
-            query = "DELETE FROM index_metadata WHERE cache_key = ?"
-            params = (key,)
+            query = "DELETE FROM index_metadata WHERE cache_key = ? AND mode = ?"
+            params = (key, mode)
         else:
-            query = "DELETE FROM index_metadata WHERE cache_key = ? AND model = ?"
-            params = (key, model)
+            query = "DELETE FROM index_metadata WHERE cache_key = ? AND model = ? AND mode = ?"
+            params = (key, model, mode)
         with conn:
             cursor = conn.execute(query, params)
             return cursor.rowcount
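A hedged call sketch for the widened `clear_index` signature (arguments hypothetical); it returns the number of metadata rows removed:

```python
from pathlib import Path
from vexor.cache import clear_index

removed = clear_index(
    Path("~/projects/demo").expanduser(),  # hypothetical root
    include_hidden=False,
    mode="name",
    recursive=True,
    model=None,  # None clears entries for every model at this root/mode
)
print(f"removed {removed} cached index entries")
```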
@@ -244,15 +435,97 @@ def clear_index(root: Path, include_hidden: bool, model: str | None = None) -> i
         conn.close()
 
 
+def list_cache_entries() -> list[dict[str, object]]:
+    """Return metadata for every cached index currently stored."""
+
+    db_path = cache_db_path()
+    if not db_path.exists():
+        return []
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        rows = conn.execute(
+            """
+            SELECT
+                root_path,
+                model,
+                include_hidden,
+                recursive,
+                mode,
+                dimension,
+                version,
+                generated_at,
+                (
+                    SELECT COUNT(*)
+                    FROM indexed_file
+                    WHERE index_id = index_metadata.id
+                ) AS file_count
+            FROM index_metadata
+            ORDER BY generated_at DESC
+            """
+        ).fetchall()
+
+        entries: list[dict[str, object]] = []
+        for row in rows:
+            entries.append(
+                {
+                    "root_path": row["root_path"],
+                    "model": row["model"],
+                    "include_hidden": bool(row["include_hidden"]),
+                    "recursive": bool(row["recursive"]),
+                    "mode": row["mode"],
+                    "dimension": row["dimension"],
+                    "version": row["version"],
+                    "generated_at": row["generated_at"],
+                    "file_count": int(row["file_count"] or 0),
+                }
+            )
+        return entries
+    finally:
+        conn.close()
+
+
+def clear_all_cache() -> int:
+    """Remove the entire cache database, returning number of entries removed."""
+
+    db_path = cache_db_path()
+    if not db_path.exists():
+        return 0
+
+    conn = _connect(db_path)
+    try:
+        _ensure_schema(conn)
+        count_row = conn.execute("SELECT COUNT(*) AS total FROM index_metadata").fetchone()
+        total = int(count_row["total"] if count_row is not None else 0)
+    finally:
+        conn.close()
+
+    if db_path.exists():
+        db_path.unlink()
+    for suffix in ("-wal", "-shm"):
+        sidecar = Path(f"{db_path}{suffix}")
+        if sidecar.exists():
+            sidecar.unlink()
+
+    return total
+
+
 def compare_snapshot(
     root: Path,
     include_hidden: bool,
     cached_files: Sequence[dict],
+    *,
+    recursive: bool,
     current_files: Sequence[Path] | None = None,
 ) -> bool:
     """Return True if the current filesystem matches the cached snapshot."""
     if current_files is None:
-        current_files = collect_files(root, include_hidden=include_hidden)
+        current_files = collect_files(
+            root,
+            include_hidden=include_hidden,
+            recursive=recursive,
+        )
     if len(current_files) != len(cached_files):
         return False
     cached_map = {
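The two new maintenance helpers operate on the shared database as a whole: `list_cache_entries` surveys every stored index (a correlated subquery counts files per index), and `clear_all_cache` deletes the database file along with any `-wal`/`-shm` sidecars left by SQLite's write-ahead log. A hedged usage sketch:

```python
from vexor.cache import clear_all_cache, list_cache_entries

for entry in list_cache_entries():
    print(
        f"{entry['root_path']} model={entry['model']} mode={entry['mode']} "
        f"files={entry['file_count']} built={entry['generated_at']}"
    )

# Drop the whole cache, including WAL sidecar files.
removed = clear_all_cache()
print(f"dropped {removed} cached indexes")
```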