vexor-0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vexor might be problematic.

vexor-0.1.2/.gitignore ADDED
@@ -0,0 +1,159 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ PIPE_MANIFEST
+
+ # PyInstaller
+ *.manifest
+ #*.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ Pipfile.lock
+
+ # poetry
+ poetry.lock
+
+ # pdm
+ .pdm.toml
+
+ # PEP 582
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # IDE specific files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+ .DS_Store
+
+ # SentenceTransformers cache
+ .cache/
+ sentence_transformers/
+
+ # Model cache (HuggingFace)
+ .transformers_cache/
+ models/
+
+ # Project specific
+ *.db
+ *.sqlite
+ test_data/
+ temp/
+ tmp/
vexor-0.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 ScarletKc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
vexor-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,95 @@
+ Metadata-Version: 2.4
+ Name: vexor
+ Version: 0.1.2
+ Summary: A vector-powered CLI for semantic search over filenames.
+ Project-URL: Repository, https://github.com/scarletkc/vexor
+ Author: scarletkc
+ License: MIT
+ License-File: LICENSE
+ Keywords: ai,cli,semantic-search,typer
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: End Users/Desktop
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: System :: Filesystems
+ Classifier: Topic :: Text Processing :: Indexing
+ Classifier: Topic :: Utilities
+ Requires-Python: >=3.9
+ Requires-Dist: google-genai>=0.5.0
+ Requires-Dist: numpy>=1.23.0
+ Requires-Dist: python-dotenv>=1.0.0
+ Requires-Dist: rich>=13.0.0
+ Requires-Dist: scikit-learn>=1.3.0
+ Requires-Dist: typer>=0.9.0
+ Provides-Extra: dev
+ Requires-Dist: build>=1.2.1; extra == 'dev'
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
+ Requires-Dist: pytest>=7.4; extra == 'dev'
+ Requires-Dist: twine>=5.1.1; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # Vexor
+
+ Vexor is a vector-powered CLI that searches file names semantically. It uses Google GenAI's `gemini-embedding-001` model to embed file names and queries, then ranks matches with cosine similarity.
+
+ ## Install
+ ```bash
+ pip install -e .
+ ```
+ The CLI entry point is `vexor` (or `python -m vexor`).
+
+ ## Configure
+ Set the Gemini API key once and reuse it everywhere:
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Optional defaults:
+ ```bash
+ vexor config --set-model gemini-embedding-001
+ vexor config --set-batch-size 0 # 0 = single request
+ ```
+ Configuration is stored in `~/.vexor/config.json`.
+
+ ## Workflow
+ 1. **Index** the project root (includes every subdirectory):
+ ```bash
+ vexor index --path ~/projects/demo --include-hidden
+ ```
+ 2. **Search** from anywhere, pointing to the same path:
+ ```bash
+ vexor search "api client config" --path ~/projects/demo --top 5
+ ```
+ Output example:
+ ```
+ Vexor semantic file search results
+ ──────────────────────────────────
+ 1 0.923 ./src/config_loader.py
+ 2 0.871 ./src/utils/config_parse.py
+ 3 0.809 ./tests/test_config_loader.py
+ ```
+
+ Tips:
+ - Keep one index per project root; subdirectories need separate indexes only if you explicitly run `vexor index` on them.
+ - Hidden files are included only if both `index` and `search` use `--include-hidden`.
+
+ ## Commands
+ | Command | Description |
+ | ------- | ----------- |
+ | `vexor index --path PATH [--include-hidden]` | Recursively scans `PATH`, embeds file names, and writes a cache under `~/.vexor`. |
+ | `vexor search QUERY --path PATH [--top K] [--include-hidden]` | Loads the cached embeddings for `PATH` and ranks matches for `QUERY`. |
+ | `vexor config --set-api-key/--clear-api-key` | Manage the stored Gemini API key. |
+ | `vexor config --set-model/--set-batch-size/--show` | Manage default model and batch size. |
+
+ ## Development
+ Run tests with:
+ ```bash
+ pip install -e .[dev]
+ pytest
+ ```
+ Tests rely on fake embedding backends, so no network access is required.
+
+ Cache files and configuration live in `~/.vexor`. Adjust `_label_for_path` or `VexorSearcher._prepare_text` if you need to encode additional context (e.g., relative paths).
vexor-0.1.2/README.md ADDED
@@ -0,0 +1,61 @@
+ # Vexor
+
+ Vexor is a vector-powered CLI that searches file names semantically. It uses Google GenAI's `gemini-embedding-001` model to embed file names and queries, then ranks matches with cosine similarity.
+
+ ## Install
+ ```bash
+ pip install -e .
+ ```
+ The CLI entry point is `vexor` (or `python -m vexor`).
+
+ ## Configure
+ Set the Gemini API key once and reuse it everywhere:
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Optional defaults:
+ ```bash
+ vexor config --set-model gemini-embedding-001
+ vexor config --set-batch-size 0 # 0 = single request
+ ```
+ Configuration is stored in `~/.vexor/config.json`.
+
+ ## Workflow
+ 1. **Index** the project root (includes every subdirectory):
+ ```bash
+ vexor index --path ~/projects/demo --include-hidden
+ ```
+ 2. **Search** from anywhere, pointing to the same path:
+ ```bash
+ vexor search "api client config" --path ~/projects/demo --top 5
+ ```
+ Output example:
+ ```
+ Vexor semantic file search results
+ ──────────────────────────────────
+ 1 0.923 ./src/config_loader.py
+ 2 0.871 ./src/utils/config_parse.py
+ 3 0.809 ./tests/test_config_loader.py
+ ```
+
+ Tips:
+ - Keep one index per project root; subdirectories need separate indexes only if you explicitly run `vexor index` on them.
+ - Hidden files are included only if both `index` and `search` use `--include-hidden`.
+
+ ## Commands
+ | Command | Description |
+ | ------- | ----------- |
+ | `vexor index --path PATH [--include-hidden]` | Recursively scans `PATH`, embeds file names, and writes a cache under `~/.vexor`. |
+ | `vexor search QUERY --path PATH [--top K] [--include-hidden]` | Loads the cached embeddings for `PATH` and ranks matches for `QUERY`. |
+ | `vexor config --set-api-key/--clear-api-key` | Manage the stored Gemini API key. |
+ | `vexor config --set-model/--set-batch-size/--show` | Manage default model and batch size. |
+
+ ## Development
+ Run tests with:
+ ```bash
+ pip install -e .[dev]
+ pytest
+ ```
+ Tests rely on fake embedding backends, so no network access is required.
+
+ Cache files and configuration live in `~/.vexor`. Adjust `_label_for_path` or `VexorSearcher._prepare_text` if you need to encode additional context (e.g., relative paths).
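The README's closing note suggests adjusting `_label_for_path` or `VexorSearcher._prepare_text` to encode more context. A minimal sketch of such a variant; the optional `root` parameter is an illustration, not part of the released API:

```python
from __future__ import annotations

from pathlib import Path


def _prepare_text(path: Path, root: Path | None = None) -> str:
    """Variant that embeds the relative path, not just the file name."""
    if root is not None:
        try:
            path = path.relative_to(root)
        except ValueError:
            pass  # path lies outside root; keep it as-is
    # Turn separators and underscores into spaces so the embedding
    # model sees something closer to natural language.
    return path.as_posix().replace("/", " ").replace("_", " ")
```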
vexor-0.1.2/pyproject.toml ADDED
@@ -0,0 +1,68 @@
+
+ [build-system]
+ requires = ["hatchling>=1.21"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "vexor"
+ description = "A vector-powered CLI for semantic search over filenames."
+ readme = "README.md"
+ authors = [{ name = "scarletkc" }]
+ license = { text = "MIT" }
+ requires-python = ">=3.9"
+ keywords = ["semantic-search", "cli", "typer", "ai"]
+ classifiers = [
+     "Development Status :: 3 - Alpha",
+     "Environment :: Console",
+     "Intended Audience :: End Users/Desktop",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+     "Programming Language :: Python",
+     "Programming Language :: Python :: 3",
+     "Topic :: Utilities",
+     "Topic :: System :: Filesystems",
+     "Topic :: Text Processing :: Indexing",
+     "Topic :: Scientific/Engineering :: Information Analysis",
+ ]
+ dependencies = [
+     "google-genai>=0.5.0",
+     "python-dotenv>=1.0.0",
+     "scikit-learn>=1.3.0",
+     "numpy>=1.23.0",
+     "typer>=0.9.0",
+     "rich>=13.0.0",
+ ]
+ dynamic = ["version"]
+
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.4",
+     "pytest-cov>=4.1",
+     "build>=1.2.1",
+     "twine>=5.1.1",
+ ]
+
+ [project.urls]
+ Repository = "https://github.com/scarletkc/vexor"
+
+ [project.scripts]
+ vexor = "vexor.cli:run"
+
+ [tool.hatch.version]
+ path = "vexor/__init__.py"
+
+ [tool.hatch.build.targets.sdist]
+ include = [
+     "vexor",
+     "README.md",
+     "LICENSE",
+     "pyproject.toml",
+ ]
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["vexor"]
+
+ [tool.pytest.ini_options]
+ addopts = "-ra"
+ pythonpath = ["."]
vexor-0.1.2/vexor/__init__.py ADDED
@@ -0,0 +1,12 @@
+ """Vexor package initialization."""
+
+ from __future__ import annotations
+
+ __all__ = ["__version__", "get_version"]
+
+ __version__ = "0.1.2"
+
+
+ def get_version() -> str:
+     """Return the current package version."""
+     return __version__
vexor-0.1.2/vexor/__main__.py ADDED
@@ -0,0 +1,18 @@
+ """Entry point for `python -m vexor` and frozen builds."""
+
+ from __future__ import annotations
+
+ try:
+     # Normal package execution path
+     from .cli import run
+ except ImportError:  # pragma: no cover - happens in frozen single-file builds
+     from vexor.cli import run  # type: ignore[import]
+
+
+ def main() -> None:
+     """Execute the Typer application."""
+     run()
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
vexor-0.1.2/vexor/cache.py ADDED
@@ -0,0 +1,134 @@
+ """Index cache helpers for Vexor."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import os
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Iterable, Sequence
+
+ import numpy as np
+
+ from .utils import collect_files
+
+ CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
+ CACHE_VERSION = 1
+
+
+ def _safe_model_name(model: str) -> str:
+     return model.replace("/", "_")
+
+
+ def _cache_key(root: Path, include_hidden: bool) -> str:
+     digest = hashlib.sha1(f"{root.resolve()}|hidden={include_hidden}".encode("utf-8")).hexdigest()
+     return digest
+
+
+ def cache_file(root: Path, model: str, include_hidden: bool) -> Path:
+     key = _cache_key(root, include_hidden)
+     safe_model = _safe_model_name(model)
+     return CACHE_DIR / f"{key}-{safe_model}.json"
+
+
+ def ensure_cache_dir() -> Path:
+     CACHE_DIR.mkdir(parents=True, exist_ok=True)
+     return CACHE_DIR
+
+
+ def store_index(
+     *,
+     root: Path,
+     model: str,
+     include_hidden: bool,
+     files: Sequence[Path],
+     embeddings: np.ndarray,
+ ) -> Path:
+     ensure_cache_dir()
+     payload = {
+         "version": CACHE_VERSION,
+         "generated_at": datetime.now(timezone.utc).isoformat(),
+         "root": str(root),
+         "model": model,
+         "include_hidden": include_hidden,
+         "dimension": int(embeddings.shape[1] if embeddings.size else 0),
+         "files": [],
+     }
+     for idx, file in enumerate(files):
+         stat = file.stat()
+         try:
+             rel_path = file.relative_to(root)
+         except ValueError:
+             rel_path = file
+         payload["files"].append(
+             {
+                 "path": str(rel_path),
+                 "absolute": str(file),
+                 "mtime": stat.st_mtime,
+                 "size": stat.st_size,
+                 "embedding": embeddings[idx].astype(float).tolist(),
+             }
+         )
+     path = cache_file(root, model, include_hidden)
+     path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+     return path
+
+
+ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
+     path = cache_file(root, model, include_hidden)
+     if not path.exists():
+         raise FileNotFoundError(path)
+     text = path.read_text(encoding="utf-8")
+     return json.loads(text)
+
+
+ def load_index_vectors(root: Path, model: str, include_hidden: bool):
+     data = load_index(root, model, include_hidden)
+     files = data.get("files", [])
+     paths = [root / Path(entry["path"]) for entry in files]
+     embeddings = np.asarray([entry["embedding"] for entry in files], dtype=np.float32)
+     return paths, embeddings, data
+
+
+ def compare_snapshot(
+     root: Path,
+     include_hidden: bool,
+     cached_files: Sequence[dict],
+     current_files: Sequence[Path] | None = None,
+ ) -> bool:
+     """Return True if the current filesystem matches the cached snapshot."""
+     if current_files is None:
+         current_files = collect_files(root, include_hidden=include_hidden)
+     if len(current_files) != len(cached_files):
+         return False
+     cached_map = {
+         entry["path"]: (entry["mtime"], entry.get("size"))
+         for entry in cached_files
+     }
+     for file in current_files:
+         rel = _relative_path(file, root)
+         data = cached_map.get(rel)
+         if data is None:
+             return False
+         cached_mtime, cached_size = data
+         stat = file.stat()
+         current_mtime = stat.st_mtime
+         current_size = stat.st_size
+         # allow drift due to filesystem precision (approx 0.5s on some platforms)
+         if abs(current_mtime - cached_mtime) > 5e-1:
+             if cached_size is not None and cached_size == current_size:
+                 continue
+             return False
+         if cached_size is not None and cached_size != current_size:
+             return False
+     return True
+
+
+ def _relative_path(path: Path, root: Path) -> str:
+     try:
+         rel = path.relative_to(root)
+     except ValueError:
+         rel = path
+     return str(rel)
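In short, each index lives in a single JSON file whose name combines a SHA-1 digest of the resolved root (plus the hidden-files flag) with the model name. A sketch of how that resolves, using a hypothetical project path:

```python
from pathlib import Path

from vexor.cache import cache_file

# Hypothetical root; the digest covers "<resolved root>|hidden=False",
# and any "/" in the model name is replaced by "_".
target = cache_file(Path("~/projects/demo").expanduser(),
                    "gemini-embedding-001", include_hidden=False)
print(target)  # ~/.vexor/<sha1-hex>-gemini-embedding-001.json
```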
vexor-0.1.2/vexor/cli.py ADDED
@@ -0,0 +1,334 @@
+ """Command line interface for Vexor."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Sequence
+
+ import typer
+ from rich.console import Console
+ from rich.table import Table
+
+ from . import __version__
+ from .config import (
+     DEFAULT_BATCH_SIZE,
+     DEFAULT_MODEL,
+     load_config,
+     set_api_key,
+     set_batch_size,
+     set_model,
+ )
+ from .text import Messages, Styles
+ from .utils import collect_files, resolve_directory, format_path, ensure_positive
+
+ console = Console()
+ app = typer.Typer(
+     help=Messages.APP_HELP,
+     no_args_is_help=True,
+     context_settings={"help_option_names": ["-h", "--help"]},
+ )
+
+
+ @dataclass(slots=True)
+ class DisplayResult:
+     path: Path
+     score: float
+
+
+ def _version_callback(value: bool) -> None:
+     if value:
+         console.print(f"Vexor v{__version__}")
+         raise typer.Exit()
+
+
+ @app.callback()
+ def main(
+     version: bool = typer.Option(
+         False,
+         "--version",
+         "-v",
+         callback=_version_callback,
+         is_eager=True,
+         help="Show version and exit.",
+     )
+ ) -> None:
+     """Global Typer callback for shared options."""
+     return None
+
+
+ @app.command()
+ def search(
+     query: str = typer.Argument(..., help=Messages.HELP_QUERY),
+     path: Path = typer.Option(
+         Path.cwd(),
+         "--path",
+         "-p",
+         help=Messages.HELP_SEARCH_PATH,
+     ),
+     top: int = typer.Option(5, "--top", "-k", help=Messages.HELP_SEARCH_TOP),
+     include_hidden: bool = typer.Option(
+         False,
+         "--include-hidden",
+         help=Messages.HELP_INCLUDE_HIDDEN,
+     ),
+ ) -> None:
+     """Run the semantic search using a cached index."""
+     config = load_config()
+     model_name = config.model or DEFAULT_MODEL
+     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
+
+     clean_query = query.strip()
+     if not clean_query:
+         console.print(_styled(Messages.ERROR_EMPTY_QUERY, Styles.ERROR))
+         raise typer.Exit(code=1)
+     try:
+         ensure_positive(top, "top")
+     except ValueError as exc:  # pragma: no cover - validated by Typer
+         raise typer.BadParameter(str(exc), param_name="top") from exc
+
+     directory = resolve_directory(path)
+     try:
+         cached_paths, file_vectors, meta = _load_index(directory, model_name, include_hidden)
+     except FileNotFoundError:
+         console.print(
+             _styled(Messages.ERROR_INDEX_MISSING.format(path=directory), Styles.ERROR)
+         )
+         raise typer.Exit(code=1)
+
+     _warn_if_stale(directory, include_hidden, meta.get("files", []))
+
+     if not cached_paths:
+         console.print(_styled(Messages.INFO_INDEX_EMPTY, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     searcher = _create_searcher(model_name=model_name, batch_size=batch_size)
+     try:
+         query_vector = searcher.embed_texts([clean_query])[0]
+     except RuntimeError as exc:
+         console.print(_styled(str(exc), Styles.ERROR))
+         raise typer.Exit(code=1)
+
+     from sklearn.metrics.pairwise import cosine_similarity  # local import
+
+     similarities = cosine_similarity(
+         query_vector.reshape(1, -1), file_vectors
+     )[0]
+     scored = [
+         DisplayResult(path=path, score=float(score))
+         for path, score in zip(cached_paths, similarities)
+     ]
+     scored.sort(key=lambda item: item.score, reverse=True)
+     results = scored[:top]
+
+     if not results:
+         console.print(_styled(Messages.INFO_NO_RESULTS, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     _render_results(results, directory, searcher.device)
+
+
+ @app.command()
+ def index(
+     path: Path = typer.Option(
+         Path.cwd(),
+         "--path",
+         "-p",
+         help=Messages.HELP_INDEX_PATH,
+     ),
+     include_hidden: bool = typer.Option(
+         False,
+         "--include-hidden",
+         help=Messages.HELP_INDEX_INCLUDE,
+     ),
+ ) -> None:
+     """Create or refresh the cached index for the given directory."""
+     config = load_config()
+     model_name = config.model or DEFAULT_MODEL
+     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
+
+     directory = resolve_directory(path)
+     files = collect_files(directory, include_hidden=include_hidden)
+     if not files:
+         console.print(_styled(Messages.INFO_NO_FILES, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     existing_meta = _load_index_metadata_safe(directory, model_name, include_hidden)
+     if existing_meta:
+         cached_files = existing_meta.get("files", [])
+         if cached_files and _is_cache_current(
+             directory, include_hidden, cached_files, current_files=files
+         ):
+             console.print(
+                 _styled(Messages.INFO_INDEX_UP_TO_DATE.format(path=directory), Styles.INFO)
+             )
+             return
+
+     searcher = _create_searcher(model_name=model_name, batch_size=batch_size)
+     file_labels = [_label_for_path(file) for file in files]
+     embeddings = searcher.embed_texts(file_labels)
+
+     cache_path = _store_index(
+         root=directory,
+         model=model_name,
+         include_hidden=include_hidden,
+         files=files,
+         embeddings=embeddings,
+     )
+     console.print(_styled(Messages.INFO_INDEX_SAVED.format(path=cache_path), Styles.SUCCESS))
+
+
+ @app.command()
+ def config(
+     set_api_key_option: str | None = typer.Option(
+         None,
+         "--set-api-key",
+         help=Messages.HELP_SET_API_KEY,
+     ),
+     clear_api_key: bool = typer.Option(
+         False,
+         "--clear-api-key",
+         help=Messages.HELP_CLEAR_API_KEY,
+     ),
+     set_model_option: str | None = typer.Option(
+         None,
+         "--set-model",
+         help=Messages.HELP_SET_MODEL,
+     ),
+     set_batch_option: int | None = typer.Option(
+         None,
+         "--set-batch-size",
+         help=Messages.HELP_SET_BATCH,
+     ),
+     show: bool = typer.Option(
+         False,
+         "--show",
+         help=Messages.HELP_SHOW_CONFIG,
+     ),
+ ) -> None:
+     """Manage Vexor configuration stored in ~/.vexor/config.json."""
+     changed = False
+
+     if set_api_key_option is not None:
+         set_api_key(set_api_key_option)
+         console.print(_styled(Messages.INFO_API_SAVED, Styles.SUCCESS))
+         changed = True
+     if clear_api_key:
+         set_api_key(None)
+         console.print(_styled(Messages.INFO_API_CLEARED, Styles.SUCCESS))
+         changed = True
+     if set_model_option is not None:
+         set_model(set_model_option)
+         console.print(
+             _styled(Messages.INFO_MODEL_SET.format(value=set_model_option), Styles.SUCCESS)
+         )
+         changed = True
+     if set_batch_option is not None:
+         if set_batch_option < 0:
+             raise typer.BadParameter(Messages.ERROR_BATCH_NEGATIVE)
+         set_batch_size(set_batch_option)
+         console.print(
+             _styled(Messages.INFO_BATCH_SET.format(value=set_batch_option), Styles.SUCCESS)
+         )
+         changed = True
+
+     if show or not changed:
+         cfg = load_config()
+         console.print(
+             _styled(
+                 Messages.INFO_CONFIG_SUMMARY.format(
+                     api="yes" if cfg.api_key else "no",
+                     model=cfg.model or DEFAULT_MODEL,
+                     batch=cfg.batch_size if cfg.batch_size is not None else DEFAULT_BATCH_SIZE,
+                 ),
+                 Styles.INFO,
+             )
+         )
+
+
+ def _render_results(results: Sequence[DisplayResult], base: Path, backend: str | None) -> None:
+     console.print(_styled(Messages.TABLE_TITLE, Styles.TITLE))
+     if backend:
+         console.print(_styled(f"{Messages.TABLE_BACKEND_PREFIX}{backend}", Styles.INFO))
+     table = Table(show_header=True, header_style=Styles.TABLE_HEADER)
+     table.add_column(Messages.TABLE_HEADER_INDEX, justify="right")
+     table.add_column(Messages.TABLE_HEADER_SIMILARITY, justify="right")
+     table.add_column(Messages.TABLE_HEADER_PATH, overflow="fold")
+     for idx, result in enumerate(results, start=1):
+         table.add_row(
+             str(idx),
+             f"{result.score:.3f}",
+             format_path(result.path, base),
+         )
+     console.print(table)
+
+
+ def _create_searcher(model_name: str, batch_size: int):
+     from .search import VexorSearcher  # Local import keeps CLI startup fast
+
+     return VexorSearcher(model_name=model_name, batch_size=batch_size)
+
+
+ def _label_for_path(path: Path) -> str:
+     return path.name.replace("_", " ")
+
+
+ def _load_index(root: Path, model: str, include_hidden: bool):
+     from .cache import load_index_vectors  # local import
+
+     return load_index_vectors(root, model, include_hidden)
+
+
+ def _load_index_metadata_safe(root: Path, model: str, include_hidden: bool):
+     from .cache import load_index  # local import
+
+     try:
+         return load_index(root, model, include_hidden)
+     except FileNotFoundError:
+         return None
+
+
+ def _store_index(**kwargs):
+     from .cache import store_index  # local import
+
+     return store_index(**kwargs)
+
+
+ def _is_cache_current(
+     root: Path,
+     include_hidden: bool,
+     cached_files: Sequence[dict],
+     *,
+     current_files: Sequence[Path] | None = None,
+ ) -> bool:
+     if not cached_files:
+         return False
+     from .cache import compare_snapshot  # local import
+
+     return compare_snapshot(
+         root,
+         include_hidden,
+         cached_files,
+         current_files=current_files,
+     )
+
+
+ def _warn_if_stale(root: Path, include_hidden: bool, cached_files: Sequence[dict]) -> None:
+     if not cached_files:
+         return
+     if not _is_cache_current(root, include_hidden, cached_files):
+         console.print(
+             _styled(Messages.WARNING_INDEX_STALE.format(path=root), Styles.WARNING)
+         )
+
+
+ def _styled(text: str, style: str) -> str:
+     return f"[{style}]{text}[/{style}]"
+
+
+ def run(argv: list[str] | None = None) -> None:
+     """Entry point wrapper allowing optional argument override."""
+     if argv is None:
+         app()
+     else:
+         app(args=list(argv))
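Because `run()` accepts an explicit argv, the CLI can be driven in-process, which is handy for tests. A sketch (query and path are placeholders; Click's standalone mode raises `SystemExit` when the command finishes):

```python
from vexor.cli import run

# Equivalent to: vexor search "api client config" --path ~/projects/demo --top 5
run(["search", "api client config", "--path", "~/projects/demo", "--top", "5"])
```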
vexor-0.1.2/vexor/config.py ADDED
@@ -0,0 +1,62 @@
+ """Global configuration management for Vexor."""
+
+ from __future__ import annotations
+
+ import json
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict
+
+ CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
+ CONFIG_FILE = CONFIG_DIR / "config.json"
+ DEFAULT_MODEL = "gemini-embedding-001"
+ DEFAULT_BATCH_SIZE = 0
+ ENV_API_KEY = "GOOGLE_GENAI_API_KEY"
+
+
+ @dataclass
+ class Config:
+     api_key: str | None = None
+     model: str = DEFAULT_MODEL
+     batch_size: int = DEFAULT_BATCH_SIZE
+
+
+ def load_config() -> Config:
+     if not CONFIG_FILE.exists():
+         return Config()
+     raw = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
+     return Config(
+         api_key=raw.get("api_key") or None,
+         model=raw.get("model") or DEFAULT_MODEL,
+         batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
+     )
+
+
+ def save_config(config: Config) -> None:
+     CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+     data: Dict[str, Any] = {}
+     if config.api_key:
+         data["api_key"] = config.api_key
+     if config.model:
+         data["model"] = config.model
+     data["batch_size"] = config.batch_size
+     CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+ def set_api_key(value: str | None) -> None:
+     config = load_config()
+     config.api_key = value
+     save_config(config)
+
+
+ def set_model(value: str) -> None:
+     config = load_config()
+     config.model = value
+     save_config(config)
+
+
+ def set_batch_size(value: int) -> None:
+     config = load_config()
+     config.batch_size = value
+     save_config(config)
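A quick round-trip through these helpers, assuming a writable home directory (the values shown are examples):

```python
from vexor.config import load_config, set_batch_size, set_model

set_model("gemini-embedding-001")  # persisted to ~/.vexor/config.json
set_batch_size(16)                 # 0 means "send everything in one request"

cfg = load_config()
print(cfg.model, cfg.batch_size)   # gemini-embedding-001 16
```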
vexor-0.1.2/vexor/search.py ADDED
@@ -0,0 +1,152 @@
+ """Semantic search helpers backed by the Google Gemini embedding API."""
+
+ from __future__ import annotations
+
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Iterator, List, Protocol, Sequence
+
+ import numpy as np
+ from dotenv import load_dotenv
+ from google import genai
+ from google.genai import errors as genai_errors
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from .config import DEFAULT_MODEL, ENV_API_KEY, load_config
+ from .text import Messages
+
+
+ @dataclass(slots=True)
+ class SearchResult:
+     """Container describing a single semantic search hit."""
+
+     path: Path
+     score: float
+
+
+ class EmbeddingBackend(Protocol):
+     """Minimal protocol for components that can embed text batches."""
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         """Return embeddings for *texts* as a 2D numpy array."""
+         raise NotImplementedError  # pragma: no cover
+
+
+ class GeminiEmbeddingBackend:
+     """Embedding backend that calls the Gemini API via google-genai."""
+
+     def __init__(
+         self,
+         *,
+         model_name: str = DEFAULT_MODEL,
+         api_key: str | None = None,
+         chunk_size: int | None = None,
+     ) -> None:
+         load_dotenv()
+         config = load_config()
+         self.model_name = model_name
+         self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+         env_key = os.getenv(ENV_API_KEY)
+         configured_key = getattr(config, "api_key", None)
+         self.api_key = api_key or configured_key or env_key
+         if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
+             raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+         self._client = genai.Client(api_key=self.api_key)
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         if not texts:
+             return np.empty((0, 0), dtype=np.float32)
+         vectors: list[np.ndarray] = []
+         for chunk in _chunk(texts, self.chunk_size):
+             try:
+                 response = self._client.models.embed_content(
+                     model=self.model_name,
+                     contents=list(chunk),
+                 )
+             except genai_errors.ClientError as exc:
+                 raise RuntimeError(_format_genai_error(exc)) from exc
+             embeddings = getattr(response, "embeddings", None)
+             if not embeddings:
+                 raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+             for embedding in embeddings:
+                 values = getattr(embedding, "values", None) or getattr(
+                     embedding, "value", None
+                 )
+                 vectors.append(np.asarray(values, dtype=np.float32))
+         return np.vstack(vectors)
+
+
+ class VexorSearcher:
+     """Encapsulates embedding generation and similarity computation."""
+
+     def __init__(
+         self,
+         model_name: str = DEFAULT_MODEL,
+         *,
+         backend: EmbeddingBackend | None = None,
+         batch_size: int = 0,
+     ) -> None:
+         self.model_name = model_name
+         self.batch_size = max(batch_size, 0)
+         self._backend = backend or GeminiEmbeddingBackend(
+             model_name=model_name, chunk_size=self.batch_size
+         )
+         self._device = f"{self.model_name} via Gemini API"
+
+     @property
+     def device(self) -> str:
+         """Return a description of the remote backend in use."""
+         return self._device
+
+     def _encode(self, texts: Sequence[str]) -> np.ndarray:
+         embeddings = self._backend.embed(texts)
+         if embeddings.size == 0:
+             return embeddings
+         norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
+         norms[norms == 0] = 1.0
+         return embeddings / norms
+
+     def embed_texts(self, texts: Sequence[str]) -> np.ndarray:
+         """Public helper to encode arbitrary text batches."""
+         return self._encode(texts)
+
+     def search(self, query: str, files: Sequence[Path], top_k: int = 5) -> List[SearchResult]:
+         """Return the *top_k* most similar files for *query*."""
+         clean_query = query.strip()
+         if not clean_query:
+             raise ValueError("Query text must not be empty")
+         if not files:
+             return []
+         file_labels = [self._prepare_text(path) for path in files]
+         file_vectors = self._encode(file_labels)
+         query_vector = self._encode([clean_query])[0]
+         similarities = cosine_similarity(
+             query_vector.reshape(1, -1), file_vectors
+         )[0]
+         scored = [
+             SearchResult(path=path, score=float(score))
+             for path, score in zip(files, similarities)
+         ]
+         scored.sort(key=lambda item: item.score, reverse=True)
+         return scored[:top_k]
+
+     @staticmethod
+     def _prepare_text(path: Path) -> str:
+         """Return the text representation of a file path for embedding."""
+         return path.name.replace("_", " ")
+
+
+ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+     if size is None or size <= 0:
+         yield items
+         return
+     for idx in range(0, len(items), size):
+         yield items[idx : idx + size]
+
+
+ def _format_genai_error(exc: genai_errors.ClientError) -> str:
+     message = getattr(exc, "message", None) or str(exc)
+     if "API key" in message:
+         return Messages.ERROR_API_KEY_INVALID
+     return f"{Messages.ERROR_GENAI_PREFIX}{message}"
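The `EmbeddingBackend` protocol is the seam the offline tests use: anything with an `embed(texts) -> np.ndarray` method can stand in for the Gemini client. A minimal sketch with a deterministic hash-seeded fake (the fake itself is illustrative, not the package's actual test backend):

```python
import hashlib
from pathlib import Path

import numpy as np

from vexor.search import VexorSearcher


class FakeBackend:
    """Deterministic stand-in satisfying the EmbeddingBackend protocol."""

    def embed(self, texts):
        rows = []
        for text in texts:
            # Seed a generator from the text so embeddings are stable.
            seed = int.from_bytes(hashlib.sha1(text.encode()).digest()[:4], "big")
            rows.append(np.random.default_rng(seed).random(8, dtype=np.float32))
        return np.vstack(rows)


searcher = VexorSearcher(backend=FakeBackend())  # no API key needed
hits = searcher.search("config loader",
                       [Path("config_loader.py"), Path("readme.md")], top_k=1)
print(hits[0].path, f"{hits[0].score:.3f}")
```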
vexor-0.1.2/vexor/text.py ADDED
@@ -0,0 +1,63 @@
+ """Centralized user-facing text for Vexor CLI."""
+
+ from __future__ import annotations
+
+ class Styles:
+     ERROR = "red"
+     WARNING = "yellow"
+     SUCCESS = "green"
+     INFO = "dim"
+     TITLE = "bold cyan"
+     TABLE_HEADER = "bold magenta"
+
+
+ class Messages:
+     APP_HELP = "Vexor – A vector-powered CLI for semantic search over filenames."
+     HELP_QUERY = "Text used to semantically match file names."
+     HELP_SEARCH_PATH = "Root directory whose cached index will be used."
+     HELP_SEARCH_TOP = "Number of results to display."
+     HELP_INCLUDE_HIDDEN = "Use the index built with hidden files included."
+     HELP_INDEX_PATH = "Root directory to scan recursively for indexing."
+     HELP_INDEX_INCLUDE = "Include hidden files and directories when building the index."
+     HELP_SET_API_KEY = "Persist an API key in ~/.vexor/config.json."
+     HELP_CLEAR_API_KEY = "Remove the stored API key."
+     HELP_SET_MODEL = "Set the default embedding model."
+     HELP_SET_BATCH = "Set the default batch size (0 = single request)."
+     HELP_SHOW_CONFIG = "Show current configuration."
+
+     ERROR_API_KEY_MISSING = (
+         "Gemini API key is missing or still set to the placeholder. "
+         "Configure it via `vexor config --set-api-key <token>` or an environment variable."
+     )
+     ERROR_API_KEY_INVALID = (
+         "Gemini API key is invalid. Verify the stored token and try again."
+     )
+     ERROR_GENAI_PREFIX = "Gemini API request failed: "
+     ERROR_NO_EMBEDDINGS = "Gemini API returned no embeddings."
+     ERROR_EMPTY_QUERY = "Query text must not be empty."
+     ERROR_BATCH_NEGATIVE = "Batch size must be >= 0"
+
+     INFO_NO_FILES = "No files found in the selected directory."
+     INFO_NO_RESULTS = "No matching files found."
+     ERROR_INDEX_MISSING = (
+         "No cached index found for {path}. Run `vexor index --path \"{path}\"` first."
+     )
+     INFO_INDEX_SAVED = "Index saved to {path}."
+     INFO_INDEX_EMPTY = "Index contains no files."
+     INFO_INDEX_UP_TO_DATE = "Index already matches the current directory; nothing to do."
+     WARNING_INDEX_STALE = "Cached index for {path} appears outdated; run `vexor index --path \"{path}\"` to refresh."
+     INFO_API_SAVED = "API key saved."
+     INFO_API_CLEARED = "API key cleared."
+     INFO_MODEL_SET = "Default model set to {value}."
+     INFO_BATCH_SET = "Default batch size set to {value}."
+     INFO_CONFIG_SUMMARY = (
+         "API key set: {api}\n"
+         "Default model: {model}\n"
+         "Default batch size: {batch}"
+     )
+
+     TABLE_TITLE = "Vexor semantic file search results"
+     TABLE_HEADER_INDEX = "#"
+     TABLE_HEADER_SIMILARITY = "Similarity"
+     TABLE_HEADER_PATH = "File path"
+     TABLE_BACKEND_PREFIX = "Backend: "
vexor-0.1.2/vexor/utils.py ADDED
@@ -0,0 +1,50 @@
+ """Utility helpers for filesystem access and path handling."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Iterable, List
+ import os
+
+
+ def resolve_directory(path: Path | str) -> Path:
+     """Resolve and validate a user supplied directory path."""
+     dir_path = Path(path).expanduser().resolve()
+     if not dir_path.exists():
+         raise FileNotFoundError(f"Directory does not exist: {dir_path}")
+     if not dir_path.is_dir():
+         raise NotADirectoryError(f"Path is not a directory: {dir_path}")
+     return dir_path
+
+
+ def collect_files(root: Path | str, include_hidden: bool = False) -> List[Path]:
+     """Recursively collect files under *root*, optionally keeping hidden entries."""
+     directory = resolve_directory(root)
+     files: List[Path] = []
+     for dirpath, dirnames, filenames in os.walk(directory):
+         if not include_hidden:
+             dirnames[:] = [d for d in dirnames if not d.startswith(".")]
+             filenames = [f for f in filenames if not f.startswith(".")]
+         current_dir = Path(dirpath)
+         for filename in filenames:
+             files.append(current_dir / filename)
+     files.sort()
+     return files
+
+
+ def format_path(path: Path, base: Path | None = None) -> str:
+     """Return a user friendly representation of *path* relative to *base* when possible."""
+     if base:
+         try:
+             relative = path.relative_to(base)
+             return f"./{relative.as_posix()}"
+         except ValueError:
+             return str(path)
+     return str(path)
+
+
+ def ensure_positive(value: int, name: str) -> int:
+     """Validate that *value* is positive."""
+     if value <= 0:
+         raise ValueError(f"{name} must be greater than 0")
+     return value
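Putting the helpers together: a sketch that walks a tree the same way `vexor index` does (the directory is a placeholder):

```python
from pathlib import Path

from vexor.utils import collect_files, format_path

root = Path("~/projects/demo").expanduser()
for file in collect_files(root, include_hidden=False):
    print(format_path(file, root))  # e.g. ./src/config_loader.py
```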