vexor-0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vexor might be problematic.

vexor-0.1.2/.gitignore ADDED
@@ -0,0 +1,159 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ PIPE_MANIFEST
+
+ # PyInstaller
+ *.manifest
+ #*.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ Pipfile.lock
+
+ # poetry
+ poetry.lock
+
+ # pdm
+ .pdm.toml
+
+ # PEP 582
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # IDE specific files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+ .DS_Store
+
+ # SentenceTransformers cache
+ .cache/
+ sentence_transformers/
+
+ # Model cache (HuggingFace)
+ .transformers_cache/
+ models/
+
+ # Project specific
+ *.db
+ *.sqlite
+ test_data/
+ temp/
+ tmp/
vexor-0.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 ScarletKc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
vexor-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,95 @@
+ Metadata-Version: 2.4
+ Name: vexor
+ Version: 0.1.2
+ Summary: A vector-powered CLI for semantic search over filenames.
+ Project-URL: Repository, https://github.com/scarletkc/vexor
+ Author: scarletkc
+ License: MIT
+ License-File: LICENSE
+ Keywords: ai,cli,semantic-search,typer
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: End Users/Desktop
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: System :: Filesystems
+ Classifier: Topic :: Text Processing :: Indexing
+ Classifier: Topic :: Utilities
+ Requires-Python: >=3.9
+ Requires-Dist: google-genai>=0.5.0
+ Requires-Dist: numpy>=1.23.0
+ Requires-Dist: python-dotenv>=1.0.0
+ Requires-Dist: rich>=13.0.0
+ Requires-Dist: scikit-learn>=1.3.0
+ Requires-Dist: typer>=0.9.0
+ Provides-Extra: dev
+ Requires-Dist: build>=1.2.1; extra == 'dev'
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
+ Requires-Dist: pytest>=7.4; extra == 'dev'
+ Requires-Dist: twine>=5.1.1; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # Vexor
+
+ Vexor is a vector-powered CLI that searches file names semantically. It uses Google GenAI's `gemini-embedding-001` model to embed file names and queries, then ranks matches with cosine similarity.
+
+ ## Install
+ ```bash
+ pip install -e .
+ ```
+ The CLI entry point is `vexor` (or `python -m vexor`).
+
+ ## Configure
+ Set the Gemini API key once and reuse it everywhere:
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Optional defaults:
+ ```bash
+ vexor config --set-model gemini-embedding-001
+ vexor config --set-batch-size 0 # 0 = single request
+ ```
+ Configuration is stored in `~/.vexor/config.json`.
+
+ ## Workflow
+ 1. **Index** the project root (includes every subdirectory):
+ ```bash
+ vexor index --path ~/projects/demo --include-hidden
+ ```
+ 2. **Search** from anywhere, pointing to the same path:
+ ```bash
+ vexor search "api client config" --path ~/projects/demo --top 5
+ ```
+ Output example:
+ ```
+ Vexor semantic file search results
+ ──────────────────────────────────
+ 1 0.923 ./src/config_loader.py
+ 2 0.871 ./src/utils/config_parse.py
+ 3 0.809 ./tests/test_config_loader.py
+ ```
+
+ Tips:
+ - Keep one index per project root; subdirectories need separate indexes only if you explicitly run `vexor index` on them.
+ - Hidden files are included only if both `index` and `search` use `--include-hidden`.
+
+ ## Commands
+ | Command | Description |
+ | ------- | ----------- |
+ | `vexor index --path PATH [--include-hidden]` | Recursively scans `PATH`, embeds file names, and writes a cache under `~/.vexor`. |
+ | `vexor search QUERY --path PATH [--top K] [--include-hidden]` | Loads the cached embeddings for `PATH` and ranks matches for `QUERY`. |
+ | `vexor config --set-api-key/--clear-api-key` | Manage the stored Gemini API key. |
+ | `vexor config --set-model/--set-batch-size/--show` | Manage default model and batch size. |
+
+ ## Development
+ Run tests with:
+ ```bash
+ pip install -e .[dev]
+ pytest
+ ```
+ Tests rely on fake embedding backends, so no network access is required.
+
+ Cache files and configuration live in `~/.vexor`. Adjust `_label_for_path` or `VexorSearcher._prepare_text` if you need to encode additional context (e.g., relative paths).
vexor-0.1.2/README.md ADDED
@@ -0,0 +1,61 @@
+ # Vexor
+
+ Vexor is a vector-powered CLI that searches file names semantically. It uses Google GenAI's `gemini-embedding-001` model to embed file names and queries, then ranks matches with cosine similarity.
+
+ ## Install
+ ```bash
+ pip install -e .
+ ```
+ The CLI entry point is `vexor` (or `python -m vexor`).
+
+ ## Configure
+ Set the Gemini API key once and reuse it everywhere:
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Optional defaults:
+ ```bash
+ vexor config --set-model gemini-embedding-001
+ vexor config --set-batch-size 0 # 0 = single request
+ ```
+ Configuration is stored in `~/.vexor/config.json`.
+
+ ## Workflow
+ 1. **Index** the project root (includes every subdirectory):
+ ```bash
+ vexor index --path ~/projects/demo --include-hidden
+ ```
+ 2. **Search** from anywhere, pointing to the same path:
+ ```bash
+ vexor search "api client config" --path ~/projects/demo --top 5
+ ```
+ Output example:
+ ```
+ Vexor semantic file search results
+ ──────────────────────────────────
+ 1 0.923 ./src/config_loader.py
+ 2 0.871 ./src/utils/config_parse.py
+ 3 0.809 ./tests/test_config_loader.py
+ ```
+
+ Tips:
+ - Keep one index per project root; subdirectories need separate indexes only if you explicitly run `vexor index` on them.
+ - Hidden files are included only if both `index` and `search` use `--include-hidden`.
+
+ ## Commands
+ | Command | Description |
+ | ------- | ----------- |
+ | `vexor index --path PATH [--include-hidden]` | Recursively scans `PATH`, embeds file names, and writes a cache under `~/.vexor`. |
+ | `vexor search QUERY --path PATH [--top K] [--include-hidden]` | Loads the cached embeddings for `PATH` and ranks matches for `QUERY`. |
+ | `vexor config --set-api-key/--clear-api-key` | Manage the stored Gemini API key. |
+ | `vexor config --set-model/--set-batch-size/--show` | Manage default model and batch size. |
+
+ ## Development
+ Run tests with:
+ ```bash
+ pip install -e .[dev]
+ pytest
+ ```
+ Tests rely on fake embedding backends, so no network access is required.
+
+ Cache files and configuration live in `~/.vexor`. Adjust `_label_for_path` or `VexorSearcher._prepare_text` if you need to encode additional context (e.g., relative paths).
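The README's closing note suggests adjusting `_label_for_path` or `VexorSearcher._prepare_text` to encode more context. A minimal sketch of such a variant; the optional `root` parameter is an illustration, not part of the released API:

```python
from __future__ import annotations

from pathlib import Path


def _prepare_text(path: Path, root: Path | None = None) -> str:
    """Variant that embeds the relative path, not just the file name."""
    if root is not None:
        try:
            path = path.relative_to(root)
        except ValueError:
            pass  # path lies outside root; keep it as-is
    # Turn separators and underscores into spaces so the embedding
    # model sees something closer to natural language.
    return path.as_posix().replace("/", " ").replace("_", " ")
```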
vexor-0.1.2/pyproject.toml ADDED
@@ -0,0 +1,68 @@
+
+ [build-system]
+ requires = ["hatchling>=1.21"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "vexor"
+ description = "A vector-powered CLI for semantic search over filenames."
+ readme = "README.md"
+ authors = [{ name = "scarletkc" }]
+ license = { text = "MIT" }
+ requires-python = ">=3.9"
+ keywords = ["semantic-search", "cli", "typer", "ai"]
+ classifiers = [
+     "Development Status :: 3 - Alpha",
+     "Environment :: Console",
+     "Intended Audience :: End Users/Desktop",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+     "Programming Language :: Python",
+     "Programming Language :: Python :: 3",
+     "Topic :: Utilities",
+     "Topic :: System :: Filesystems",
+     "Topic :: Text Processing :: Indexing",
+     "Topic :: Scientific/Engineering :: Information Analysis",
+ ]
+ dependencies = [
+     "google-genai>=0.5.0",
+     "python-dotenv>=1.0.0",
+     "scikit-learn>=1.3.0",
+     "numpy>=1.23.0",
+     "typer>=0.9.0",
+     "rich>=13.0.0",
+ ]
+ dynamic = ["version"]
+
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.4",
+     "pytest-cov>=4.1",
+     "build>=1.2.1",
+     "twine>=5.1.1",
+ ]
+
+ [project.urls]
+ Repository = "https://github.com/scarletkc/vexor"
+
+ [project.scripts]
+ vexor = "vexor.cli:run"
+
+ [tool.hatch.version]
+ path = "vexor/__init__.py"
+
+ [tool.hatch.build.targets.sdist]
+ include = [
+     "vexor",
+     "README.md",
+     "LICENSE",
+     "pyproject.toml",
+ ]
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["vexor"]
+
+ [tool.pytest.ini_options]
+ addopts = "-ra"
+ pythonpath = ["."]
vexor-0.1.2/vexor/__init__.py ADDED
@@ -0,0 +1,12 @@
+ """Vexor package initialization."""
+
+ from __future__ import annotations
+
+ __all__ = ["__version__", "get_version"]
+
+ __version__ = "0.1.2"
+
+
+ def get_version() -> str:
+     """Return the current package version."""
+     return __version__
vexor-0.1.2/vexor/__main__.py ADDED
@@ -0,0 +1,18 @@
+ """Entry point for `python -m vexor` and frozen builds."""
+
+ from __future__ import annotations
+
+ try:
+     # Normal package execution path
+     from .cli import run
+ except ImportError:  # pragma: no cover - happens in frozen single-file builds
+     from vexor.cli import run  # type: ignore[import]
+
+
+ def main() -> None:
+     """Execute the Typer application."""
+     run()
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
vexor-0.1.2/vexor/cache.py ADDED
@@ -0,0 +1,134 @@
+ """Index cache helpers for Vexor."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import os
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Iterable, Sequence
+
+ import numpy as np
+
+ from .utils import collect_files
+
+ CACHE_DIR = Path(os.path.expanduser("~")) / ".vexor"
+ CACHE_VERSION = 1
+
+
+ def _safe_model_name(model: str) -> str:
+     return model.replace("/", "_")
+
+
+ def _cache_key(root: Path, include_hidden: bool) -> str:
+     digest = hashlib.sha1(f"{root.resolve()}|hidden={include_hidden}".encode("utf-8")).hexdigest()
+     return digest
+
+
+ def cache_file(root: Path, model: str, include_hidden: bool) -> Path:
+     key = _cache_key(root, include_hidden)
+     safe_model = _safe_model_name(model)
+     return CACHE_DIR / f"{key}-{safe_model}.json"
+
+
+ def ensure_cache_dir() -> Path:
+     CACHE_DIR.mkdir(parents=True, exist_ok=True)
+     return CACHE_DIR
+
+
+ def store_index(
+     *,
+     root: Path,
+     model: str,
+     include_hidden: bool,
+     files: Sequence[Path],
+     embeddings: np.ndarray,
+ ) -> Path:
+     ensure_cache_dir()
+     payload = {
+         "version": CACHE_VERSION,
+         "generated_at": datetime.now(timezone.utc).isoformat(),
+         "root": str(root),
+         "model": model,
+         "include_hidden": include_hidden,
+         "dimension": int(embeddings.shape[1] if embeddings.size else 0),
+         "files": [],
+     }
+     for idx, file in enumerate(files):
+         stat = file.stat()
+         try:
+             rel_path = file.relative_to(root)
+         except ValueError:
+             rel_path = file
+         payload["files"].append(
+             {
+                 "path": str(rel_path),
+                 "absolute": str(file),
+                 "mtime": stat.st_mtime,
+                 "size": stat.st_size,
+                 "embedding": embeddings[idx].astype(float).tolist(),
+             }
+         )
+     path = cache_file(root, model, include_hidden)
+     path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+     return path
+
+
+ def load_index(root: Path, model: str, include_hidden: bool) -> dict:
+     path = cache_file(root, model, include_hidden)
+     if not path.exists():
+         raise FileNotFoundError(path)
+     text = path.read_text(encoding="utf-8")
+     return json.loads(text)
+
+
+ def load_index_vectors(root: Path, model: str, include_hidden: bool):
+     data = load_index(root, model, include_hidden)
+     files = data.get("files", [])
+     paths = [root / Path(entry["path"]) for entry in files]
+     embeddings = np.asarray([entry["embedding"] for entry in files], dtype=np.float32)
+     return paths, embeddings, data
+
+
+ def compare_snapshot(
+     root: Path,
+     include_hidden: bool,
+     cached_files: Sequence[dict],
+     current_files: Sequence[Path] | None = None,
+ ) -> bool:
+     """Return True if the current filesystem matches the cached snapshot."""
+     if current_files is None:
+         current_files = collect_files(root, include_hidden=include_hidden)
+     if len(current_files) != len(cached_files):
+         return False
+     cached_map = {
+         entry["path"]: (entry["mtime"], entry.get("size"))
+         for entry in cached_files
+     }
+     for file in current_files:
+         rel = _relative_path(file, root)
+         data = cached_map.get(rel)
+         if data is None:
+             return False
+         cached_mtime, cached_size = data
+         stat = file.stat()
+         current_mtime = stat.st_mtime
+         current_size = stat.st_size
+         # allow drift due to filesystem precision (approx 0.5s on some platforms)
+         if abs(current_mtime - cached_mtime) > 5e-1:
+             if cached_size is not None and cached_size == current_size:
+                 continue
+             return False
+         if cached_size is not None and cached_size != current_size:
+             return False
+     return True
+
+
+ def _relative_path(path: Path, root: Path) -> str:
+     try:
+         rel = path.relative_to(root)
+     except ValueError:
+         rel = path
+     return str(rel)
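In short, each index lives in a single JSON file whose name combines a SHA-1 digest of the resolved root (plus the hidden-files flag) with the model name. A sketch of how that resolves, using a hypothetical project path:

```python
from pathlib import Path

from vexor.cache import cache_file

# Hypothetical root; the digest covers "<resolved root>|hidden=False",
# and any "/" in the model name is replaced by "_".
target = cache_file(Path("~/projects/demo").expanduser(),
                    "gemini-embedding-001", include_hidden=False)
print(target)  # ~/.vexor/<sha1-hex>-gemini-embedding-001.json
```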
vexor-0.1.2/vexor/cli.py ADDED
@@ -0,0 +1,334 @@
+ """Command line interface for Vexor."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Sequence
+
+ import typer
+ from rich.console import Console
+ from rich.table import Table
+
+ from . import __version__
+ from .config import (
+     DEFAULT_BATCH_SIZE,
+     DEFAULT_MODEL,
+     load_config,
+     set_api_key,
+     set_batch_size,
+     set_model,
+ )
+ from .text import Messages, Styles
+ from .utils import collect_files, resolve_directory, format_path, ensure_positive
+
+ console = Console()
+ app = typer.Typer(
+     help=Messages.APP_HELP,
+     no_args_is_help=True,
+     context_settings={"help_option_names": ["-h", "--help"]},
+ )
+
+
+ @dataclass(slots=True)
+ class DisplayResult:
+     path: Path
+     score: float
+
+
+ def _version_callback(value: bool) -> None:
+     if value:
+         console.print(f"Vexor v{__version__}")
+         raise typer.Exit()
+
+
+ @app.callback()
+ def main(
+     version: bool = typer.Option(
+         False,
+         "--version",
+         "-v",
+         callback=_version_callback,
+         is_eager=True,
+         help="Show version and exit.",
+     )
+ ) -> None:
+     """Global Typer callback for shared options."""
+     return None
+
+
+ @app.command()
+ def search(
+     query: str = typer.Argument(..., help=Messages.HELP_QUERY),
+     path: Path = typer.Option(
+         Path.cwd(),
+         "--path",
+         "-p",
+         help=Messages.HELP_SEARCH_PATH,
+     ),
+     top: int = typer.Option(5, "--top", "-k", help=Messages.HELP_SEARCH_TOP),
+     include_hidden: bool = typer.Option(
+         False,
+         "--include-hidden",
+         help=Messages.HELP_INCLUDE_HIDDEN,
+     ),
+ ) -> None:
+     """Run the semantic search using a cached index."""
+     config = load_config()
+     model_name = config.model or DEFAULT_MODEL
+     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
+
+     clean_query = query.strip()
+     if not clean_query:
+         console.print(_styled(Messages.ERROR_EMPTY_QUERY, Styles.ERROR))
+         raise typer.Exit(code=1)
+     try:
+         ensure_positive(top, "top")
+     except ValueError as exc:  # pragma: no cover - validated by Typer
+         raise typer.BadParameter(str(exc), param_name="top") from exc
+
+     directory = resolve_directory(path)
+     try:
+         cached_paths, file_vectors, meta = _load_index(directory, model_name, include_hidden)
+     except FileNotFoundError:
+         console.print(
+             _styled(Messages.ERROR_INDEX_MISSING.format(path=directory), Styles.ERROR)
+         )
+         raise typer.Exit(code=1)
+
+     _warn_if_stale(directory, include_hidden, meta.get("files", []))
+
+     if not cached_paths:
+         console.print(_styled(Messages.INFO_INDEX_EMPTY, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     searcher = _create_searcher(model_name=model_name, batch_size=batch_size)
+     try:
+         query_vector = searcher.embed_texts([clean_query])[0]
+     except RuntimeError as exc:
+         console.print(_styled(str(exc), Styles.ERROR))
+         raise typer.Exit(code=1)
+
+     from sklearn.metrics.pairwise import cosine_similarity  # local import
+
+     similarities = cosine_similarity(
+         query_vector.reshape(1, -1), file_vectors
+     )[0]
+     scored = [
+         DisplayResult(path=path, score=float(score))
+         for path, score in zip(cached_paths, similarities)
+     ]
+     scored.sort(key=lambda item: item.score, reverse=True)
+     results = scored[:top]
+
+     if not results:
+         console.print(_styled(Messages.INFO_NO_RESULTS, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     _render_results(results, directory, searcher.device)
+
+
+ @app.command()
+ def index(
+     path: Path = typer.Option(
+         Path.cwd(),
+         "--path",
+         "-p",
+         help=Messages.HELP_INDEX_PATH,
+     ),
+     include_hidden: bool = typer.Option(
+         False,
+         "--include-hidden",
+         help=Messages.HELP_INDEX_INCLUDE,
+     ),
+ ) -> None:
+     """Create or refresh the cached index for the given directory."""
+     config = load_config()
+     model_name = config.model or DEFAULT_MODEL
+     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
+
+     directory = resolve_directory(path)
+     files = collect_files(directory, include_hidden=include_hidden)
+     if not files:
+         console.print(_styled(Messages.INFO_NO_FILES, Styles.WARNING))
+         raise typer.Exit(code=0)
+
+     existing_meta = _load_index_metadata_safe(directory, model_name, include_hidden)
+     if existing_meta:
+         cached_files = existing_meta.get("files", [])
+         if cached_files and _is_cache_current(
+             directory, include_hidden, cached_files, current_files=files
+         ):
+             console.print(
+                 _styled(Messages.INFO_INDEX_UP_TO_DATE.format(path=directory), Styles.INFO)
+             )
+             return
+
+     searcher = _create_searcher(model_name=model_name, batch_size=batch_size)
+     file_labels = [_label_for_path(file) for file in files]
+     embeddings = searcher.embed_texts(file_labels)
+
+     cache_path = _store_index(
+         root=directory,
+         model=model_name,
+         include_hidden=include_hidden,
+         files=files,
+         embeddings=embeddings,
+     )
+     console.print(_styled(Messages.INFO_INDEX_SAVED.format(path=cache_path), Styles.SUCCESS))
+
+
+ @app.command()
+ def config(
+     set_api_key_option: str | None = typer.Option(
+         None,
+         "--set-api-key",
+         help=Messages.HELP_SET_API_KEY,
+     ),
+     clear_api_key: bool = typer.Option(
+         False,
+         "--clear-api-key",
+         help=Messages.HELP_CLEAR_API_KEY,
+     ),
+     set_model_option: str | None = typer.Option(
+         None,
+         "--set-model",
+         help=Messages.HELP_SET_MODEL,
+     ),
+     set_batch_option: int | None = typer.Option(
+         None,
+         "--set-batch-size",
+         help=Messages.HELP_SET_BATCH,
+     ),
+     show: bool = typer.Option(
+         False,
+         "--show",
+         help=Messages.HELP_SHOW_CONFIG,
+     ),
+ ) -> None:
+     """Manage Vexor configuration stored in ~/.vexor/config.json."""
+     changed = False
+
+     if set_api_key_option is not None:
+         set_api_key(set_api_key_option)
+         console.print(_styled(Messages.INFO_API_SAVED, Styles.SUCCESS))
+         changed = True
+     if clear_api_key:
+         set_api_key(None)
+         console.print(_styled(Messages.INFO_API_CLEARED, Styles.SUCCESS))
+         changed = True
+     if set_model_option is not None:
+         set_model(set_model_option)
+         console.print(
+             _styled(Messages.INFO_MODEL_SET.format(value=set_model_option), Styles.SUCCESS)
+         )
+         changed = True
+     if set_batch_option is not None:
+         if set_batch_option < 0:
+             raise typer.BadParameter(Messages.ERROR_BATCH_NEGATIVE)
+         set_batch_size(set_batch_option)
+         console.print(
+             _styled(Messages.INFO_BATCH_SET.format(value=set_batch_option), Styles.SUCCESS)
+         )
+         changed = True
+
+     if show or not changed:
+         cfg = load_config()
+         console.print(
+             _styled(
+                 Messages.INFO_CONFIG_SUMMARY.format(
+                     api="yes" if cfg.api_key else "no",
+                     model=cfg.model or DEFAULT_MODEL,
+                     batch=cfg.batch_size if cfg.batch_size is not None else DEFAULT_BATCH_SIZE,
+                 ),
+                 Styles.INFO,
+             )
+         )
+
+
+ def _render_results(results: Sequence[DisplayResult], base: Path, backend: str | None) -> None:
+     console.print(_styled(Messages.TABLE_TITLE, Styles.TITLE))
+     if backend:
+         console.print(_styled(f"{Messages.TABLE_BACKEND_PREFIX}{backend}", Styles.INFO))
+     table = Table(show_header=True, header_style=Styles.TABLE_HEADER)
+     table.add_column(Messages.TABLE_HEADER_INDEX, justify="right")
+     table.add_column(Messages.TABLE_HEADER_SIMILARITY, justify="right")
+     table.add_column(Messages.TABLE_HEADER_PATH, overflow="fold")
+     for idx, result in enumerate(results, start=1):
+         table.add_row(
+             str(idx),
+             f"{result.score:.3f}",
+             format_path(result.path, base),
+         )
+     console.print(table)
+
+
+ def _create_searcher(model_name: str, batch_size: int):
+     from .search import VexorSearcher  # Local import keeps CLI startup fast
+
+     return VexorSearcher(model_name=model_name, batch_size=batch_size)
+
+
+ def _label_for_path(path: Path) -> str:
+     return path.name.replace("_", " ")
+
+
+ def _load_index(root: Path, model: str, include_hidden: bool):
+     from .cache import load_index_vectors  # local import
+
+     return load_index_vectors(root, model, include_hidden)
+
+
+ def _load_index_metadata_safe(root: Path, model: str, include_hidden: bool):
+     from .cache import load_index  # local import
+
+     try:
+         return load_index(root, model, include_hidden)
+     except FileNotFoundError:
+         return None
+
+
+ def _store_index(**kwargs):
+     from .cache import store_index  # local import
+
+     return store_index(**kwargs)
+
+
+ def _is_cache_current(
+     root: Path,
+     include_hidden: bool,
+     cached_files: Sequence[dict],
+     *,
+     current_files: Sequence[Path] | None = None,
+ ) -> bool:
+     if not cached_files:
+         return False
+     from .cache import compare_snapshot  # local import
+
+     return compare_snapshot(
+         root,
+         include_hidden,
+         cached_files,
+         current_files=current_files,
+     )
+
+
+ def _warn_if_stale(root: Path, include_hidden: bool, cached_files: Sequence[dict]) -> None:
+     if not cached_files:
+         return
+     if not _is_cache_current(root, include_hidden, cached_files):
+         console.print(
+             _styled(Messages.WARNING_INDEX_STALE.format(path=root), Styles.WARNING)
+         )
+
+
+ def _styled(text: str, style: str) -> str:
+     return f"[{style}]{text}[/{style}]"
+
+
+ def run(argv: list[str] | None = None) -> None:
+     """Entry point wrapper allowing optional argument override."""
+     if argv is None:
+         app()
+     else:
+         app(args=list(argv))
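Because `run()` accepts an explicit argv, the CLI can be driven in-process, which is handy for tests. A sketch (query and path are placeholders; Click's standalone mode raises `SystemExit` when the command finishes):

```python
from vexor.cli import run

# Equivalent to: vexor search "api client config" --path ~/projects/demo --top 5
run(["search", "api client config", "--path", "~/projects/demo", "--top", "5"])
```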
vexor-0.1.2/vexor/config.py ADDED
@@ -0,0 +1,62 @@
+ """Global configuration management for Vexor."""
+
+ from __future__ import annotations
+
+ import json
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict
+
+ CONFIG_DIR = Path(os.path.expanduser("~")) / ".vexor"
+ CONFIG_FILE = CONFIG_DIR / "config.json"
+ DEFAULT_MODEL = "gemini-embedding-001"
+ DEFAULT_BATCH_SIZE = 0
+ ENV_API_KEY = "GOOGLE_GENAI_API_KEY"
+
+
+ @dataclass
+ class Config:
+     api_key: str | None = None
+     model: str = DEFAULT_MODEL
+     batch_size: int = DEFAULT_BATCH_SIZE
+
+
+ def load_config() -> Config:
+     if not CONFIG_FILE.exists():
+         return Config()
+     raw = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
+     return Config(
+         api_key=raw.get("api_key") or None,
+         model=raw.get("model") or DEFAULT_MODEL,
+         batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
+     )
+
+
+ def save_config(config: Config) -> None:
+     CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+     data: Dict[str, Any] = {}
+     if config.api_key:
+         data["api_key"] = config.api_key
+     if config.model:
+         data["model"] = config.model
+     data["batch_size"] = config.batch_size
+     CONFIG_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+ def set_api_key(value: str | None) -> None:
+     config = load_config()
+     config.api_key = value
+     save_config(config)
+
+
+ def set_model(value: str) -> None:
+     config = load_config()
+     config.model = value
+     save_config(config)
+
+
+ def set_batch_size(value: int) -> None:
+     config = load_config()
+     config.batch_size = value
+     save_config(config)
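A quick round-trip through these helpers, assuming a writable home directory (the values shown are examples):

```python
from vexor.config import load_config, set_batch_size, set_model

set_model("gemini-embedding-001")  # persisted to ~/.vexor/config.json
set_batch_size(16)                 # 0 means "send everything in one request"

cfg = load_config()
print(cfg.model, cfg.batch_size)   # gemini-embedding-001 16
```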
vexor-0.1.2/vexor/search.py ADDED
@@ -0,0 +1,152 @@
+ """Semantic search helpers backed by the Google Gemini embedding API."""
+
+ from __future__ import annotations
+
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Iterator, List, Protocol, Sequence
+
+ import numpy as np
+ from dotenv import load_dotenv
+ from google import genai
+ from google.genai import errors as genai_errors
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from .config import DEFAULT_MODEL, ENV_API_KEY, load_config
+ from .text import Messages
+
+
+ @dataclass(slots=True)
+ class SearchResult:
+     """Container describing a single semantic search hit."""
+
+     path: Path
+     score: float
+
+
+ class EmbeddingBackend(Protocol):
+     """Minimal protocol for components that can embed text batches."""
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         """Return embeddings for *texts* as a 2D numpy array."""
+         raise NotImplementedError  # pragma: no cover
+
+
+ class GeminiEmbeddingBackend:
+     """Embedding backend that calls the Gemini API via google-genai."""
+
+     def __init__(
+         self,
+         *,
+         model_name: str = DEFAULT_MODEL,
+         api_key: str | None = None,
+         chunk_size: int | None = None,
+     ) -> None:
+         load_dotenv()
+         config = load_config()
+         self.model_name = model_name
+         self.chunk_size = chunk_size if chunk_size and chunk_size > 0 else None
+         env_key = os.getenv(ENV_API_KEY)
+         configured_key = getattr(config, "api_key", None)
+         self.api_key = api_key or configured_key or env_key
+         if not self.api_key or self.api_key.strip().lower() == "your_api_key_here":
+             raise RuntimeError(Messages.ERROR_API_KEY_MISSING)
+         self._client = genai.Client(api_key=self.api_key)
+
+     def embed(self, texts: Sequence[str]) -> np.ndarray:
+         if not texts:
+             return np.empty((0, 0), dtype=np.float32)
+         vectors: list[np.ndarray] = []
+         for chunk in _chunk(texts, self.chunk_size):
+             try:
+                 response = self._client.models.embed_content(
+                     model=self.model_name,
+                     contents=list(chunk),
+                 )
+             except genai_errors.ClientError as exc:
+                 raise RuntimeError(_format_genai_error(exc)) from exc
+             embeddings = getattr(response, "embeddings", None)
+             if not embeddings:
+                 raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
+             for embedding in embeddings:
+                 values = getattr(embedding, "values", None) or getattr(
+                     embedding, "value", None
+                 )
+                 vectors.append(np.asarray(values, dtype=np.float32))
+         return np.vstack(vectors)
+
+
+ class VexorSearcher:
+     """Encapsulates embedding generation and similarity computation."""
+
+     def __init__(
+         self,
+         model_name: str = DEFAULT_MODEL,
+         *,
+         backend: EmbeddingBackend | None = None,
+         batch_size: int = 0,
+     ) -> None:
+         self.model_name = model_name
+         self.batch_size = max(batch_size, 0)
+         self._backend = backend or GeminiEmbeddingBackend(
+             model_name=model_name, chunk_size=self.batch_size
+         )
+         self._device = f"{self.model_name} via Gemini API"
+
+     @property
+     def device(self) -> str:
+         """Return a description of the remote backend in use."""
+         return self._device
+
+     def _encode(self, texts: Sequence[str]) -> np.ndarray:
+         embeddings = self._backend.embed(texts)
+         if embeddings.size == 0:
+             return embeddings
+         norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
+         norms[norms == 0] = 1.0
+         return embeddings / norms
+
+     def embed_texts(self, texts: Sequence[str]) -> np.ndarray:
+         """Public helper to encode arbitrary text batches."""
+         return self._encode(texts)
+
+     def search(self, query: str, files: Sequence[Path], top_k: int = 5) -> List[SearchResult]:
+         """Return the *top_k* most similar files for *query*."""
+         clean_query = query.strip()
+         if not clean_query:
+             raise ValueError("Query text must not be empty")
+         if not files:
+             return []
+         file_labels = [self._prepare_text(path) for path in files]
+         file_vectors = self._encode(file_labels)
+         query_vector = self._encode([clean_query])[0]
+         similarities = cosine_similarity(
+             query_vector.reshape(1, -1), file_vectors
+         )[0]
+         scored = [
+             SearchResult(path=path, score=float(score))
+             for path, score in zip(files, similarities)
+         ]
+         scored.sort(key=lambda item: item.score, reverse=True)
+         return scored[:top_k]
+
+     @staticmethod
+     def _prepare_text(path: Path) -> str:
+         """Return the text representation of a file path for embedding."""
+         return path.name.replace("_", " ")
+
+
+ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
+     if size is None or size <= 0:
+         yield items
+         return
+     for idx in range(0, len(items), size):
+         yield items[idx : idx + size]
+
+
+ def _format_genai_error(exc: genai_errors.ClientError) -> str:
+     message = getattr(exc, "message", None) or str(exc)
+     if "API key" in message:
+         return Messages.ERROR_API_KEY_INVALID
+     return f"{Messages.ERROR_GENAI_PREFIX}{message}"
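The `EmbeddingBackend` protocol is the seam the offline tests use: anything with an `embed(texts) -> np.ndarray` method can stand in for the Gemini client. A minimal sketch with a deterministic hash-seeded fake (the fake itself is illustrative, not the package's actual test backend):

```python
import hashlib
from pathlib import Path

import numpy as np

from vexor.search import VexorSearcher


class FakeBackend:
    """Deterministic stand-in satisfying the EmbeddingBackend protocol."""

    def embed(self, texts):
        rows = []
        for text in texts:
            # Seed a generator from the text so embeddings are stable.
            seed = int.from_bytes(hashlib.sha1(text.encode()).digest()[:4], "big")
            rows.append(np.random.default_rng(seed).random(8, dtype=np.float32))
        return np.vstack(rows)


searcher = VexorSearcher(backend=FakeBackend())  # no API key needed
hits = searcher.search("config loader",
                       [Path("config_loader.py"), Path("readme.md")], top_k=1)
print(hits[0].path, f"{hits[0].score:.3f}")
```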
vexor-0.1.2/vexor/text.py ADDED
@@ -0,0 +1,63 @@
+ """Centralized user-facing text for Vexor CLI."""
+
+ from __future__ import annotations
+
+ class Styles:
+     ERROR = "red"
+     WARNING = "yellow"
+     SUCCESS = "green"
+     INFO = "dim"
+     TITLE = "bold cyan"
+     TABLE_HEADER = "bold magenta"
+
+
+ class Messages:
+     APP_HELP = "Vexor – A vector-powered CLI for semantic search over filenames."
+     HELP_QUERY = "Text used to semantically match file names."
+     HELP_SEARCH_PATH = "Root directory whose cached index will be used."
+     HELP_SEARCH_TOP = "Number of results to display."
+     HELP_INCLUDE_HIDDEN = "Use the index built with hidden files included."
+     HELP_INDEX_PATH = "Root directory to scan recursively for indexing."
+     HELP_INDEX_INCLUDE = "Include hidden files and directories when building the index."
+     HELP_SET_API_KEY = "Persist an API key in ~/.vexor/config.json."
+     HELP_CLEAR_API_KEY = "Remove the stored API key."
+     HELP_SET_MODEL = "Set the default embedding model."
+     HELP_SET_BATCH = "Set the default batch size (0 = single request)."
+     HELP_SHOW_CONFIG = "Show current configuration."
+
+     ERROR_API_KEY_MISSING = (
+         "Gemini API key is missing or still set to the placeholder. "
+         "Configure it via `vexor config --set-api-key <token>` or an environment variable."
+     )
+     ERROR_API_KEY_INVALID = (
+         "Gemini API key is invalid. Verify the stored token and try again."
+     )
+     ERROR_GENAI_PREFIX = "Gemini API request failed: "
+     ERROR_NO_EMBEDDINGS = "Gemini API returned no embeddings."
+     ERROR_EMPTY_QUERY = "Query text must not be empty."
+     ERROR_BATCH_NEGATIVE = "Batch size must be >= 0"
+
+     INFO_NO_FILES = "No files found in the selected directory."
+     INFO_NO_RESULTS = "No matching files found."
+     ERROR_INDEX_MISSING = (
+         "No cached index found for {path}. Run `vexor index --path \"{path}\"` first."
+     )
+     INFO_INDEX_SAVED = "Index saved to {path}."
+     INFO_INDEX_EMPTY = "Index contains no files."
+     INFO_INDEX_UP_TO_DATE = "Index already matches the current directory; nothing to do."
+     WARNING_INDEX_STALE = "Cached index for {path} appears outdated; run `vexor index --path \"{path}\"` to refresh."
+     INFO_API_SAVED = "API key saved."
+     INFO_API_CLEARED = "API key cleared."
+     INFO_MODEL_SET = "Default model set to {value}."
+     INFO_BATCH_SET = "Default batch size set to {value}."
+     INFO_CONFIG_SUMMARY = (
+         "API key set: {api}\n"
+         "Default model: {model}\n"
+         "Default batch size: {batch}"
+     )
+
+     TABLE_TITLE = "Vexor semantic file search results"
+     TABLE_HEADER_INDEX = "#"
+     TABLE_HEADER_SIMILARITY = "Similarity"
+     TABLE_HEADER_PATH = "File path"
+     TABLE_BACKEND_PREFIX = "Backend: "
vexor-0.1.2/vexor/utils.py ADDED
@@ -0,0 +1,50 @@
+ """Utility helpers for filesystem access and path handling."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Iterable, List
+ import os
+
+
+ def resolve_directory(path: Path | str) -> Path:
+     """Resolve and validate a user supplied directory path."""
+     dir_path = Path(path).expanduser().resolve()
+     if not dir_path.exists():
+         raise FileNotFoundError(f"Directory does not exist: {dir_path}")
+     if not dir_path.is_dir():
+         raise NotADirectoryError(f"Path is not a directory: {dir_path}")
+     return dir_path
+
+
+ def collect_files(root: Path | str, include_hidden: bool = False) -> List[Path]:
+     """Recursively collect files under *root*, optionally keeping hidden entries."""
+     directory = resolve_directory(root)
+     files: List[Path] = []
+     for dirpath, dirnames, filenames in os.walk(directory):
+         if not include_hidden:
+             dirnames[:] = [d for d in dirnames if not d.startswith(".")]
+             filenames = [f for f in filenames if not f.startswith(".")]
+         current_dir = Path(dirpath)
+         for filename in filenames:
+             files.append(current_dir / filename)
+     files.sort()
+     return files
+
+
+ def format_path(path: Path, base: Path | None = None) -> str:
+     """Return a user friendly representation of *path* relative to *base* when possible."""
+     if base:
+         try:
+             relative = path.relative_to(base)
+             return f"./{relative.as_posix()}"
+         except ValueError:
+             return str(path)
+     return str(path)
+
+
+ def ensure_positive(value: int, name: str) -> int:
+     """Validate that *value* is positive."""
+     if value <= 0:
+         raise ValueError(f"{name} must be greater than 0")
+     return value
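Putting the helpers together: a sketch that walks a tree the same way `vexor index` does (the directory is a placeholder):

```python
from pathlib import Path

from vexor.utils import collect_files, format_path

root = Path("~/projects/demo").expanduser()
for file in collect_files(root, include_hidden=False):
    print(format_path(file, root))  # e.g. ./src/config_loader.py
```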