vault-ask 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 guillaumevele
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,185 @@
1
+ Metadata-Version: 2.4
2
+ Name: vault-ask
3
+ Version: 0.1.1
4
+ Summary: Ask your Obsidian vault, get cited answers, never hallucinate.
5
+ Author: guillaumevele
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/guillaumevele/vault-ask
8
+ Project-URL: Repository, https://github.com/guillaumevele/vault-ask
9
+ Project-URL: Issues, https://github.com/guillaumevele/vault-ask/issues
10
+ Keywords: obsidian,rag,llm,cli,knowledge-management,second-brain,ripgrep,grounded-generation,note-taking
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Text Processing :: Indexing
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Dynamic: license-file
28
+
29
+ # vault-ask
30
+
31
+ [![CI](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml/badge.svg)](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
33
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/)
34
+ [![Zero dependencies](https://img.shields.io/badge/dependencies-zero-success.svg)](pyproject.toml)
35
+
36
+ **Ask your Obsidian vault. Get cited answers. Never hallucinate.**
37
+
38
+ A tiny (~300-line, dependency-free) grounded question-answering tool over a folder
39
+ of Markdown notes. It finds the relevant notes, asks *your* LLM to answer **only**
40
+ from them, forces a `[[wikilink]]` citation on every claim, and **refuses instead
41
+ of guessing** when the answer isn't in your vault.
42
+
43
+ ```console
44
+ $ vault-ask "what did I decide about the pricing model?"
45
+ Q: what did I decide about the pricing model?
46
+
47
+ Flat 49 EUR/month, no per-seat pricing, decided after the churn analysis.
48
+ [[Decisions/2026-Pricing|2026-Pricing]]
49
+
50
+ Notes consulted:
51
+ - [[Decisions/2026-Pricing|2026-Pricing]]
52
+ - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
53
+ ```
54
+
55
+ Ask something that isn't in your notes and it won't make anything up:
56
+
57
+ ```console
58
+ $ vault-ask "what is my bank account number?"
59
+ Q: what is my bank account number?
60
+
61
+ No note in the vault answers this question.
62
+ ```
63
+
64
+ ## Why
65
+
66
+ A second brain is only useful if knowledge comes *back out*. Most "chat with your
67
+ notes" tools either need a vector database and an indexing pipeline, or happily
68
+ hallucinate plausible answers — a dealbreaker when your notes are medical, legal,
69
+ or financial. `vault-ask` is the opposite: zero index, zero database, and a hard
70
+ refusal guarantee. It runs `ripgrep` over your vault, ranks notes by term rarity
71
+ (TF-IDF), and hands the best excerpts to whatever LLM you already use.
72
+
73
+ ## How it works
74
+
75
+ 1. **Candidate search** — `ripgrep` scans the whole vault in milliseconds.
76
+ 2. **IDF ranking** — notes are scored by the *rarity* of the query terms they
77
+ contain, so a rare, specific word (a project codename) outweighs a word that
78
+ appears in hundreds of notes. No embeddings, no index, no warm-up.
79
+ 3. **Focused excerpts** — only the headings and matching lines of the top notes
80
+ are sent to the model (notes can be long).
81
+ 4. **Grounded prompt** — the model must cite each claim as a `[[link]]`, must not
82
+ add outside knowledge, and must reply with a fixed refusal sentence if the
83
+ excerpts don't answer the question.
84
+ 5. **Robust refusal check** — a refusal (even reworded by the model) is never
85
+ dressed up as a sourced answer; its citations are stripped.
86
+
87
+ Nothing leaves your machine except what your own LLM command chooses to send.
88
+
89
+ ## Install
90
+
91
+ Requires **Python 3.9+** and **[ripgrep](https://github.com/BurntSushi/ripgrep)**
92
+ (`rg`) on your `PATH`.
93
+
94
+ ```bash
95
+ # pip (installs the `vault-ask` command)
96
+ pip install git+https://github.com/guillaumevele/vault-ask.git
97
+ ```
98
+
99
+ Or run it as a single file, no install:
100
+
101
+ ```bash
102
+ git clone https://github.com/guillaumevele/vault-ask.git
103
+ cd vault-ask
104
+ python3 vault_ask.py "your question"
105
+ ```
106
+
107
+ No dependencies beyond the Python standard library and ripgrep.
108
+
109
+ ## Configure your LLM
110
+
111
+ `vault-ask` shells out to whatever LLM command you set in `VAULT_ASK_LLM`. The
112
+ prompt is piped on **stdin** by default, or substituted for `{prompt}` if the
113
+ command contains that placeholder.
114
+
115
+ ```bash
116
+ # Local model via Ollama (prompt on stdin):
117
+ export VAULT_ASK_LLM='ollama run llama3.1'
118
+
119
+ # Simon Willison's `llm` CLI (any provider it supports):
120
+ export VAULT_ASK_LLM='llm -m gpt-4o-mini'
121
+
122
+ # A CLI that takes the prompt as an argument — use the {prompt} placeholder:
123
+ export VAULT_ASK_LLM='your-llm-cli --prompt {prompt}'
124
+ ```
125
+
126
+ Point it at your vault once:
127
+
128
+ ```bash
129
+ export OBSIDIAN_VAULT="$HOME/Obsidian/MyVault"
130
+ ```
131
+
132
+ ## Usage
133
+
134
+ ```bash
135
+ vault-ask "what did I decide about X?"
136
+ vault-ask --vault ~/notes "when is the contract renewal?"
137
+ vault-ask --limit 8 --json "summarize my pricing decisions"
138
+ ```
139
+
140
+ No LLM? Use `--sources-only` to just rank the most relevant notes — a smart grep
141
+ for your vault that needs no model at all:
142
+
143
+ ```bash
144
+ vault-ask --sources-only "pricing model"
145
+ # Most relevant notes for: pricing model
146
+ # - [[Decisions/2026-pricing|2026-pricing]]
147
+ # - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
148
+ ```
149
+
150
+ | Flag | Default | Description |
151
+ |------|---------|-------------|
152
+ | `--vault` | `$OBSIDIAN_VAULT` or `.` | path to the vault |
153
+ | `--limit` | `5` | max notes to consult |
154
+ | `--llm` | `$VAULT_ASK_LLM` | LLM command (overrides env) |
155
+ | `--sources-only` | off | rank relevant notes, no LLM call |
156
+ | `--json` | off | raw structured output |
157
+ | `--version` | | print version |
158
+
159
+ ## What it's good at — and what it isn't
160
+
161
+ **Good at:** factual lookups where the words of your question point at a note —
162
+ decisions, numbers, names, "what did I say about …". It's fast and it never lies.
163
+
164
+ **Not good at:** abstract questions whose vocabulary differs from your notes (you
165
+ ask "my funding strategy", the note says "tax credit"). That's the inherent limit
166
+ of keyword retrieval — proper semantic recall needs embeddings, which this tool
167
+ deliberately avoids to stay zero-dependency and zero-index. When it can't match,
168
+ it refuses honestly rather than guessing.
169
+
170
+ ## Tests
171
+
172
+ ```bash
173
+ python3 -m unittest discover -s tests
174
+ ```
175
+
176
+ ## Related
177
+
178
+ [**voice-to-vault**](https://github.com/guillaumevele/voice-to-vault) is the other
179
+ half of the loop: it routes your voice captures into the Obsidian vault that
180
+ `vault-ask` then answers questions about. One files your thoughts, the other
181
+ brings them back.
182
+
183
+ ## License
184
+
185
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,157 @@
1
+ # vault-ask
2
+
3
+ [![CI](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml/badge.svg)](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
5
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/)
6
+ [![Zero dependencies](https://img.shields.io/badge/dependencies-zero-success.svg)](pyproject.toml)
7
+
8
+ **Ask your Obsidian vault. Get cited answers. Never hallucinate.**
9
+
10
+ A tiny (~400-line, dependency-free) grounded question-answering tool over a folder
11
+ of Markdown notes. It finds the relevant notes, asks *your* LLM to answer **only**
12
+ from them, forces a `[[wikilink]]` citation on every claim, and **refuses instead
13
+ of guessing** when the answer isn't in your vault.
14
+
15
+ ```console
16
+ $ vault-ask "what did I decide about the pricing model?"
17
+ Q: what did I decide about the pricing model?
18
+
19
+ Flat 49 EUR/month, no per-seat pricing, decided after the churn analysis.
20
+ [[Decisions/2026-Pricing|2026-Pricing]]
21
+
22
+ Notes consulted:
23
+ - [[Decisions/2026-Pricing|2026-Pricing]]
24
+ - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
25
+ ```
26
+
27
+ Ask something that isn't in your notes and it won't make anything up:
28
+
29
+ ```console
30
+ $ vault-ask "what is my bank account number?"
31
+ Q: what is my bank account number?
32
+
33
+ No note in the vault answers this question.
34
+ ```
35
+
36
+ ## Why
37
+
38
+ A second brain is only useful if knowledge comes *back out*. Most "chat with your
39
+ notes" tools either need a vector database and an indexing pipeline, or happily
40
+ hallucinate plausible answers — a dealbreaker when your notes are medical, legal,
41
+ or financial. `vault-ask` is the opposite: zero index, zero database, and a hard
42
+ refusal guarantee. It runs `ripgrep` over your vault, ranks notes by term rarity
43
+ (IDF), and hands the best excerpts to whatever LLM you already use.
44
+
45
+ ## How it works
46
+
47
+ 1. **Candidate search** — `ripgrep` scans the whole vault in milliseconds.
48
+ 2. **IDF ranking** — notes are scored by the *rarity* of the query terms they
49
+ contain, so a rare, specific word (a project codename) outweighs a word that
50
+ appears in hundreds of notes. No embeddings, no index, no warm-up.
51
+ 3. **Focused excerpts** — only the headings and matching lines of the top notes
52
+ (plus adjacent context lines) are sent to the model (notes can be long).
53
+ 4. **Grounded prompt** — the model must cite each claim as a `[[link]]`, must not
54
+ add outside knowledge, and must reply with a fixed refusal sentence if the
55
+ excerpts don't answer the question.
56
+ 5. **Robust refusal check** — a refusal (even reworded by the model) is never
57
+ dressed up as a sourced answer; its citations are stripped.
58
+
59
+ Nothing leaves your machine except what your own LLM command chooses to send.
60
+
61
+ ## Install
62
+
63
+ Requires **Python 3.9+** and **[ripgrep](https://github.com/BurntSushi/ripgrep)**
64
+ (`rg`) on your `PATH`.
65
+
66
+ ```bash
67
+ # pip (installs the `vault-ask` command)
68
+ pip install git+https://github.com/guillaumevele/vault-ask.git
69
+ ```
70
+
71
+ Or run it as a single file, no install:
72
+
73
+ ```bash
74
+ git clone https://github.com/guillaumevele/vault-ask.git
75
+ cd vault-ask
76
+ python3 vault_ask.py "your question"
77
+ ```
78
+
79
+ No dependencies beyond the Python standard library and ripgrep.
80
+
81
+ ## Configure your LLM
82
+
83
+ `vault-ask` shells out to whatever LLM command you set in `VAULT_ASK_LLM`. The
84
+ prompt is piped on **stdin** by default, or substituted for `{prompt}` if the
85
+ command contains that placeholder.
86
+
87
+ ```bash
88
+ # Local model via Ollama (prompt on stdin):
89
+ export VAULT_ASK_LLM='ollama run llama3.1'
90
+
91
+ # Simon Willison's `llm` CLI (any provider it supports):
92
+ export VAULT_ASK_LLM='llm -m gpt-4o-mini'
93
+
94
+ # A CLI that takes the prompt as an argument — use the {prompt} placeholder:
95
+ export VAULT_ASK_LLM='your-llm-cli --prompt {prompt}'
96
+ ```
97
+
98
+ Point it at your vault once:
99
+
100
+ ```bash
101
+ export OBSIDIAN_VAULT="$HOME/Obsidian/MyVault"
102
+ ```
103
+
104
+ ## Usage
105
+
106
+ ```bash
107
+ vault-ask "what did I decide about X?"
108
+ vault-ask --vault ~/notes "when is the contract renewal?"
109
+ vault-ask --limit 8 --json "summarize my pricing decisions"
110
+ ```
111
+
112
+ No LLM? Use `--sources-only` to just rank the most relevant notes — a smart grep
113
+ for your vault that needs no model at all:
114
+
115
+ ```bash
116
+ vault-ask --sources-only "pricing model"
117
+ # Most relevant notes for: pricing model
118
+ # - [[Decisions/2026-Pricing|2026-Pricing]]
119
+ # - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
120
+ ```
121
+
122
+ | Flag | Default | Description |
123
+ |------|---------|-------------|
124
+ | `--vault` | `$OBSIDIAN_VAULT` or `.` | path to the vault |
125
+ | `--limit` | `5` | max notes to consult |
126
+ | `--llm` | `$VAULT_ASK_LLM` | LLM command (overrides env) |
127
+ | `--sources-only` | off | rank relevant notes, no LLM call |
128
+ | `--json` | off | raw structured output |
129
+ | `--version` | | print version |
130
+
131
+ ## What it's good at — and what it isn't
132
+
133
+ **Good at:** factual lookups where the words of your question point at a note —
134
+ decisions, numbers, names, "what did I say about …". It's fast and it never lies.
135
+
136
+ **Not good at:** abstract questions whose vocabulary differs from your notes (you
137
+ ask "my funding strategy", the note says "tax credit"). That's the inherent limit
138
+ of keyword retrieval — proper semantic recall needs embeddings, which this tool
139
+ deliberately avoids to stay zero-dependency and zero-index. When it can't match,
140
+ it refuses honestly rather than guessing.
141
+
142
+ ## Tests
143
+
144
+ ```bash
145
+ python3 -m unittest discover -s tests
146
+ ```
147
+
148
+ ## Related
149
+
150
+ [**voice-to-vault**](https://github.com/guillaumevele/voice-to-vault) is the other
151
+ half of the loop: it routes your voice captures into the Obsidian vault that
152
+ `vault-ask` then answers questions about. One files your thoughts, the other
153
+ brings them back.
154
+
155
+ ## License
156
+
157
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "vault-ask"
7
+ version = "0.1.1"
8
+ description = "Ask your Obsidian vault, get cited answers, never hallucinate."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "guillaumevele" }]
13
+ keywords = [
14
+ "obsidian", "rag", "llm", "cli", "knowledge-management",
15
+ "second-brain", "ripgrep", "grounded-generation", "note-taking",
16
+ ]
17
+ classifiers = [
18
+ "Development Status :: 4 - Beta",
19
+ "Environment :: Console",
20
+ "Intended Audience :: Developers",
21
+ "Intended Audience :: End Users/Desktop",
22
+ "License :: OSI Approved :: MIT License",
23
+ "Operating System :: OS Independent",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Text Processing :: Indexing",
30
+ "Topic :: Utilities",
31
+ ]
32
+ dependencies = []
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/guillaumevele/vault-ask"
36
+ Repository = "https://github.com/guillaumevele/vault-ask"
37
+ Issues = "https://github.com/guillaumevele/vault-ask/issues"
38
+
39
+ [project.scripts]
40
+ vault-ask = "vault_ask:main"
41
+
42
+ [tool.setuptools]
43
+ py-modules = ["vault_ask"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,154 @@
1
+ """Tests for vault-ask. Run: python3 -m unittest discover -s tests
2
+
3
+ Requires ripgrep (`rg`) on PATH for the candidate-selection tests.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import sys
8
+ import tempfile
9
+ import unittest
10
+ from pathlib import Path
11
+ from unittest.mock import patch
12
+
13
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
14
+ import vault_ask # noqa: E402
15
+
16
+
17
+ class TestQueryTerms(unittest.TestCase):
18
+ def test_strips_stopwords_and_short_tokens(self):
19
+ terms = vault_ask.query_terms("What did I decide about the pricing for Acme?")
20
+ self.assertIn("decide", terms)
21
+ self.assertIn("pricing", terms)
22
+ self.assertIn("acme", terms)
23
+ self.assertNotIn("what", terms)
24
+ self.assertNotIn("the", terms)
25
+ self.assertNotIn("for", terms)
26
+
27
+ def test_french_stopwords(self):
28
+ terms = vault_ask.query_terms("quel est le financement retenu pour le projet")
29
+ self.assertIn("financement", terms)
30
+ self.assertIn("projet", terms)
31
+ self.assertNotIn("quel", terms)
32
+ self.assertNotIn("retenu", terms)
33
+
34
+
35
+ class TestRefusalDetection(unittest.TestCase):
36
+ def test_exact(self):
37
+ self.assertTrue(vault_ask.is_refusal(vault_ask.REFUSAL))
38
+
39
+ def test_punctuation_and_case_insensitive(self):
40
+ # A reformulated refusal must still count as a refusal (safety guardrail).
41
+ self.assertTrue(vault_ask.is_refusal("no note in the vault answers this question"))
42
+ self.assertTrue(vault_ask.is_refusal("No note in the vault answers this question."))
43
+
44
+ def test_real_answer_is_not_a_refusal(self):
45
+ self.assertFalse(vault_ask.is_refusal("The price is 49 EUR [[Pricing]]."))
46
+
47
+
48
+ class TestPromptGuardrails(unittest.TestCase):
49
+ def test_prompt_carries_sources_and_rules(self):
50
+ notes = [{"link": "[[Decisions/Pricing|Pricing]]", "excerpt": "Price set to 49 EUR."}]
51
+ prompt = vault_ask.build_prompt("what is the price", notes)
52
+ self.assertIn("[[Decisions/Pricing|Pricing]]", prompt)
53
+ self.assertIn(vault_ask.REFUSAL, prompt)
54
+ self.assertIn("Use ONLY the note excerpts", prompt)
55
+ self.assertIn("Price set to 49 EUR", prompt)
56
+
57
+
58
+ class TestCandidateSelection(unittest.TestCase):
59
+ def setUp(self):
60
+ self.tmp = Path(tempfile.mkdtemp())
61
+
62
+ def tearDown(self):
63
+ for p in sorted(self.tmp.rglob("*"), reverse=True):
64
+ p.unlink() if p.is_file() else p.rmdir()
65
+ self.tmp.rmdir()
66
+
67
+ def test_rare_term_outranks_ubiquitous_term(self):
68
+ # "project" is ubiquitous (low IDF); "zylophone" is rare (high IDF).
69
+ for i in range(8):
70
+ (self.tmp / f"noise{i}.md").write_text(
71
+ "# Project\n" + ("project project project\n" * 20), encoding="utf-8")
72
+ (self.tmp / "target.md").write_text(
73
+ "# Decision\nThe chosen budget tool is Zylophone, for the project.\n",
74
+ encoding="utf-8")
75
+ notes = vault_ask.candidate_notes(self.tmp, "budget zylophone project", limit=5)
76
+ self.assertTrue(notes)
77
+ self.assertEqual(Path(notes[0]["file"]).stem, "target")
78
+
79
+ def test_no_terms_returns_empty(self):
80
+ self.assertEqual(vault_ask.candidate_notes(self.tmp, "what is the", limit=5), [])
81
+
82
+ def test_excerpt_keeps_answer_on_adjacent_line(self):
83
+ # The keyword and the actual answer often sit on neighbouring (wrapped)
84
+ # lines; the context window must keep both.
85
+ note = self.tmp / "n.md"
86
+ note.write_text(
87
+ "# Heading\nThe chosen value is 49 EUR\nfor the pricing plan.\n",
88
+ encoding="utf-8")
89
+ excerpt = vault_ask.note_excerpt(note, ["pricing"])
90
+ self.assertIn("49 EUR", excerpt) # answer line (no keyword) kept via context
91
+ self.assertIn("pricing", excerpt)
92
+
93
+
94
+ class TestAsk(unittest.TestCase):
95
+ def setUp(self):
96
+ self.tmp = Path(tempfile.mkdtemp())
97
+
98
+ def tearDown(self):
99
+ for p in sorted(self.tmp.rglob("*"), reverse=True):
100
+ p.unlink() if p.is_file() else p.rmdir()
101
+ self.tmp.rmdir()
102
+
103
+ def test_synthesizes_with_citation(self):
104
+ (self.tmp / "decision.md").write_text(
105
+ "# Decision\nThe chosen budget tool is Zylophone.\n", encoding="utf-8")
106
+ answer = "The chosen tool is Zylophone [[decision|decision]]."
107
+ with patch.object(vault_ask, "run_llm", return_value=answer):
108
+ res = vault_ask.ask(self.tmp, "which budget tool zylophone")
109
+ self.assertTrue(res["grounded"])
110
+ self.assertEqual(res["answer"], answer)
111
+ self.assertTrue(res["sources"])
112
+
113
+ def test_refuses_when_no_candidates(self):
114
+ with patch.object(vault_ask, "run_llm") as llm:
115
+ res = vault_ask.ask(self.tmp, "completely unrelated xyzzy quux")
116
+ llm.assert_not_called()
117
+ self.assertFalse(res["grounded"])
118
+ self.assertEqual(res["answer"], vault_ask.REFUSAL)
119
+
120
+ def test_refusal_from_llm_drops_sources(self):
121
+ (self.tmp / "note.md").write_text("# Note\nZylophone budget tool.\n", encoding="utf-8")
122
+ with patch.object(vault_ask, "run_llm", return_value="No note in the vault answers this question"):
123
+ res = vault_ask.ask(self.tmp, "zylophone budget")
124
+ self.assertFalse(res["grounded"])
125
+ self.assertEqual(res["answer"], vault_ask.REFUSAL)
126
+ self.assertEqual(res["sources"], [])
127
+
128
+ def test_no_llm_returns_candidates_not_hallucination(self):
129
+ (self.tmp / "note.md").write_text("# Note\nZylophone budget tool.\n", encoding="utf-8")
130
+ with patch.object(vault_ask, "run_llm", return_value=None):
131
+ res = vault_ask.ask(self.tmp, "zylophone budget")
132
+ self.assertFalse(res["grounded"])
133
+ self.assertIsNone(res["answer"])
134
+ self.assertEqual(res["reason"], "no-llm")
135
+ self.assertTrue(res["candidates"])
136
+
137
+ def test_sources_only_skips_llm(self):
138
+ (self.tmp / "note.md").write_text("# Note\nZylophone budget tool.\n", encoding="utf-8")
139
+ with patch.object(vault_ask, "run_llm") as llm:
140
+ res = vault_ask.ask(self.tmp, "zylophone budget", sources_only=True)
141
+ llm.assert_not_called()
142
+ self.assertEqual(res["mode"], "sources-only")
143
+ self.assertTrue(res["sources"])
144
+ self.assertIsNone(res["answer"])
145
+
146
+ def test_missing_ripgrep_gives_clear_reason(self):
147
+ with patch.object(vault_ask.shutil, "which", return_value=None):
148
+ res = vault_ask.ask(self.tmp, "anything at all")
149
+ self.assertFalse(res["ok"])
150
+ self.assertEqual(res["reason"], "ripgrep-not-found")
151
+
152
+
153
+ if __name__ == "__main__":
154
+ unittest.main(verbosity=2)
@@ -0,0 +1,185 @@
1
+ Metadata-Version: 2.4
2
+ Name: vault-ask
3
+ Version: 0.1.1
4
+ Summary: Ask your Obsidian vault, get cited answers, never hallucinate.
5
+ Author: guillaumevele
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/guillaumevele/vault-ask
8
+ Project-URL: Repository, https://github.com/guillaumevele/vault-ask
9
+ Project-URL: Issues, https://github.com/guillaumevele/vault-ask/issues
10
+ Keywords: obsidian,rag,llm,cli,knowledge-management,second-brain,ripgrep,grounded-generation,note-taking
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Text Processing :: Indexing
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Dynamic: license-file
28
+
29
+ # vault-ask
30
+
31
+ [![CI](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml/badge.svg)](https://github.com/guillaumevele/vault-ask/actions/workflows/ci.yml)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
33
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/)
34
+ [![Zero dependencies](https://img.shields.io/badge/dependencies-zero-success.svg)](pyproject.toml)
35
+
36
+ **Ask your Obsidian vault. Get cited answers. Never hallucinate.**
37
+
38
+ A tiny (~300-line, dependency-free) grounded question-answering tool over a folder
39
+ of Markdown notes. It finds the relevant notes, asks *your* LLM to answer **only**
40
+ from them, forces a `[[wikilink]]` citation on every claim, and **refuses instead
41
+ of guessing** when the answer isn't in your vault.
42
+
43
+ ```console
44
+ $ vault-ask "what did I decide about the pricing model?"
45
+ Q: what did I decide about the pricing model?
46
+
47
+ Flat 49 EUR/month, no per-seat pricing, decided after the churn analysis.
48
+ [[Decisions/2026-Pricing|2026-Pricing]]
49
+
50
+ Notes consulted:
51
+ - [[Decisions/2026-Pricing|2026-Pricing]]
52
+ - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
53
+ ```
54
+
55
+ Ask something that isn't in your notes and it won't make anything up:
56
+
57
+ ```console
58
+ $ vault-ask "what is my bank account number?"
59
+ Q: what is my bank account number?
60
+
61
+ No note in the vault answers this question.
62
+ ```
63
+
64
+ ## Why
65
+
66
+ A second brain is only useful if knowledge comes *back out*. Most "chat with your
67
+ notes" tools either need a vector database and an indexing pipeline, or happily
68
+ hallucinate plausible answers — a dealbreaker when your notes are medical, legal,
69
+ or financial. `vault-ask` is the opposite: zero index, zero database, and a hard
70
+ refusal guarantee. It runs `ripgrep` over your vault, ranks notes by term rarity
71
+ (TF-IDF), and hands the best excerpts to whatever LLM you already use.
72
+
73
+ ## How it works
74
+
75
+ 1. **Candidate search** — `ripgrep` scans the whole vault in milliseconds.
76
+ 2. **IDF ranking** — notes are scored by the *rarity* of the query terms they
77
+ contain, so a rare, specific word (a project codename) outweighs a word that
78
+ appears in hundreds of notes. No embeddings, no index, no warm-up.
79
+ 3. **Focused excerpts** — only the headings and matching lines of the top notes
80
+ are sent to the model (notes can be long).
81
+ 4. **Grounded prompt** — the model must cite each claim as a `[[link]]`, must not
82
+ add outside knowledge, and must reply with a fixed refusal sentence if the
83
+ excerpts don't answer the question.
84
+ 5. **Robust refusal check** — a refusal (even reworded by the model) is never
85
+ dressed up as a sourced answer; its citations are stripped.
86
+
87
+ Nothing leaves your machine except what your own LLM command chooses to send.
88
+
89
+ ## Install
90
+
91
+ Requires **Python 3.9+** and **[ripgrep](https://github.com/BurntSushi/ripgrep)**
92
+ (`rg`) on your `PATH`.
93
+
94
+ ```bash
95
+ # pip (installs the `vault-ask` command)
96
+ pip install git+https://github.com/guillaumevele/vault-ask.git
97
+ ```
98
+
99
+ Or run it as a single file, no install:
100
+
101
+ ```bash
102
+ git clone https://github.com/guillaumevele/vault-ask.git
103
+ cd vault-ask
104
+ python3 vault_ask.py "your question"
105
+ ```
106
+
107
+ No dependencies beyond the Python standard library and ripgrep.
108
+
109
+ ## Configure your LLM
110
+
111
+ `vault-ask` shells out to whatever LLM command you set in `VAULT_ASK_LLM`. The
112
+ prompt is piped on **stdin** by default, or substituted for `{prompt}` if the
113
+ command contains that placeholder.
114
+
115
+ ```bash
116
+ # Local model via Ollama (prompt on stdin):
117
+ export VAULT_ASK_LLM='ollama run llama3.1'
118
+
119
+ # Simon Willison's `llm` CLI (any provider it supports):
120
+ export VAULT_ASK_LLM='llm -m gpt-4o-mini'
121
+
122
+ # A CLI that takes the prompt as an argument — use the {prompt} placeholder:
123
+ export VAULT_ASK_LLM='your-llm-cli --prompt {prompt}'
124
+ ```
125
+
126
+ Point it at your vault once:
127
+
128
+ ```bash
129
+ export OBSIDIAN_VAULT="$HOME/Obsidian/MyVault"
130
+ ```
131
+
132
+ ## Usage
133
+
134
+ ```bash
135
+ vault-ask "what did I decide about X?"
136
+ vault-ask --vault ~/notes "when is the contract renewal?"
137
+ vault-ask --limit 8 --json "summarize my pricing decisions"
138
+ ```
139
+
140
+ No LLM? Use `--sources-only` to just rank the most relevant notes — a smart grep
141
+ for your vault that needs no model at all:
142
+
143
+ ```bash
144
+ vault-ask --sources-only "pricing model"
145
+ # Most relevant notes for: pricing model
146
+ # - [[Decisions/2026-pricing|2026-pricing]]
147
+ # - [[Meetings/2026-01-pricing-review|2026-01-pricing-review]]
148
+ ```
149
+
150
+ | Flag | Default | Description |
151
+ |------|---------|-------------|
152
+ | `--vault` | `$OBSIDIAN_VAULT` or `.` | path to the vault |
153
+ | `--limit` | `5` | max notes to consult |
154
+ | `--llm` | `$VAULT_ASK_LLM` | LLM command (overrides env) |
155
+ | `--sources-only` | off | rank relevant notes, no LLM call |
156
+ | `--json` | off | raw structured output |
157
+ | `--version` | | print version |
158
+
159
+ ## What it's good at — and what it isn't
160
+
161
+ **Good at:** factual lookups where the words of your question point at a note —
162
+ decisions, numbers, names, "what did I say about …". It's fast and it never lies.
163
+
164
+ **Not good at:** abstract questions whose vocabulary differs from your notes (you
165
+ ask "my funding strategy", the note says "tax credit"). That's the inherent limit
166
+ of keyword retrieval — proper semantic recall needs embeddings, which this tool
167
+ deliberately avoids to stay zero-dependency and zero-index. When it can't match,
168
+ it refuses honestly rather than guessing.
169
+
170
+ ## Tests
171
+
172
+ ```bash
173
+ python3 -m unittest discover -s tests
174
+ ```
175
+
176
+ ## Related
177
+
178
+ [**voice-to-vault**](https://github.com/guillaumevele/voice-to-vault) is the other
179
+ half of the loop: it routes your voice captures into the Obsidian vault that
180
+ `vault-ask` then answers questions about. One files your thoughts, the other
181
+ brings them back.
182
+
183
+ ## License
184
+
185
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ vault_ask.py
5
+ tests/test_vault_ask.py
6
+ vault_ask.egg-info/PKG-INFO
7
+ vault_ask.egg-info/SOURCES.txt
8
+ vault_ask.egg-info/dependency_links.txt
9
+ vault_ask.egg-info/entry_points.txt
10
+ vault_ask.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ vault-ask = vault_ask:main
@@ -0,0 +1 @@
1
+ vault_ask
@@ -0,0 +1,379 @@
1
+ #!/usr/bin/env python3
2
+ """vault-ask — Ask your Obsidian vault, get cited answers, never hallucinate.
3
+
4
+ A tiny, dependency-free grounded question-answering tool over a Markdown
5
+ knowledge base (built for Obsidian, works on any folder of .md files).
6
+
7
+ How it works:
8
+ 1. Fast candidate selection with ripgrep over the whole vault.
9
+ 2. Notes are ranked by IDF coverage — rare, specific terms (e.g. a project
10
+ codename) outweigh ubiquitous ones (e.g. a word in hundreds of notes).
11
+ 3. Query-focused excerpts of the top notes are sent to your LLM with a strict
12
+ prompt: every claim MUST cite its source note as a [[wikilink]], and if the
13
+ excerpts don't answer the question the model MUST refuse instead of guessing.
14
+ 4. A robust refusal check guarantees a refusal is never dressed up as a
15
+ sourced answer.
16
+
17
+ The LLM is whatever command you configure via $VAULT_ASK_LLM, so it works with a
18
+ local model (Ollama), a CLI like `llm`, or any subscription CLI you already use.
19
+ Nothing leaves your machine except what your own LLM command sends.
20
+
21
+ Usage:
22
+ export VAULT_ASK_LLM='ollama run llama3.1' # or 'llm -m gpt-4o-mini', etc.
23
+ vault_ask.py --vault ~/Obsidian/MyVault "what did I decide about pricing?"
24
+
25
+ Requires: Python 3.9+, ripgrep (`rg`) on PATH.
26
+ License: MIT.
27
+ """
28
+ from __future__ import annotations
29
+
30
+ import argparse
31
+ import json
32
+ import math
33
+ import os
34
+ import re
35
+ import shlex
36
+ import shutil
37
+ import subprocess
38
+ import sys
39
+ import unicodedata
40
+ from pathlib import Path
41
+
42
__version__ = "0.1.1"

# Canonical refusal sentence. The prompt tells the model to reply with exactly
# this text, and the refusal check compares the model output against it.
REFUSAL = "No note in the vault answers this question."

# Folder names treated as noise rather than knowledge; they are turned into
# ripgrep exclusion globs during candidate search.
DEFAULT_EXCLUDED_DIRS = (
    ".obsidian",
    ".trash",
    ".git",
    "node_modules",
)

# Stop / question / function words (English + French) that carry no retrieval
# signal. Entries are lowercase and accent-free because query tokens are
# normalized (accents stripped, lowercased) before being looked up here.
STOPWORDS = {
    # --- English: interrogatives
    "what", "which", "where", "when", "why", "how", "who", "whom", "whose",
    # --- English: articles and "to be"
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    # --- English: "to do" / "to have"
    "do", "does", "did", "have", "has", "had",
    # --- English: prepositions
    "for", "with", "from", "into", "about",
    # --- English: demonstratives and connectors
    "that", "this", "these", "those", "and", "or", "but", "not",
    # --- English: pronouns and possessives
    "you", "your", "yours", "my", "mine", "our", "their", "its", "his", "her",
    # --- English: modals
    "can", "could", "should", "would", "will", "shall", "may", "might", "must",
    # --- English: generic verbs and quantifiers
    "get", "got", "make", "made", "any", "some", "all", "more", "most", "than",
    # --- French: interrogatives
    "quel", "quels", "quelle", "quelles", "pourquoi", "comment", "quand",
    "qui", "quoi",
    # --- French: common verbs
    "est", "sont", "etait", "etre", "avoir", "faut", "fait", "faire",
    # --- French: prepositions
    "pour", "avec", "dans", "sur", "sous", "par",
    # --- French: articles and possessives
    "des", "les", "une", "mon", "mes", "ton", "tes", "son", "ses", "nos",
    "vos", "leur", "leurs",
    # --- French: relatives and demonstratives
    "que", "dont", "cette", "cet", "ces", "celle", "celui",
    # --- French: adverbs and connectors
    "donc", "alors", "ainsi", "aussi", "plus", "moins", "tout", "tous",
    "toute", "toutes", "deja", "encore", "vraiment", "bien",
    # --- French: "retained / kept" (as in "what was decided")
    "retenu", "retenue",
}
68
+
69
+
70
def normalize(text: str) -> str:
    """Fold *text* for accent- and case-insensitive comparison.

    The value is coerced to ``str`` (any falsy value becomes ``""``),
    decomposed with NFKD, stripped of combining marks, then lowercased.
    """
    source = str(text or "")
    base_chars = [
        char
        for char in unicodedata.normalize("NFKD", source)
        if unicodedata.combining(char) == 0
    ]
    return "".join(base_chars).lower()
75
+
76
+
77
def query_terms(query: str, min_len: int = 3) -> list[str]:
    """Extract the content-bearing terms of *query*, in order of appearance.

    The query is normalized, split on every run of characters outside
    ``[a-z0-9]``, and a token is kept only when it is at least *min_len*
    characters long and is not listed in ``STOPWORDS``.
    """
    terms = []
    for token in re.split(r"[^a-z0-9]+", normalize(query)):
        if len(token) < min_len or token in STOPWORDS:
            continue
        terms.append(token)
    return terms
81
+
82
+
83
def _vault_root(vault: Path) -> Path:
    """Return *vault* with ``~`` expanded and resolved to an absolute path."""
    expanded = vault.expanduser()
    return expanded.resolve()
85
+
86
+
87
def obsidian_link(vault: Path, path: Path) -> str:
    """Build an Obsidian-style ``[[relative/path|title]]`` link to a note.

    Args:
        vault: Vault directory; the link target is expressed relative to it.
        path: Path of the note file.

    Returns:
        ``[[<target>|<stem>]]`` where ``<target>`` is the note path relative to
        the resolved vault root with its final suffix removed, and ``<stem>``
        is the file name without its suffix. When the resolved note does not
        live under the vault root, the bare file name is used as the target.
    """
    try:
        rel = path.resolve().relative_to(_vault_root(vault))
    except ValueError:
        # Note is outside the vault: fall back to its file name only.
        rel = Path(path.name)
    # Always join the target with "/" so the link text does not depend on the
    # OS path separator (str() of a Windows path would use backslashes).
    target = rel.with_suffix("").as_posix()
    return f"[[{target}|{path.stem}]]"
94
+
95
+
96
def note_excerpt(path: Path, terms: list[str], max_chars: int = 650, context: int = 1) -> str:
    """Return a query-focused excerpt of a note.

    The excerpt keeps heading lines and lines mentioning a query term, plus a
    small context window around each of them (notes can be long, and a matched
    keyword's answer often sits on the neighbouring wrapped line).

    Args:
        path: Note file to read.
        terms: Normalized query terms; a line is a hit when its normalized
            form contains any of them as a substring.
        max_chars: Maximum length of the returned excerpt.
        context: Number of lines kept before and after each heading or hit.

    Returns:
        The selected non-blank lines, stripped and joined with newlines, cut to
        *max_chars*. If no line was selected, every non-blank line is used
        instead. Returns ``""`` when the file cannot be read.
    """
    try:
        # errors="replace": a note containing bytes that are not valid UTF-8
        # should still yield an excerpt instead of raising UnicodeDecodeError.
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    if text.startswith("---\n"):
        # Skip a leading front-matter block. The search starts at index 3 (the
        # newline ending the opening fence) so that an empty block
        # ("---\n---\n") is recognised as well.
        end = text.find("\n---\n", 3)
        if end != -1:
            text = text[end + 5:]
    lines = text.splitlines()
    last = len(lines)
    keep_idx: set[int] = set()
    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped:
            continue
        is_hit = stripped.startswith("#")
        if not is_hit:
            # Only non-heading lines need normalizing for the term test.
            norm = normalize(line)
            is_hit = any(term in norm for term in terms)
        if is_hit:
            keep_idx.update(range(max(0, i - context), min(last, i + context + 1)))
    kept = [lines[i].strip() for i in sorted(keep_idx) if lines[i].strip()]
    if not kept:
        # Nothing selected: fall back to the whole note (blank lines removed).
        kept = [line.strip() for line in lines if line.strip()]
    return "\n".join(kept)[:max_chars]
123
+
124
+
125
def candidate_notes(
    vault: Path,
    query: str,
    limit: int = 5,
    excluded_dirs: tuple[str, ...] = DEFAULT_EXCLUDED_DIRS,
    timeout_s: int = 20,
) -> list[dict]:
    """Select the most relevant notes via ripgrep, ranked by IDF coverage.

    A note that contains rare, specific query terms ranks above a note merely
    dense in a ubiquitous term, so the discriminating words decide relevance.

    Args:
        vault: Vault directory to search.
        query: Free-text question; its content terms drive the search.
        limit: Maximum number of notes returned (values below 0 count as 0).
        excluded_dirs: Directory names excluded from the search.
        timeout_s: Timeout, in seconds, of each ripgrep invocation.

    Returns:
        Up to *limit* dicts with keys ``file``, ``title``, ``link``,
        ``excerpt`` and ``matched_terms``, best match first. Empty when the
        query has no content term, the vault is not a directory, or no term
        matched any note.
    """
    root = _vault_root(vault)
    # De-duplicate while keeping order: a repeated query word would otherwise
    # trigger an identical ripgrep run.
    terms = list(dict.fromkeys(query_terms(query)))
    if not terms or not root.is_dir():
        return []
    excludes: list[str] = []
    for name in excluded_dirs:
        excludes += ["-g", f"!{name}/**", "-g", f"!{name}"]

    # term -> {file path -> count reported by ripgrep}
    term_files: dict[str, dict[str, int]] = {}
    for term in terms:
        files = _rg_match_counts(term, root, excludes, timeout_s)
        if files:
            term_files[term] = files
    if not term_files:
        return []

    # The document population for IDF is the set of notes matched by at least
    # one term, not the whole vault.
    total = max(len(set().union(*term_files.values())), 1)

    coverage: dict[str, set] = {}
    idf_coverage: dict[str, float] = {}  # sum of idf over DISTINCT terms matched
    tf_score: dict[str, float] = {}      # tf*idf, tie-breaker
    for term, files in term_files.items():
        idf = math.log((total + 1) / (len(files) + 1)) + 1.0
        for path, tf in files.items():
            coverage.setdefault(path, set()).add(term)
            idf_coverage[path] = idf_coverage.get(path, 0.0) + idf
            # Cap the count so one very repetitive note cannot dominate.
            tf_score[path] = tf_score.get(path, 0.0) + min(tf, 8) * idf

    # Highest scores first; the path is the final key so that equal scores are
    # ordered reproducibly instead of following ripgrep's output order.
    ranked = sorted(
        idf_coverage,
        key=lambda p: (-idf_coverage[p], -tf_score[p], p),
    )
    notes: list[dict] = []
    for path_str in ranked[:max(limit, 0)]:
        path = Path(path_str)
        notes.append({
            "file": str(path),
            "title": path.stem,
            "link": obsidian_link(vault, path),
            "excerpt": note_excerpt(path, terms),
            "matched_terms": sorted(coverage[path_str]),
        })
    return notes


def _rg_match_counts(
    term: str, root: Path, excludes: list[str], timeout_s: int,
) -> dict[str, int]:
    """Run ``rg -c -i`` for one term under *root*; map each file to its count.

    Returns an empty dict when ripgrep cannot be run, times out, produces
    undecodable output, or exits with a status other than 0 or 1.
    """
    try:
        proc = subprocess.run(
            ["rg", "-c", "-i", "--glob", "*.md", *excludes, "--", term, str(root)],
            capture_output=True, text=True, timeout=timeout_s,
        )
    except (OSError, UnicodeError, subprocess.SubprocessError):
        return {}
    if proc.returncode not in (0, 1):  # 1 = no matches, fine
        return {}
    counts: dict[str, int] = {}
    for raw in proc.stdout.splitlines():
        # Each line is "<path>:<count>"; split on the LAST colon because the
        # path itself may contain colons.
        path, _, count = raw.rpartition(":")
        path = path.strip()
        if not path:
            continue
        try:
            counts[path] = int(count)
        except ValueError:
            counts[path] = 1
    return counts
202
+
203
+
204
def build_prompt(query: str, notes: list[dict]) -> str:
    """Assemble the grounded prompt sent to the LLM.

    Each note with a non-blank excerpt becomes a ``[Source: <link>]`` block;
    blocks are separated by a ``---`` rule. The instructions require a
    [[link]] citation after every claim and the exact ``REFUSAL`` sentence
    when the excerpts do not answer the question.
    """
    cleaned = ((note, (note.get("excerpt") or "").strip()) for note in notes)
    blocks = [
        f"[Source: {note['link']}]\n{excerpt}"
        for note, excerpt in cleaned
        if excerpt
    ]
    sources = "\n\n---\n\n".join(blocks)
    instructions = "\n".join([
        "You answer questions strictly from a personal Markdown knowledge base.",
        "Use ONLY the note excerpts below. Absolute rules, no exceptions:",
        "1. Every claim MUST be followed by its source as a [[link]], copied "
        "EXACTLY from the 'Source:' line.",
        "2. Invent nothing; add no outside knowledge.",
        "3. If the excerpts do not answer the question, reply with EXACTLY this "
        f"and nothing else: {REFUSAL}",
        "4. Be concise and factual: at most 3 lines, no preamble.",
    ])
    return f"{instructions}\n\nQUESTION: {query}\n\nEXCERPTS:\n{sources}"
225
+
226
+
227
def is_refusal(text: str) -> bool:
    """Tell whether *text* is the canonical refusal sentence.

    The comparison ignores case, accents, surrounding whitespace and all
    punctuation or markup characters, so a refusal that the model wrapped in
    quotes or emphasis markers, or ended with different punctuation, is still
    recognised. A refusal must never be mistaken for a sourced answer.

    Returns:
        ``True`` only when the words of *text* are exactly the words of
        ``REFUSAL``; ``False`` for empty input or any other content.
    """
    def _words(value: str) -> str:
        # Collapse every run of non-alphanumeric characters to one space.
        return re.sub(r"[^a-z0-9]+", " ", normalize(value)).strip()

    answer = _words(text)
    return bool(answer) and answer == _words(REFUSAL)
233
+
234
+
235
def run_llm(prompt: str, *, command: str | None = None, timeout_s: int = 120) -> str | None:
    """Run the configured LLM command and return its answer.

    If the command contains ``{prompt}``, the shell-quoted prompt is
    substituted there and the command runs through the shell; otherwise the
    command is split into arguments and the prompt is piped via stdin.

    Args:
        prompt: Full prompt text.
        command: LLM command line; defaults to ``$VAULT_ASK_LLM``.
        timeout_s: Timeout of the subprocess, in seconds.

    Returns:
        The command's stripped stdout, or ``None`` on any failure (no command
        configured, malformed command line, launch error, timeout, non-zero
        exit status, or empty output) so the caller can fall back.
    """
    command = (command or os.environ.get("VAULT_ASK_LLM", "")).strip()
    if not command:
        return None
    try:
        if "{prompt}" in command:
            proc = subprocess.run(
                command.replace("{prompt}", shlex.quote(prompt)),
                shell=True, capture_output=True, text=True, timeout=timeout_s,
            )
        else:
            proc = subprocess.run(
                shlex.split(command), input=prompt,
                capture_output=True, text=True, timeout=timeout_s,
            )
    except (OSError, ValueError, subprocess.SubprocessError):
        # ValueError covers an unparsable command line (e.g. unbalanced quotes
        # in shlex.split) and text encode/decode errors on the pipes.
        return None
    if proc.returncode != 0:
        return None
    out = (proc.stdout or "").strip()
    return out or None
259
+
260
+
261
def ripgrep_available() -> bool:
    """Report whether an ``rg`` executable can be located on PATH."""
    located = shutil.which("rg")
    return located is not None
263
+
264
+
265
def ask(
    vault: Path,
    query: str,
    limit: int = 5,
    command: str | None = None,
    sources_only: bool = False,
) -> dict:
    """Answer *query* from the vault and return a structured result.

    Failure results are ``{"ok": False, "reason": ...}`` for an empty query or
    a missing ripgrep. Otherwise the result has ``ok``, ``query`` and
    ``candidates``, plus ``answer``, ``grounded`` and ``sources``:

    * ``sources_only=True``: no LLM call; ``sources`` lists the ranked note
      links and ``mode`` is ``"sources-only"``.
    * no candidate note: ``answer`` is the refusal sentence.
    * the LLM produced no text: ``answer`` is ``None`` and ``reason`` is
      ``"no-llm"``.
    * otherwise: ``answer`` is the LLM text, or the refusal sentence when the
      model refused; ``grounded`` is true only for a non-refusal.
    """
    query = re.sub(r"\s+", " ", str(query or "").strip())
    if not query:
        return {"ok": False, "reason": "empty-query"}
    if not ripgrep_available():
        return {"ok": False, "reason": "ripgrep-not-found"}

    notes = candidate_notes(vault, query, limit=limit)
    links = [note["link"] for note in notes]
    result = {
        "ok": True,
        "query": query,
        "candidates": [
            {"title": note["title"], "link": note["link"]} for note in notes
        ],
    }

    if sources_only:
        result.update(answer=None, grounded=False, sources=links, mode="sources-only")
        return result

    if not notes:
        result.update(answer=REFUSAL, grounded=False, sources=[])
        return result

    text = run_llm(build_prompt(query, notes), command=command)
    if not text:
        result.update(answer=None, grounded=False, sources=[], reason="no-llm")
        return result

    if is_refusal(text):
        # Report the canonical sentence, never the model's variant of it.
        result.update(answer=REFUSAL, grounded=False, sources=[])
    else:
        result.update(answer=text, grounded=True, sources=links)
    return result
309
+
310
+
311
def format_result(result: dict) -> str:
    """Render a result dict from ``ask`` as human-readable text.

    A failed result becomes a one- or two-line ``vault-ask: ...`` message. A
    sources-only result becomes a bullet list of note links. Any other result
    shows the question, then the answer (or a hint when the LLM produced
    nothing), then the notes that were consulted.
    """
    if not result.get("ok"):
        reason = result.get("reason", "error")
        if reason == "empty-query":
            return "vault-ask: please provide a question."
        if reason != "ripgrep-not-found":
            return f"vault-ask: {reason}"
        return (
            "vault-ask: ripgrep (`rg`) was not found on your PATH.\n"
            "Install it: https://github.com/BurntSushi/ripgrep#installation"
        )

    cands = result.get("candidates") or []

    if result.get("mode") == "sources-only":
        out = [f"Most relevant notes for: {result['query']}", ""]
        bullets = [f"- {cand['link']}" for cand in cands]
        out.extend(bullets if bullets else ["(no matching notes)"])
        return "\n".join(out)

    out = [f"Q: {result['query']}", ""]
    answer = result.get("answer")
    if answer:
        out.append(answer)
    elif result.get("reason") == "no-llm":
        out.append(
            "(No LLM configured or it failed — set $VAULT_ASK_LLM, "
            "or use --sources-only. Relevant notes below.)"
        )
    if cands:
        out.extend(["", "Notes consulted:"])
        out.extend(f"- {cand['link']}" for cand in cands)
    return "\n".join(out)
339
+
340
+
341
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Parses *argv* (argparse falls back to the process arguments when it is
    ``None``), runs ``ask`` and prints the result either as JSON or as
    formatted text.

    Returns:
        ``0`` when the result is ok, ``1`` otherwise. A missing question is
        reported through the parser's own error exit.
    """
    cli = argparse.ArgumentParser(
        description="Ask your Obsidian vault, get cited answers, never hallucinate.",
    )
    cli.add_argument("question", nargs="*", help="your question")
    default_vault = os.environ.get("OBSIDIAN_VAULT", ".")
    cli.add_argument(
        "--vault",
        default=default_vault,
        help="path to the vault (default: $OBSIDIAN_VAULT or current dir)",
    )
    cli.add_argument("--limit", type=int, default=5, help="max notes to consult")
    cli.add_argument(
        "--llm",
        default=None,
        help="LLM command (default: $VAULT_ASK_LLM). Use '{prompt}' for arg-style.",
    )
    cli.add_argument(
        "--sources-only",
        action="store_true",
        help="just list the most relevant notes, no LLM call (a smart grep for your vault)",
    )
    cli.add_argument("--json", action="store_true", help="output raw JSON")
    cli.add_argument("--version", action="version", version=f"vault-ask {__version__}")
    opts = cli.parse_args(argv)

    # The question may arrive as several shell words; join them back together.
    question = " ".join(opts.question).strip()
    if not question:
        cli.error("provide a question")

    outcome = ask(
        Path(opts.vault),
        question,
        limit=opts.limit,
        command=opts.llm,
        sources_only=opts.sources_only,
    )
    if opts.json:
        rendered = json.dumps(outcome, indent=2, ensure_ascii=False)
    else:
        rendered = format_result(outcome)
    print(rendered)

    if outcome.get("ok"):
        return 0
    return 1


if __name__ == "__main__":
    raise SystemExit(main())