veridge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- veridge-0.1.0/LICENSE +21 -0
- veridge-0.1.0/PKG-INFO +193 -0
- veridge-0.1.0/README.md +167 -0
- veridge-0.1.0/pyproject.toml +52 -0
- veridge-0.1.0/setup.cfg +4 -0
- veridge-0.1.0/tests/test_cli.py +72 -0
- veridge-0.1.0/tests/test_freshness_store.py +47 -0
- veridge-0.1.0/tests/test_impact.py +70 -0
- veridge-0.1.0/tests/test_indexer.py +58 -0
- veridge-0.1.0/tests/test_layers.py +32 -0
- veridge-0.1.0/tests/test_model.py +56 -0
- veridge-0.1.0/tests/test_parse.py +45 -0
- veridge-0.1.0/tests/test_query.py +52 -0
- veridge-0.1.0/tests/test_rank_budget.py +39 -0
- veridge-0.1.0/tests/test_treesitter.py +70 -0
- veridge-0.1.0/tests/test_why_tour.py +55 -0
- veridge-0.1.0/veridge/__init__.py +20 -0
- veridge-0.1.0/veridge/__main__.py +8 -0
- veridge-0.1.0/veridge/budget.py +59 -0
- veridge-0.1.0/veridge/classify.py +63 -0
- veridge-0.1.0/veridge/cli.py +276 -0
- veridge-0.1.0/veridge/freshness.py +145 -0
- veridge-0.1.0/veridge/ignore.py +61 -0
- veridge-0.1.0/veridge/impact.py +69 -0
- veridge-0.1.0/veridge/indexer.py +292 -0
- veridge-0.1.0/veridge/layers.py +70 -0
- veridge-0.1.0/veridge/mcp_server.py +106 -0
- veridge-0.1.0/veridge/model.py +220 -0
- veridge-0.1.0/veridge/parse_docs.py +80 -0
- veridge-0.1.0/veridge/parse_python.py +83 -0
- veridge-0.1.0/veridge/py.typed +0 -0
- veridge-0.1.0/veridge/query.py +340 -0
- veridge-0.1.0/veridge/rank.py +87 -0
- veridge-0.1.0/veridge/sessions.py +73 -0
- veridge-0.1.0/veridge/store.py +62 -0
- veridge-0.1.0/veridge/treesitter.py +211 -0
- veridge-0.1.0/veridge/walk.py +29 -0
- veridge-0.1.0/veridge.egg-info/PKG-INFO +193 -0
- veridge-0.1.0/veridge.egg-info/SOURCES.txt +41 -0
- veridge-0.1.0/veridge.egg-info/dependency_links.txt +1 -0
- veridge-0.1.0/veridge.egg-info/entry_points.txt +3 -0
- veridge-0.1.0/veridge.egg-info/requires.txt +11 -0
- veridge-0.1.0/veridge.egg-info/top_level.txt +1 -0
veridge-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Lodestar contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
veridge-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: veridge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The always-fresh, low-token map of a whole project — docs, code (to the symbol), decisions — ranked by what matters and served to AI and humans alike.
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: knowledge-graph,code-graph,ai-agents,context,mcp,pagerank,repo-map
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
19
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
20
|
+
Provides-Extra: mcp
|
|
21
|
+
Requires-Dist: mcp>=1.2; extra == "mcp"
|
|
22
|
+
Provides-Extra: treesitter
|
|
23
|
+
Requires-Dist: tree-sitter>=0.21; extra == "treesitter"
|
|
24
|
+
Requires-Dist: tree-sitter-language-pack>=0.2; extra == "treesitter"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# Veridge
|
|
28
|
+
|
|
29
|
+
[CI](https://github.com/galimar/veridge/actions/workflows/ci.yml)
|
|
30
|
+
[License: MIT](LICENSE)
|
|
31
|
+
[Python 3.10+](pyproject.toml)
|
|
32
|
+
[Core dependencies: none](pyproject.toml)
|
|
33
|
+
|
|
34
|
+
**The always-fresh, low-token map of a *whole* project** — documents, code (down to the
|
|
35
|
+
function/class), decisions and work sessions — unified in one typed graph, **ranked by what
|
|
36
|
+
matters**, and served both to an AI assistant (as the minimal relevant context) and to a human.
|
|
37
|
+
|
|
38
|
+
> *Veridge* fuses **veridical** (truthful, verified) with **ridge** — the crest line that runs
|
|
39
|
+
> through and connects a whole terrain. That's what it builds: the *true, always-fresh backbone*
|
|
40
|
+
> of how a project fits together — the structural through-line you and your assistant navigate by.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Why it exists
|
|
45
|
+
|
|
46
|
+
A project's knowledge lives in three places that drift apart: the **code**, the **documents**
|
|
47
|
+
(designs, decisions, notes) and the **history** of how it got here. As it grows, keeping a
|
|
48
|
+
mental model of how everything connects gets harder — and an AI assistant loses the thread
|
|
49
|
+
between one session and the next, so every session restarts by re-reading and re-searching
|
|
50
|
+
files. That is slow, incomplete, and burns tokens repeatedly.
|
|
51
|
+
|
|
52
|
+
Veridge builds the project's graph **once** and keeps it fresh, then answers questions about it
|
|
53
|
+
in a few hundred tokens. It unifies documents, code (down to the symbol), decisions and sessions
|
|
54
|
+
in one map, **ranks** it so an answer is the *relevant* slice rather than everything, and serves
|
|
55
|
+
that slice within a token budget — to an AI assistant and to a human alike.
|
|
56
|
+
|
|
57
|
+
## What makes it different
|
|
58
|
+
|
|
59
|
+
| Pillar | What it means |
|
|
60
|
+
|---|---|
|
|
61
|
+
| **One unified graph** | files **+ symbols** (functions/classes) **+ areas + decisions + git sessions** — code, docs and history in a single map. |
|
|
62
|
+
| **Symbol-level, with a real call graph** | Python is parsed with the stdlib `ast` into a `defines`/`imports`/`calls` graph. Other languages plug in via the optional `[treesitter]` extra. |
|
|
63
|
+
| **Ranked by relevance** | global **PageRank** ("what matters in this project") and **personalised PageRank** ("what matters *for this task*"). |
|
|
64
|
+
| **Token-budgeted, task-aware** | `veridge focus "<task>"` returns the **minimal relevant subgraph within a token budget** — relevant context, not the whole repo. |
|
|
65
|
+
| **Anti-drift gate** | content-hash freshness: refuses to call the map "fine" while something is stale, broken or orphaned. |
|
|
66
|
+
| **Zero infrastructure** | the core runs on the **Python standard library alone** — no DB, no embeddings, no server. Read-only on your sources. |
|
|
67
|
+
| **MCP-first** | the same ranked, budgeted queries are exposed to MCP-aware assistants behind an optional extra. |
|
|
68
|
+
|
|
69
|
+
It is **not a RAG system**: no embeddings, no vector store, no LLM to build the graph. It maps
|
|
70
|
+
*structure* and *importance*, which makes it complementary to RAG and purpose-built for one
|
|
71
|
+
thing: **the cheapest accurate context for orienting on a project.**
|
|
72
|
+
|
|
73
|
+
## Install
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install -e . # from a clone
|
|
77
|
+
pip install -e ".[mcp]" # + optional MCP server
|
|
78
|
+
pip install -e ".[treesitter]" # + symbol-level JS/TS/Go/Rust/Java parsing
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Requires Python 3.10+. Runtime dependencies of the core: **none**.
|
|
82
|
+
|
|
83
|
+
## Quickstart
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
veridge build . # index -> .veridge/graph.json (+ manifest)
|
|
87
|
+
veridge map . # PageRank-ranked digest: areas, sizes, what matters
|
|
88
|
+
veridge focus "<task>" . # the signature query: minimal relevant subgraph, budgeted
|
|
89
|
+
veridge impact src/util.py . # blast-radius: what a change here affects (ranked, budgeted)
|
|
90
|
+
veridge impact --diff . # blast-radius of your current working changes (vs git HEAD)
|
|
91
|
+
veridge tour . # dependency-ordered reading tour of the key files
|
|
92
|
+
veridge why src/cli.py src/model.py . # shortest typed path between two nodes
|
|
93
|
+
veridge find greet . # find nodes (files or symbols) by name/path
|
|
94
|
+
veridge neighbors src/util.py . # a node and its typed connections
|
|
95
|
+
veridge gate . # anti-drift: broken refs, stale files, orphans
|
|
96
|
+
veridge stats . # counts by node/edge type
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
`veridge map` also groups files by **architectural layer** (entrypoint / api / service / core /
|
|
100
|
+
data / ui / util / tests / config / docs) — inferred heuristically, no LLM.
|
|
101
|
+
|
|
102
|
+
### The signature query: `focus`
|
|
103
|
+
|
|
104
|
+
Give it a task, a file, or a symbol name and a token budget. It seeds a **personalised
|
|
105
|
+
PageRank** on whatever the query matches, then admits the highest-ranked nodes until the
|
|
106
|
+
budget is spent — returning exactly the context worth loading, and nothing else:
|
|
107
|
+
|
|
108
|
+
```text
|
|
109
|
+
$ veridge focus "personalised pagerank ranking" . --budget 400
|
|
110
|
+
focus 'personalised pagerank ranking' · 24 nodes · ~391/400 tokens
|
|
111
|
+
seeds: veridge/rank.py#pagerank, tests/test_rank_budget.py#test_personalised_pagerank...
|
|
112
|
+
0.1487 veridge/rank.py#pagerank [symbol, deg 8]
|
|
113
|
+
0.0334 veridge/query.py#focus [symbol, deg 10]
|
|
114
|
+
0.0289 veridge/query.py#project_map [symbol, deg 9]
|
|
115
|
+
0.0243 veridge/budget.py [file, deg 8]
|
|
116
|
+
...
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Blast-radius: `impact`
|
|
120
|
+
|
|
121
|
+
"If I change this, what breaks — and what's the minimal context to review it safely?" That's
|
|
122
|
+
*reverse reachability* over the call/import/reference graph, so Veridge answers it **for free
|
|
123
|
+
and exactly** — no LLM call, no token cost. The affected set is ranked by a proximity-weighted
|
|
124
|
+
PageRank and trimmed to a token budget, so even a hub with hundreds of dependents returns its
|
|
125
|
+
most important ones:
|
|
126
|
+
|
|
127
|
+
```text
|
|
128
|
+
$ veridge impact veridge/model.py . --budget 300
|
|
129
|
+
impact (dependents) of 'veridge/model.py' · 82 affected by
|
|
130
|
+
showing 20 · ~290/300 tokens
|
|
131
|
+
0.0256 d2 veridge/query.py#impact [symbol]
|
|
132
|
+
0.0239 d1 veridge/query.py [file]
|
|
133
|
+
0.0226 d1 veridge/cli.py [file]
|
|
134
|
+
...
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
`d1`/`d2` is the propagation distance. Use `--diff` to seed from your working changes
|
|
138
|
+
(`git diff --name-only HEAD`) — "the blast-radius of what I'm about to commit" — or `--deps` to
|
|
139
|
+
invert the question (what the seed *relies on*).
|
|
140
|
+
|
|
141
|
+
Same idea over MCP:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
pip install -e ".[mcp]"
|
|
145
|
+
veridge-mcp . # serves project_map / focus / impact / find / neighbors / health over stdio
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## How it works
|
|
149
|
+
|
|
150
|
+
1. **Index** (read-only) — walk the project; classify each file; extract **symbols, imports
|
|
151
|
+
and calls** — Python via the stdlib `ast` (zero-deps core), and **JS/TS/Go/Rust/Java via
|
|
152
|
+
the optional `[treesitter]` extra**, both feeding one cross-language call graph; extract
|
|
153
|
+
**doc references** (markdown links, `[[wikilinks]]`, and **plain path mentions in prose** —
|
|
154
|
+
the part generic tools miss); pull out **decision ids** (`ADR-N`/`RFC-N`/`D-X-N`); add **git
|
|
155
|
+
sessions**. Everything lands in one typed graph with indexed adjacency, so queries are
|
|
156
|
+
O(degree), not O(edges).
|
|
157
|
+
2. **Rank** — PageRank over the type-weighted, undirected graph; personalised PageRank for
|
|
158
|
+
task-aware queries.
|
|
159
|
+
3. **Serve** — compact, contents-free rows, selected to fit a token budget. An assistant
|
|
160
|
+
queries; a human reads the digest.
|
|
161
|
+
4. **Stay fresh** — a content-hash manifest diffs the tree on every `gate`, so drift is loud.
|
|
162
|
+
|
|
163
|
+
The graph never duplicates file contents; `.veridge/` is derived and always regenerable.
|
|
164
|
+
|
|
165
|
+
## Design principles (please keep these intact)
|
|
166
|
+
|
|
167
|
+
**read-only · zero-deps core · low-token · ranked · deterministic.** Determinism matters: nodes
|
|
168
|
+
and edges are sorted on serialization, so `graph.json` is reproducible and diffs are clean.
|
|
169
|
+
|
|
170
|
+
## Status & roadmap
|
|
171
|
+
|
|
172
|
+
Alpha (v0.1). Working today: the unified graph (files + symbols + areas + decisions +
|
|
173
|
+
sessions), **multi-language symbols** (Python in the core; JS/TS/Go/Rust/Java via the optional
|
|
174
|
+
`[treesitter]` extra), the PageRank ranking, the token-budgeted `focus` query, **`veridge
|
|
175
|
+
impact`** (deterministic blast-radius, incl. `--diff` mode), **deterministic comprehension**
|
|
176
|
+
(`map` layers, `veridge tour`, `veridge why`), the anti-drift gate, the CLI and the optional MCP
|
|
177
|
+
server. **Next up:** a human viewer, then watch-mode freshness. See the full plan in
|
|
178
|
+
[ROADMAP.md](ROADMAP.md).
|
|
179
|
+
|
|
180
|
+
## Development
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
pip install -e ".[dev,mcp,treesitter]"
|
|
184
|
+
ruff check veridge tests
|
|
185
|
+
pytest -q
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Contributions are welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) and the design principles
|
|
189
|
+
(read-only, zero-deps core, low-token, ranked, deterministic).
|
|
190
|
+
|
|
191
|
+
## License
|
|
192
|
+
|
|
193
|
+
MIT.
|
veridge-0.1.0/README.md
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# Veridge
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/galimar/veridge/actions/workflows/ci.yml)
|
|
4
|
+
[License: MIT](LICENSE)
|
|
5
|
+
[Python 3.10+](pyproject.toml)
|
|
6
|
+
[Core dependencies: none](pyproject.toml)
|
|
7
|
+
|
|
8
|
+
**The always-fresh, low-token map of a *whole* project** — documents, code (down to the
|
|
9
|
+
function/class), decisions and work sessions — unified in one typed graph, **ranked by what
|
|
10
|
+
matters**, and served both to an AI assistant (as the minimal relevant context) and to a human.
|
|
11
|
+
|
|
12
|
+
> *Veridge* fuses **veridical** (truthful, verified) with **ridge** — the crest line that runs
|
|
13
|
+
> through and connects a whole terrain. That's what it builds: the *true, always-fresh backbone*
|
|
14
|
+
> of how a project fits together — the structural through-line you and your assistant navigate by.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Why it exists
|
|
19
|
+
|
|
20
|
+
A project's knowledge lives in three places that drift apart: the **code**, the **documents**
|
|
21
|
+
(designs, decisions, notes) and the **history** of how it got here. As it grows, keeping a
|
|
22
|
+
mental model of how everything connects gets harder — and an AI assistant loses the thread
|
|
23
|
+
between one session and the next, so every session restarts by re-reading and re-searching
|
|
24
|
+
files. That is slow, incomplete, and burns tokens repeatedly.
|
|
25
|
+
|
|
26
|
+
Veridge builds the project's graph **once** and keeps it fresh, then answers questions about it
|
|
27
|
+
in a few hundred tokens. It unifies documents, code (down to the symbol), decisions and sessions
|
|
28
|
+
in one map, **ranks** it so an answer is the *relevant* slice rather than everything, and serves
|
|
29
|
+
that slice within a token budget — to an AI assistant and to a human alike.
|
|
30
|
+
|
|
31
|
+
## What makes it different
|
|
32
|
+
|
|
33
|
+
| Pillar | What it means |
|
|
34
|
+
|---|---|
|
|
35
|
+
| **One unified graph** | files **+ symbols** (functions/classes) **+ areas + decisions + git sessions** — code, docs and history in a single map. |
|
|
36
|
+
| **Symbol-level, with a real call graph** | Python is parsed with the stdlib `ast` into a `defines`/`imports`/`calls` graph. Other languages plug in via the optional `[treesitter]` extra. |
|
|
37
|
+
| **Ranked by relevance** | global **PageRank** ("what matters in this project") and **personalised PageRank** ("what matters *for this task*"). |
|
|
38
|
+
| **Token-budgeted, task-aware** | `veridge focus "<task>"` returns the **minimal relevant subgraph within a token budget** — relevant context, not the whole repo. |
|
|
39
|
+
| **Anti-drift gate** | content-hash freshness: refuses to call the map "fine" while something is stale, broken or orphaned. |
|
|
40
|
+
| **Zero infrastructure** | the core runs on the **Python standard library alone** — no DB, no embeddings, no server. Read-only on your sources. |
|
|
41
|
+
| **MCP-first** | the same ranked, budgeted queries are exposed to MCP-aware assistants behind an optional extra. |
|
|
42
|
+
|
|
43
|
+
It is **not a RAG system**: no embeddings, no vector store, no LLM to build the graph. It maps
|
|
44
|
+
*structure* and *importance*, which makes it complementary to RAG and purpose-built for one
|
|
45
|
+
thing: **the cheapest accurate context for orienting on a project.**
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install -e . # from a clone
|
|
51
|
+
pip install -e ".[mcp]" # + optional MCP server
|
|
52
|
+
pip install -e ".[treesitter]" # + symbol-level JS/TS/Go/Rust/Java parsing
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Requires Python 3.10+. Runtime dependencies of the core: **none**.
|
|
56
|
+
|
|
57
|
+
## Quickstart
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
veridge build . # index -> .veridge/graph.json (+ manifest)
|
|
61
|
+
veridge map . # PageRank-ranked digest: areas, sizes, what matters
|
|
62
|
+
veridge focus "<task>" . # the signature query: minimal relevant subgraph, budgeted
|
|
63
|
+
veridge impact src/util.py . # blast-radius: what a change here affects (ranked, budgeted)
|
|
64
|
+
veridge impact --diff . # blast-radius of your current working changes (vs git HEAD)
|
|
65
|
+
veridge tour . # dependency-ordered reading tour of the key files
|
|
66
|
+
veridge why src/cli.py src/model.py . # shortest typed path between two nodes
|
|
67
|
+
veridge find greet . # find nodes (files or symbols) by name/path
|
|
68
|
+
veridge neighbors src/util.py . # a node and its typed connections
|
|
69
|
+
veridge gate . # anti-drift: broken refs, stale files, orphans
|
|
70
|
+
veridge stats . # counts by node/edge type
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`veridge map` also groups files by **architectural layer** (entrypoint / api / service / core /
|
|
74
|
+
data / ui / util / tests / config / docs) — inferred heuristically, no LLM.
|
|
75
|
+
|
|
76
|
+
### The signature query: `focus`
|
|
77
|
+
|
|
78
|
+
Give it a task, a file, or a symbol name and a token budget. It seeds a **personalised
|
|
79
|
+
PageRank** on whatever the query matches, then admits the highest-ranked nodes until the
|
|
80
|
+
budget is spent — returning exactly the context worth loading, and nothing else:
|
|
81
|
+
|
|
82
|
+
```text
|
|
83
|
+
$ veridge focus "personalised pagerank ranking" . --budget 400
|
|
84
|
+
focus 'personalised pagerank ranking' · 24 nodes · ~391/400 tokens
|
|
85
|
+
seeds: veridge/rank.py#pagerank, tests/test_rank_budget.py#test_personalised_pagerank...
|
|
86
|
+
0.1487 veridge/rank.py#pagerank [symbol, deg 8]
|
|
87
|
+
0.0334 veridge/query.py#focus [symbol, deg 10]
|
|
88
|
+
0.0289 veridge/query.py#project_map [symbol, deg 9]
|
|
89
|
+
0.0243 veridge/budget.py [file, deg 8]
|
|
90
|
+
...
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Blast-radius: `impact`
|
|
94
|
+
|
|
95
|
+
"If I change this, what breaks — and what's the minimal context to review it safely?" That's
|
|
96
|
+
*reverse reachability* over the call/import/reference graph, so Veridge answers it **for free
|
|
97
|
+
and exactly** — no LLM call, no token cost. The affected set is ranked by a proximity-weighted
|
|
98
|
+
PageRank and trimmed to a token budget, so even a hub with hundreds of dependents returns its
|
|
99
|
+
most important ones:
|
|
100
|
+
|
|
101
|
+
```text
|
|
102
|
+
$ veridge impact veridge/model.py . --budget 300
|
|
103
|
+
impact (dependents) of 'veridge/model.py' · 82 affected by
|
|
104
|
+
showing 20 · ~290/300 tokens
|
|
105
|
+
0.0256 d2 veridge/query.py#impact [symbol]
|
|
106
|
+
0.0239 d1 veridge/query.py [file]
|
|
107
|
+
0.0226 d1 veridge/cli.py [file]
|
|
108
|
+
...
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
`d1`/`d2` is the propagation distance. Use `--diff` to seed from your working changes
|
|
112
|
+
(`git diff --name-only HEAD`) — "the blast-radius of what I'm about to commit" — or `--deps` to
|
|
113
|
+
invert the question (what the seed *relies on*).
|
|
114
|
+
|
|
115
|
+
Same idea over MCP:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
pip install -e ".[mcp]"
|
|
119
|
+
veridge-mcp . # serves project_map / focus / impact / find / neighbors / health over stdio
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## How it works
|
|
123
|
+
|
|
124
|
+
1. **Index** (read-only) — walk the project; classify each file; extract **symbols, imports
|
|
125
|
+
and calls** — Python via the stdlib `ast` (zero-deps core), and **JS/TS/Go/Rust/Java via
|
|
126
|
+
the optional `[treesitter]` extra**, both feeding one cross-language call graph; extract
|
|
127
|
+
**doc references** (markdown links, `[[wikilinks]]`, and **plain path mentions in prose** —
|
|
128
|
+
the part generic tools miss); pull out **decision ids** (`ADR-N`/`RFC-N`/`D-X-N`); add **git
|
|
129
|
+
sessions**. Everything lands in one typed graph with indexed adjacency, so queries are
|
|
130
|
+
O(degree), not O(edges).
|
|
131
|
+
2. **Rank** — PageRank over the type-weighted, undirected graph; personalised PageRank for
|
|
132
|
+
task-aware queries.
|
|
133
|
+
3. **Serve** — compact, contents-free rows, selected to fit a token budget. An assistant
|
|
134
|
+
queries; a human reads the digest.
|
|
135
|
+
4. **Stay fresh** — a content-hash manifest diffs the tree on every `gate`, so drift is loud.
|
|
136
|
+
|
|
137
|
+
The graph never duplicates file contents; `.veridge/` is derived and always regenerable.
|
|
138
|
+
|
|
139
|
+
## Design principles (please keep these intact)
|
|
140
|
+
|
|
141
|
+
**read-only · zero-deps core · low-token · ranked · deterministic.** Determinism matters: nodes
|
|
142
|
+
and edges are sorted on serialization, so `graph.json` is reproducible and diffs are clean.
|
|
143
|
+
|
|
144
|
+
## Status & roadmap
|
|
145
|
+
|
|
146
|
+
Alpha (v0.1). Working today: the unified graph (files + symbols + areas + decisions +
|
|
147
|
+
sessions), **multi-language symbols** (Python in the core; JS/TS/Go/Rust/Java via the optional
|
|
148
|
+
`[treesitter]` extra), the PageRank ranking, the token-budgeted `focus` query, **`veridge
|
|
149
|
+
impact`** (deterministic blast-radius, incl. `--diff` mode), **deterministic comprehension**
|
|
150
|
+
(`map` layers, `veridge tour`, `veridge why`), the anti-drift gate, the CLI and the optional MCP
|
|
151
|
+
server. **Next up:** a human viewer, then watch-mode freshness. See the full plan in
|
|
152
|
+
[ROADMAP.md](ROADMAP.md).
|
|
153
|
+
|
|
154
|
+
## Development
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install -e ".[dev,mcp,treesitter]"
|
|
158
|
+
ruff check veridge tests
|
|
159
|
+
pytest -q
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Contributions are welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) and the design principles
|
|
163
|
+
(read-only, zero-deps core, low-token, ranked, deterministic).
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
MIT.
|
veridge-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "veridge"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "The always-fresh, low-token map of a whole project — docs, code (to the symbol), decisions — ranked by what matters and served to AI and humans alike."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["knowledge-graph", "code-graph", "ai-agents", "context", "mcp", "pagerank", "repo-map"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
"Typing :: Typed",
|
|
21
|
+
]
|
|
22
|
+
dependencies = []
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
dev = ["pytest>=7", "ruff>=0.4"]
|
|
26
|
+
mcp = ["mcp>=1.2"]
|
|
27
|
+
# Optional: symbol-level parsing for languages beyond Python's stdlib `ast`.
|
|
28
|
+
treesitter = ["tree-sitter>=0.21", "tree-sitter-language-pack>=0.2"]
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
veridge = "veridge.cli:main"
|
|
32
|
+
veridge-mcp = "veridge.mcp_server:main"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools]
|
|
35
|
+
packages = ["veridge"]
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.dynamic]
|
|
38
|
+
version = { attr = "veridge.__version__" }
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.package-data]
|
|
41
|
+
veridge = ["py.typed"]
|
|
42
|
+
|
|
43
|
+
[tool.ruff]
|
|
44
|
+
line-length = 100
|
|
45
|
+
target-version = "py310"
|
|
46
|
+
|
|
47
|
+
[tool.ruff.lint]
|
|
48
|
+
select = ["E", "F", "I", "UP", "B", "W"]
|
|
49
|
+
|
|
50
|
+
[tool.pytest.ini_options]
|
|
51
|
+
testpaths = ["tests"]
|
|
52
|
+
addopts = "-q"
|
veridge-0.1.0/setup.cfg
ADDED
|
veridge-0.1.0/tests/test_cli.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from veridge import cli
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_build_then_map(project, capsys):
|
|
7
|
+
assert cli.main(["build", str(project)]) == 0
|
|
8
|
+
assert (project / ".veridge" / "graph.json").is_file()
|
|
9
|
+
assert cli.main(["map", str(project)]) == 0
|
|
10
|
+
out = capsys.readouterr().out
|
|
11
|
+
assert "most important (PageRank)" in out
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_focus_cli(project, capsys):
|
|
15
|
+
cli.main(["build", str(project)])
|
|
16
|
+
assert cli.main(["focus", "util", str(project), "--budget", "600"]) == 0
|
|
17
|
+
out = capsys.readouterr().out
|
|
18
|
+
assert "focus 'util'" in out
|
|
19
|
+
assert "src/util.py" in out
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_impact_cli(project, capsys):
|
|
23
|
+
cli.main(["build", str(project)])
|
|
24
|
+
assert cli.main(["impact", "src/util.py", str(project), "--budget", "2000"]) == 0
|
|
25
|
+
out = capsys.readouterr().out
|
|
26
|
+
assert "impact (dependents)" in out
|
|
27
|
+
assert "src/app.py" in out
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_impact_cli_deps_direction(project, capsys):
|
|
31
|
+
cli.main(["build", str(project)])
|
|
32
|
+
assert cli.main(["impact", "src/app.py#run", str(project), "--deps", "--json"]) == 0
|
|
33
|
+
out = capsys.readouterr().out
|
|
34
|
+
assert '"dependencies"' in out
|
|
35
|
+
assert "src/util.py#greet" in out
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_gate_cli_red_on_broken(project, capsys):
|
|
39
|
+
cli.main(["build", str(project)])
|
|
40
|
+
rc = cli.main(["gate", str(project)])
|
|
41
|
+
out = capsys.readouterr().out
|
|
42
|
+
assert "broken references: 1" in out
|
|
43
|
+
assert rc == 1
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_find_cli(project, capsys):
|
|
47
|
+
cli.main(["build", str(project)])
|
|
48
|
+
cli.main(["find", "greet", str(project)])
|
|
49
|
+
assert "src/util.py#greet" in capsys.readouterr().out
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_map_json(project, capsys):
|
|
53
|
+
cli.main(["build", str(project)])
|
|
54
|
+
assert cli.main(["map", str(project), "--json"]) == 0
|
|
55
|
+
out = capsys.readouterr().out
|
|
56
|
+
assert '"most_important"' in out
|
|
57
|
+
assert '"by_layer"' in out
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_why_cli(project, capsys):
|
|
61
|
+
cli.main(["build", str(project)])
|
|
62
|
+
assert cli.main(["why", "src/app.py", "src/util.py", str(project)]) == 0
|
|
63
|
+
out = capsys.readouterr().out
|
|
64
|
+
assert "imports" in out
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_tour_cli(project, capsys):
|
|
68
|
+
cli.main(["build", str(project)])
|
|
69
|
+
assert cli.main(["tour", str(project), "--budget", "3000"]) == 0
|
|
70
|
+
out = capsys.readouterr().out
|
|
71
|
+
assert "tour of" in out
|
|
72
|
+
assert "src/util.py" in out
|
veridge-0.1.0/tests/test_freshness_store.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from veridge import store
|
|
4
|
+
from veridge.freshness import build_manifest, diff_manifest, evaluate, index
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_manifest_diff_detects_change(project):
|
|
8
|
+
m1 = build_manifest(project)
|
|
9
|
+
(project / "src" / "util.py").write_text("def greet(n):\n return n\n", encoding="utf-8")
|
|
10
|
+
(project / "new.md").write_text("new\n", encoding="utf-8")
|
|
11
|
+
m2 = build_manifest(project)
|
|
12
|
+
d = diff_manifest(m1, m2)
|
|
13
|
+
assert "new.md" in d["added"]
|
|
14
|
+
assert "src/util.py" in d["changed"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_gate_reports_broken_and_orphans(graph, project):
|
|
18
|
+
m = build_manifest(project)
|
|
19
|
+
rep = evaluate(graph, m, m)
|
|
20
|
+
assert rep.stale_count == 0
|
|
21
|
+
assert ("README.md", "src/missing.py") in rep.broken
|
|
22
|
+
assert "config.toml" in rep.orphans
|
|
23
|
+
assert rep.ok is False # a broken ref keeps the gate red
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_gate_ok_when_clean(project):
|
|
27
|
+
# A project with no broken refs and a matching manifest is green.
|
|
28
|
+
(project / "README.md").write_text("# clean\n", encoding="utf-8")
|
|
29
|
+
g, m = index(project)
|
|
30
|
+
rep = evaluate(g, m, m)
|
|
31
|
+
assert rep.broken == []
|
|
32
|
+
assert rep.ok is True
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_store_roundtrip(graph, project):
|
|
36
|
+
_, m = index(project)
|
|
37
|
+
store.save(project, graph, m)
|
|
38
|
+
g2 = store.load_graph(project)
|
|
39
|
+
m2 = store.load_manifest(project)
|
|
40
|
+
assert g2 is not None and m2 is not None
|
|
41
|
+
assert set(g2.nodes) == set(graph.nodes)
|
|
42
|
+
assert g2.degree("src/util.py") == graph.degree("src/util.py")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_load_missing_is_none(tmp_path):
|
|
46
|
+
assert store.load_graph(tmp_path) is None
|
|
47
|
+
assert store.load_manifest(tmp_path) is None
|
veridge-0.1.0/tests/test_impact.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from veridge import query
|
|
4
|
+
from veridge.impact import dependencies, dependents, expand_seed
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_expand_seed_lifts_both_ways(graph):
|
|
8
|
+
# A file seed pulls in the symbols it defines.
|
|
9
|
+
assert "src/util.py#greet" in expand_seed(graph, "src/util.py")
|
|
10
|
+
# A symbol seed pulls in its defining file.
|
|
11
|
+
assert "src/util.py" in expand_seed(graph, "src/util.py#greet")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_dependents_reverse_reachability(graph):
|
|
15
|
+
affected = dependents(graph, expand_seed(graph, "src/util.py"))
|
|
16
|
+
# Direct dependents of util / greet.
|
|
17
|
+
assert affected.get("src/app.py") == 1 # imports util
|
|
18
|
+
assert affected.get("docs/guide.md") == 1 # references util in prose
|
|
19
|
+
assert affected.get("src/app.py#run") == 1 # calls greet
|
|
20
|
+
# Transitive: callers of run are two hops out.
|
|
21
|
+
assert affected.get("src/app.py#App.start") == 2
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_dependencies_forward(graph):
|
|
25
|
+
deps = dependencies(graph, expand_seed(graph, "src/app.py#run"))
|
|
26
|
+
assert "src/util.py#greet" in deps # run calls greet
|
|
27
|
+
assert "src/util.py" in deps # app.py imports util.py
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_query_impact_ranked_and_budgeted(graph):
|
|
31
|
+
res = query.impact(graph, "src/util.py", budget_tokens=4000)
|
|
32
|
+
ids = {r["id"] for r in res["nodes"]}
|
|
33
|
+
assert res["total_affected"] >= 5
|
|
34
|
+
assert {"src/app.py", "src/app.py#run", "docs/guide.md"} <= ids
|
|
35
|
+
assert res["used_tokens"] <= 4000
|
|
36
|
+
# Every shown node carries its distance and rank.
|
|
37
|
+
assert all("dist" in r and "score" in r for r in res["nodes"])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_query_impact_hops_cap(graph):
|
|
41
|
+
near = query.impact(graph, "src/util.py", hops=1, budget_tokens=4000)
|
|
42
|
+
ids = {r["id"] for r in near["nodes"]}
|
|
43
|
+
assert "src/app.py#run" in ids # distance 1
|
|
44
|
+
assert "src/app.py#App.start" not in ids # distance 2, excluded by hops=1
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_query_impact_budget_trims(graph):
|
|
48
|
+
small = query.impact(graph, "src/util.py", budget_tokens=15)
|
|
49
|
+
big = query.impact(graph, "src/util.py", budget_tokens=4000)
|
|
50
|
+
assert len(small["nodes"]) < len(big["nodes"])
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_query_impact_leaf_is_safe(graph):
|
|
54
|
+
# Nothing points at the README, so changing it has no dependents.
|
|
55
|
+
res = query.impact(graph, "README.md")
|
|
56
|
+
assert res["total_affected"] == 0
|
|
57
|
+
assert "safe to change" in res["note"]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_query_impact_seed_by_name(graph):
|
|
61
|
+
res = query.impact(graph, "greet", budget_tokens=4000)
|
|
62
|
+
ids = {r["id"] for r in res["nodes"]}
|
|
63
|
+
assert "src/app.py#run" in ids # resolved 'greet' -> its callers
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_query_impact_explicit_seed_ids_diff_mode(graph):
|
|
67
|
+
# Simulates --diff: seeds handed in directly (e.g. from `git diff --name-only`).
|
|
68
|
+
res = query.impact(graph, "diff", seed_ids=["src/util.py"], budget_tokens=4000)
|
|
69
|
+
assert res["total_affected"] >= 5
|
|
70
|
+
assert any(r["id"] == "src/app.py" for r in res["nodes"])
|