veridge-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. veridge-0.1.0/LICENSE +21 -0
  2. veridge-0.1.0/PKG-INFO +193 -0
  3. veridge-0.1.0/README.md +167 -0
  4. veridge-0.1.0/pyproject.toml +52 -0
  5. veridge-0.1.0/setup.cfg +4 -0
  6. veridge-0.1.0/tests/test_cli.py +72 -0
  7. veridge-0.1.0/tests/test_freshness_store.py +47 -0
  8. veridge-0.1.0/tests/test_impact.py +70 -0
  9. veridge-0.1.0/tests/test_indexer.py +58 -0
  10. veridge-0.1.0/tests/test_layers.py +32 -0
  11. veridge-0.1.0/tests/test_model.py +56 -0
  12. veridge-0.1.0/tests/test_parse.py +45 -0
  13. veridge-0.1.0/tests/test_query.py +52 -0
  14. veridge-0.1.0/tests/test_rank_budget.py +39 -0
  15. veridge-0.1.0/tests/test_treesitter.py +70 -0
  16. veridge-0.1.0/tests/test_why_tour.py +55 -0
  17. veridge-0.1.0/veridge/__init__.py +20 -0
  18. veridge-0.1.0/veridge/__main__.py +8 -0
  19. veridge-0.1.0/veridge/budget.py +59 -0
  20. veridge-0.1.0/veridge/classify.py +63 -0
  21. veridge-0.1.0/veridge/cli.py +276 -0
  22. veridge-0.1.0/veridge/freshness.py +145 -0
  23. veridge-0.1.0/veridge/ignore.py +61 -0
  24. veridge-0.1.0/veridge/impact.py +69 -0
  25. veridge-0.1.0/veridge/indexer.py +292 -0
  26. veridge-0.1.0/veridge/layers.py +70 -0
  27. veridge-0.1.0/veridge/mcp_server.py +106 -0
  28. veridge-0.1.0/veridge/model.py +220 -0
  29. veridge-0.1.0/veridge/parse_docs.py +80 -0
  30. veridge-0.1.0/veridge/parse_python.py +83 -0
  31. veridge-0.1.0/veridge/py.typed +0 -0
  32. veridge-0.1.0/veridge/query.py +340 -0
  33. veridge-0.1.0/veridge/rank.py +87 -0
  34. veridge-0.1.0/veridge/sessions.py +73 -0
  35. veridge-0.1.0/veridge/store.py +62 -0
  36. veridge-0.1.0/veridge/treesitter.py +211 -0
  37. veridge-0.1.0/veridge/walk.py +29 -0
  38. veridge-0.1.0/veridge.egg-info/PKG-INFO +193 -0
  39. veridge-0.1.0/veridge.egg-info/SOURCES.txt +41 -0
  40. veridge-0.1.0/veridge.egg-info/dependency_links.txt +1 -0
  41. veridge-0.1.0/veridge.egg-info/entry_points.txt +3 -0
  42. veridge-0.1.0/veridge.egg-info/requires.txt +11 -0
  43. veridge-0.1.0/veridge.egg-info/top_level.txt +1 -0
veridge-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Lodestar contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
veridge-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: veridge
3
+ Version: 0.1.0
4
+ Summary: The always-fresh, low-token map of a whole project — docs, code (to the symbol), decisions — ranked by what matters and served to AI and humans alike.
5
+ License: MIT
6
+ Keywords: knowledge-graph,code-graph,ai-agents,context,mcp,pagerank,repo-map
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development :: Libraries
13
+ Classifier: Typing :: Typed
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7; extra == "dev"
19
+ Requires-Dist: ruff>=0.4; extra == "dev"
20
+ Provides-Extra: mcp
21
+ Requires-Dist: mcp>=1.2; extra == "mcp"
22
+ Provides-Extra: treesitter
23
+ Requires-Dist: tree-sitter>=0.21; extra == "treesitter"
24
+ Requires-Dist: tree-sitter-language-pack>=0.2; extra == "treesitter"
25
+ Dynamic: license-file
26
+
27
+ # Veridge
28
+
29
+ [![CI](https://github.com/galimar/veridge/actions/workflows/ci.yml/badge.svg)](https://github.com/galimar/veridge/actions/workflows/ci.yml)
30
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
31
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](pyproject.toml)
32
+ [![Runtime deps](https://img.shields.io/badge/runtime%20deps-none-success.svg)](pyproject.toml)
33
+
34
+ **The always-fresh, low-token map of a *whole* project** — documents, code (down to the
35
+ function/class), decisions and work sessions — unified in one typed graph, **ranked by what
36
+ matters**, and served both to an AI assistant (as the minimal relevant context) and to a human.
37
+
38
+ > *Veridge* fuses **veridical** (truthful, verified) with **ridge** — the crest line that runs
39
+ > through and connects a whole terrain. That's what it builds: the *true, always-fresh backbone*
40
+ > of how a project fits together — the structural through-line you and your assistant navigate by.
41
+
42
+ ---
43
+
44
+ ## Why it exists
45
+
46
+ A project's knowledge lives in three places that drift apart: the **code**, the **documents**
47
+ (designs, decisions, notes) and the **history** of how it got here. As it grows, keeping a
48
+ mental model of how everything connects gets harder — and an AI assistant loses the thread
49
+ between one session and the next, so every session restarts by re-reading and re-searching
50
+ files. That is slow, incomplete, and burns tokens repeatedly.
51
+
52
+ Veridge builds the project's graph **once** and keeps it fresh, then answers questions about it
53
+ in a few hundred tokens. It unifies documents, code (down to the symbol), decisions and sessions
54
+ in one map, **ranks** it so an answer is the *relevant* slice rather than everything, and serves
55
+ that slice within a token budget — to an AI assistant and to a human alike.
56
+
57
+ ## What makes it different
58
+
59
+ | Pillar | What it means |
60
+ |---|---|
61
+ | **One unified graph** | files **+ symbols** (functions/classes) **+ areas + decisions + git sessions** — code, docs and history in a single map. |
62
+ | **Symbol-level, with a real call graph** | Python is parsed with the stdlib `ast` into a `defines`/`imports`/`calls` graph. Other languages plug in via the optional `[treesitter]` extra. |
63
+ | **Ranked by relevance** | global **PageRank** ("what matters in this project") and **personalised PageRank** ("what matters *for this task*"). |
64
+ | **Token-budgeted, task-aware** | `veridge focus "<task>"` returns the **minimal relevant subgraph within a token budget** — relevant context, not the whole repo. |
65
+ | **Anti-drift gate** | content-hash freshness: refuses to call the map "fine" while something is stale, broken or orphaned. |
66
+ | **Zero infrastructure** | the core runs on the **Python standard library alone** — no DB, no embeddings, no server. Read-only on your sources. |
67
+ | **MCP-first** | the same ranked, budgeted queries are exposed to MCP-aware assistants behind an optional extra. |
68
+
69
+ It is **not a RAG system**: no embeddings, no vector store, no LLM to build the graph. It maps
70
+ *structure* and *importance*, which makes it complementary to RAG and purpose-built for one
71
+ thing: **the cheapest accurate context for orienting on a project.**
72
+
73
+ ## Install
74
+
75
+ ```bash
76
+ pip install -e . # from a clone
77
+ pip install -e ".[mcp]" # + optional MCP server
78
+ pip install -e ".[treesitter]" # + symbol-level JS/TS/Go/Rust/Java parsing
79
+ ```
80
+
81
+ Requires Python 3.10+. Runtime dependencies of the core: **none**.
82
+
83
+ ## Quickstart
84
+
85
+ ```bash
86
+ veridge build . # index -> .veridge/graph.json (+ manifest)
87
+ veridge map . # PageRank-ranked digest: areas, sizes, what matters
88
+ veridge focus "<task>" . # the signature query: minimal relevant subgraph, budgeted
89
+ veridge impact src/util.py . # blast-radius: what a change here affects (ranked, budgeted)
90
+ veridge impact --diff . # blast-radius of your current working changes (vs git HEAD)
91
+ veridge tour . # dependency-ordered reading tour of the key files
92
+ veridge why src/cli.py src/model.py . # shortest typed path between two nodes
93
+ veridge find greet . # find nodes (files or symbols) by name/path
94
+ veridge neighbors src/util.py . # a node and its typed connections
95
+ veridge gate . # anti-drift: broken refs, stale files, orphans
96
+ veridge stats . # counts by node/edge type
97
+ ```
98
+
99
+ `veridge map` also groups files by **architectural layer** (entrypoint / api / service / core /
100
+ data / ui / util / tests / config / docs) — inferred heuristically, no LLM.
101
+
102
+ ### The signature query: `focus`
103
+
104
+ Give it a task, a file, or a symbol name and a token budget. It seeds a **personalised
105
+ PageRank** on whatever the query matches, then admits the highest-ranked nodes until the
106
+ budget is spent — returning exactly the context worth loading, and nothing else:
107
+
108
+ ```text
109
+ $ veridge focus "personalised pagerank ranking" . --budget 400
110
+ focus 'personalised pagerank ranking' · 24 nodes · ~391/400 tokens
111
+ seeds: veridge/rank.py#pagerank, tests/test_rank_budget.py#test_personalised_pagerank...
112
+ 0.1487 veridge/rank.py#pagerank [symbol, deg 8]
113
+ 0.0334 veridge/query.py#focus [symbol, deg 10]
114
+ 0.0289 veridge/query.py#project_map [symbol, deg 9]
115
+ 0.0243 veridge/budget.py [file, deg 8]
116
+ ...
117
+ ```
118
+
119
+ ### Blast-radius: `impact`
120
+
121
+ "If I change this, what breaks — and what's the minimal context to review it safely?" That's
122
+ *reverse reachability* over the call/import/reference graph, so Veridge answers it **for free
123
+ and exactly** — no LLM call, no token cost. The affected set is ranked by a proximity-weighted
124
+ PageRank and trimmed to a token budget, so even a hub with hundreds of dependents returns its
125
+ most important ones:
126
+
127
+ ```text
128
+ $ veridge impact veridge/model.py . --budget 300
129
+ impact (dependents) of 'veridge/model.py' · 82 affected by
130
+ showing 20 · ~290/300 tokens
131
+ 0.0256 d2 veridge/query.py#impact [symbol]
132
+ 0.0239 d1 veridge/query.py [file]
133
+ 0.0226 d1 veridge/cli.py [file]
134
+ ...
135
+ ```
136
+
137
+ `d1`/`d2` is the propagation distance. Use `--diff` to seed from your working changes
138
+ (`git diff --name-only HEAD`) — "the blast-radius of what I'm about to commit" — or `--deps` to
139
+ invert the question (what the seed *relies on*).
140
+
141
+ Same idea over MCP:
142
+
143
+ ```bash
144
+ pip install -e ".[mcp]"
145
+ veridge-mcp . # serves project_map / focus / impact / find / neighbors / health over stdio
146
+ ```
147
+
148
+ ## How it works
149
+
150
+ 1. **Index** (read-only) — walk the project; classify each file; extract **symbols, imports
151
+ and calls** — Python via the stdlib `ast` (zero-deps core), and **JS/TS/Go/Rust/Java via
152
+ the optional `[treesitter]` extra**, both feeding one cross-language call graph; extract
153
+ **doc references** (markdown links, `[[wikilinks]]`, and **plain path mentions in prose** —
154
+ the part generic tools miss); pull out **decision ids** (`ADR-N`/`RFC-N`/`D-X-N`); add **git
155
+ sessions**. Everything lands in one typed graph with indexed adjacency, so queries are
156
+ O(degree), not O(edges).
157
+ 2. **Rank** — PageRank over the type-weighted, undirected graph; personalised PageRank for
158
+ task-aware queries.
159
+ 3. **Serve** — compact, contents-free rows, selected to fit a token budget. An assistant
160
+ queries; a human reads the digest.
161
+ 4. **Stay fresh** — a content-hash manifest diffs the tree on every `gate`, so drift is loud.
162
+
163
+ The graph never duplicates file contents; `.veridge/` is derived and always regenerable.
164
+
165
+ ## Design principles (please keep these intact)
166
+
167
+ **read-only · zero-deps core · low-token · ranked · deterministic.** Determinism matters: nodes
168
+ and edges are sorted on serialization, so `graph.json` is reproducible and diffs are clean.
169
+
170
+ ## Status & roadmap
171
+
172
+ Alpha (v0.1). Working today: the unified graph (files + symbols + areas + decisions +
173
+ sessions), **multi-language symbols** (Python in the core; JS/TS/Go/Rust/Java via the optional
174
+ `[treesitter]` extra), the PageRank ranking, the token-budgeted `focus` query, **`veridge
175
+ impact`** (deterministic blast-radius, incl. `--diff` mode), **deterministic comprehension**
176
+ (`map` layers, `veridge tour`, `veridge why`), the anti-drift gate, the CLI and the optional MCP
177
+ server. **Next up:** a human viewer, then watch-mode freshness. See the full plan in
178
+ [ROADMAP.md](ROADMAP.md).
179
+
180
+ ## Development
181
+
182
+ ```bash
183
+ pip install -e ".[dev,mcp,treesitter]"
184
+ ruff check veridge tests
185
+ pytest -q
186
+ ```
187
+
188
+ Contributions are welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) and the design principles
189
+ (read-only, zero-deps core, low-token, ranked, deterministic).
190
+
191
+ ## License
192
+
193
+ MIT.
veridge-0.1.0/README.md ADDED
@@ -0,0 +1,167 @@
1
+ # Veridge
2
+
3
+ [![CI](https://github.com/galimar/veridge/actions/workflows/ci.yml/badge.svg)](https://github.com/galimar/veridge/actions/workflows/ci.yml)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
5
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](pyproject.toml)
6
+ [![Runtime deps](https://img.shields.io/badge/runtime%20deps-none-success.svg)](pyproject.toml)
7
+
8
+ **The always-fresh, low-token map of a *whole* project** — documents, code (down to the
9
+ function/class), decisions and work sessions — unified in one typed graph, **ranked by what
10
+ matters**, and served both to an AI assistant (as the minimal relevant context) and to a human.
11
+
12
+ > *Veridge* fuses **veridical** (truthful, verified) with **ridge** — the crest line that runs
13
+ > through and connects a whole terrain. That's what it builds: the *true, always-fresh backbone*
14
+ > of how a project fits together — the structural through-line you and your assistant navigate by.
15
+
16
+ ---
17
+
18
+ ## Why it exists
19
+
20
+ A project's knowledge lives in three places that drift apart: the **code**, the **documents**
21
+ (designs, decisions, notes) and the **history** of how it got here. As it grows, keeping a
22
+ mental model of how everything connects gets harder — and an AI assistant loses the thread
23
+ between one session and the next, so every session restarts by re-reading and re-searching
24
+ files. That is slow, incomplete, and burns tokens repeatedly.
25
+
26
+ Veridge builds the project's graph **once** and keeps it fresh, then answers questions about it
27
+ in a few hundred tokens. It unifies documents, code (down to the symbol), decisions and sessions
28
+ in one map, **ranks** it so an answer is the *relevant* slice rather than everything, and serves
29
+ that slice within a token budget — to an AI assistant and to a human alike.
30
+
31
+ ## What makes it different
32
+
33
+ | Pillar | What it means |
34
+ |---|---|
35
+ | **One unified graph** | files **+ symbols** (functions/classes) **+ areas + decisions + git sessions** — code, docs and history in a single map. |
36
+ | **Symbol-level, with a real call graph** | Python is parsed with the stdlib `ast` into a `defines`/`imports`/`calls` graph. Other languages plug in via the optional `[treesitter]` extra. |
37
+ | **Ranked by relevance** | global **PageRank** ("what matters in this project") and **personalised PageRank** ("what matters *for this task*"). |
38
+ | **Token-budgeted, task-aware** | `veridge focus "<task>"` returns the **minimal relevant subgraph within a token budget** — relevant context, not the whole repo. |
39
+ | **Anti-drift gate** | content-hash freshness: refuses to call the map "fine" while something is stale, broken or orphaned. |
40
+ | **Zero infrastructure** | the core runs on the **Python standard library alone** — no DB, no embeddings, no server. Read-only on your sources. |
41
+ | **MCP-first** | the same ranked, budgeted queries are exposed to MCP-aware assistants behind an optional extra. |
42
+
43
+ It is **not a RAG system**: no embeddings, no vector store, no LLM to build the graph. It maps
44
+ *structure* and *importance*, which makes it complementary to RAG and purpose-built for one
45
+ thing: **the cheapest accurate context for orienting on a project.**
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ pip install -e . # from a clone
51
+ pip install -e ".[mcp]" # + optional MCP server
52
+ pip install -e ".[treesitter]" # + symbol-level JS/TS/Go/Rust/Java parsing
53
+ ```
54
+
55
+ Requires Python 3.10+. Runtime dependencies of the core: **none**.
56
+
57
+ ## Quickstart
58
+
59
+ ```bash
60
+ veridge build . # index -> .veridge/graph.json (+ manifest)
61
+ veridge map . # PageRank-ranked digest: areas, sizes, what matters
62
+ veridge focus "<task>" . # the signature query: minimal relevant subgraph, budgeted
63
+ veridge impact src/util.py . # blast-radius: what a change here affects (ranked, budgeted)
64
+ veridge impact --diff . # blast-radius of your current working changes (vs git HEAD)
65
+ veridge tour . # dependency-ordered reading tour of the key files
66
+ veridge why src/cli.py src/model.py . # shortest typed path between two nodes
67
+ veridge find greet . # find nodes (files or symbols) by name/path
68
+ veridge neighbors src/util.py . # a node and its typed connections
69
+ veridge gate . # anti-drift: broken refs, stale files, orphans
70
+ veridge stats . # counts by node/edge type
71
+ ```
72
+
73
+ `veridge map` also groups files by **architectural layer** (entrypoint / api / service / core /
74
+ data / ui / util / tests / config / docs) — inferred heuristically, no LLM.
75
+
76
+ ### The signature query: `focus`
77
+
78
+ Give it a task, a file, or a symbol name and a token budget. It seeds a **personalised
79
+ PageRank** on whatever the query matches, then admits the highest-ranked nodes until the
80
+ budget is spent — returning exactly the context worth loading, and nothing else:
81
+
82
+ ```text
83
+ $ veridge focus "personalised pagerank ranking" . --budget 400
84
+ focus 'personalised pagerank ranking' · 24 nodes · ~391/400 tokens
85
+ seeds: veridge/rank.py#pagerank, tests/test_rank_budget.py#test_personalised_pagerank...
86
+ 0.1487 veridge/rank.py#pagerank [symbol, deg 8]
87
+ 0.0334 veridge/query.py#focus [symbol, deg 10]
88
+ 0.0289 veridge/query.py#project_map [symbol, deg 9]
89
+ 0.0243 veridge/budget.py [file, deg 8]
90
+ ...
91
+ ```
92
+
93
+ ### Blast-radius: `impact`
94
+
95
+ "If I change this, what breaks — and what's the minimal context to review it safely?" That's
96
+ *reverse reachability* over the call/import/reference graph, so Veridge answers it **for free
97
+ and exactly** — no LLM call, no token cost. The affected set is ranked by a proximity-weighted
98
+ PageRank and trimmed to a token budget, so even a hub with hundreds of dependents returns its
99
+ most important ones:
100
+
101
+ ```text
102
+ $ veridge impact veridge/model.py . --budget 300
103
+ impact (dependents) of 'veridge/model.py' · 82 affected by
104
+ showing 20 · ~290/300 tokens
105
+ 0.0256 d2 veridge/query.py#impact [symbol]
106
+ 0.0239 d1 veridge/query.py [file]
107
+ 0.0226 d1 veridge/cli.py [file]
108
+ ...
109
+ ```
110
+
111
+ `d1`/`d2` is the propagation distance. Use `--diff` to seed from your working changes
112
+ (`git diff --name-only HEAD`) — "the blast-radius of what I'm about to commit" — or `--deps` to
113
+ invert the question (what the seed *relies on*).
114
+
115
+ Same idea over MCP:
116
+
117
+ ```bash
118
+ pip install -e ".[mcp]"
119
+ veridge-mcp . # serves project_map / focus / impact / find / neighbors / health over stdio
120
+ ```
121
+
122
+ ## How it works
123
+
124
+ 1. **Index** (read-only) — walk the project; classify each file; extract **symbols, imports
125
+ and calls** — Python via the stdlib `ast` (zero-deps core), and **JS/TS/Go/Rust/Java via
126
+ the optional `[treesitter]` extra**, both feeding one cross-language call graph; extract
127
+ **doc references** (markdown links, `[[wikilinks]]`, and **plain path mentions in prose** —
128
+ the part generic tools miss); pull out **decision ids** (`ADR-N`/`RFC-N`/`D-X-N`); add **git
129
+ sessions**. Everything lands in one typed graph with indexed adjacency, so queries are
130
+ O(degree), not O(edges).
131
+ 2. **Rank** — PageRank over the type-weighted, undirected graph; personalised PageRank for
132
+ task-aware queries.
133
+ 3. **Serve** — compact, contents-free rows, selected to fit a token budget. An assistant
134
+ queries; a human reads the digest.
135
+ 4. **Stay fresh** — a content-hash manifest diffs the tree on every `gate`, so drift is loud.
136
+
137
+ The graph never duplicates file contents; `.veridge/` is derived and always regenerable.
138
+
139
+ ## Design principles (please keep these intact)
140
+
141
+ **read-only · zero-deps core · low-token · ranked · deterministic.** Determinism matters: nodes
142
+ and edges are sorted on serialization, so `graph.json` is reproducible and diffs are clean.
143
+
144
+ ## Status & roadmap
145
+
146
+ Alpha (v0.1). Working today: the unified graph (files + symbols + areas + decisions +
147
+ sessions), **multi-language symbols** (Python in the core; JS/TS/Go/Rust/Java via the optional
148
+ `[treesitter]` extra), the PageRank ranking, the token-budgeted `focus` query, **`veridge
149
+ impact`** (deterministic blast-radius, incl. `--diff` mode), **deterministic comprehension**
150
+ (`map` layers, `veridge tour`, `veridge why`), the anti-drift gate, the CLI and the optional MCP
151
+ server. **Next up:** a human viewer, then watch-mode freshness. See the full plan in
152
+ [ROADMAP.md](ROADMAP.md).
153
+
154
+ ## Development
155
+
156
+ ```bash
157
+ pip install -e ".[dev,mcp,treesitter]"
158
+ ruff check veridge tests
159
+ pytest -q
160
+ ```
161
+
162
+ Contributions are welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) and the design principles
163
+ (read-only, zero-deps core, low-token, ranked, deterministic).
164
+
165
+ ## License
166
+
167
+ MIT.
veridge-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,52 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "veridge"
7
+ dynamic = ["version"]
8
+ description = "The always-fresh, low-token map of a whole project — docs, code (to the symbol), decisions — ranked by what matters and served to AI and humans alike."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ keywords = ["knowledge-graph", "code-graph", "ai-agents", "context", "mcp", "pagerank", "repo-map"]
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Software Development :: Libraries",
20
+ "Typing :: Typed",
21
+ ]
22
+ dependencies = []
23
+
24
+ [project.optional-dependencies]
25
+ dev = ["pytest>=7", "ruff>=0.4"]
26
+ mcp = ["mcp>=1.2"]
27
+ # Optional: symbol-level parsing for languages beyond Python's stdlib `ast`.
28
+ treesitter = ["tree-sitter>=0.21", "tree-sitter-language-pack>=0.2"]
29
+
30
+ [project.scripts]
31
+ veridge = "veridge.cli:main"
32
+ veridge-mcp = "veridge.mcp_server:main"
33
+
34
+ [tool.setuptools]
35
+ packages = ["veridge"]
36
+
37
+ [tool.setuptools.dynamic]
38
+ version = { attr = "veridge.__version__" }
39
+
40
+ [tool.setuptools.package-data]
41
+ veridge = ["py.typed"]
42
+
43
+ [tool.ruff]
44
+ line-length = 100
45
+ target-version = "py310"
46
+
47
+ [tool.ruff.lint]
48
+ select = ["E", "F", "I", "UP", "B", "W"]
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = ["tests"]
52
+ addopts = "-q"
veridge-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
veridge-0.1.0/tests/test_cli.py ADDED
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ from veridge import cli
4
+
5
+
6
+ def test_build_then_map(project, capsys):
7
+ assert cli.main(["build", str(project)]) == 0
8
+ assert (project / ".veridge" / "graph.json").is_file()
9
+ assert cli.main(["map", str(project)]) == 0
10
+ out = capsys.readouterr().out
11
+ assert "most important (PageRank)" in out
12
+
13
+
14
+ def test_focus_cli(project, capsys):
15
+ cli.main(["build", str(project)])
16
+ assert cli.main(["focus", "util", str(project), "--budget", "600"]) == 0
17
+ out = capsys.readouterr().out
18
+ assert "focus 'util'" in out
19
+ assert "src/util.py" in out
20
+
21
+
22
+ def test_impact_cli(project, capsys):
23
+ cli.main(["build", str(project)])
24
+ assert cli.main(["impact", "src/util.py", str(project), "--budget", "2000"]) == 0
25
+ out = capsys.readouterr().out
26
+ assert "impact (dependents)" in out
27
+ assert "src/app.py" in out
28
+
29
+
30
+ def test_impact_cli_deps_direction(project, capsys):
31
+ cli.main(["build", str(project)])
32
+ assert cli.main(["impact", "src/app.py#run", str(project), "--deps", "--json"]) == 0
33
+ out = capsys.readouterr().out
34
+ assert '"dependencies"' in out
35
+ assert "src/util.py#greet" in out
36
+
37
+
38
+ def test_gate_cli_red_on_broken(project, capsys):
39
+ cli.main(["build", str(project)])
40
+ rc = cli.main(["gate", str(project)])
41
+ out = capsys.readouterr().out
42
+ assert "broken references: 1" in out
43
+ assert rc == 1
44
+
45
+
46
+ def test_find_cli(project, capsys):
47
+ cli.main(["build", str(project)])
48
+ cli.main(["find", "greet", str(project)])
49
+ assert "src/util.py#greet" in capsys.readouterr().out
50
+
51
+
52
+ def test_map_json(project, capsys):
53
+ cli.main(["build", str(project)])
54
+ assert cli.main(["map", str(project), "--json"]) == 0
55
+ out = capsys.readouterr().out
56
+ assert '"most_important"' in out
57
+ assert '"by_layer"' in out
58
+
59
+
60
+ def test_why_cli(project, capsys):
61
+ cli.main(["build", str(project)])
62
+ assert cli.main(["why", "src/app.py", "src/util.py", str(project)]) == 0
63
+ out = capsys.readouterr().out
64
+ assert "imports" in out
65
+
66
+
67
+ def test_tour_cli(project, capsys):
68
+ cli.main(["build", str(project)])
69
+ assert cli.main(["tour", str(project), "--budget", "3000"]) == 0
70
+ out = capsys.readouterr().out
71
+ assert "tour of" in out
72
+ assert "src/util.py" in out
veridge-0.1.0/tests/test_freshness_store.py ADDED
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from veridge import store
4
+ from veridge.freshness import build_manifest, diff_manifest, evaluate, index
5
+
6
+
7
+ def test_manifest_diff_detects_change(project):
8
+ m1 = build_manifest(project)
9
+ (project / "src" / "util.py").write_text("def greet(n):\n return n\n", encoding="utf-8")
10
+ (project / "new.md").write_text("new\n", encoding="utf-8")
11
+ m2 = build_manifest(project)
12
+ d = diff_manifest(m1, m2)
13
+ assert "new.md" in d["added"]
14
+ assert "src/util.py" in d["changed"]
15
+
16
+
17
+ def test_gate_reports_broken_and_orphans(graph, project):
18
+ m = build_manifest(project)
19
+ rep = evaluate(graph, m, m)
20
+ assert rep.stale_count == 0
21
+ assert ("README.md", "src/missing.py") in rep.broken
22
+ assert "config.toml" in rep.orphans
23
+ assert rep.ok is False # a broken ref keeps the gate red
24
+
25
+
26
+ def test_gate_ok_when_clean(project):
27
+ # A project with no broken refs and a matching manifest is green.
28
+ (project / "README.md").write_text("# clean\n", encoding="utf-8")
29
+ g, m = index(project)
30
+ rep = evaluate(g, m, m)
31
+ assert rep.broken == []
32
+ assert rep.ok is True
33
+
34
+
35
+ def test_store_roundtrip(graph, project):
36
+ _, m = index(project)
37
+ store.save(project, graph, m)
38
+ g2 = store.load_graph(project)
39
+ m2 = store.load_manifest(project)
40
+ assert g2 is not None and m2 is not None
41
+ assert set(g2.nodes) == set(graph.nodes)
42
+ assert g2.degree("src/util.py") == graph.degree("src/util.py")
43
+
44
+
45
+ def test_load_missing_is_none(tmp_path):
46
+ assert store.load_graph(tmp_path) is None
47
+ assert store.load_manifest(tmp_path) is None
veridge-0.1.0/tests/test_impact.py ADDED
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from veridge import query
4
+ from veridge.impact import dependencies, dependents, expand_seed
5
+
6
+
7
+ def test_expand_seed_lifts_both_ways(graph):
8
+ # A file seed pulls in the symbols it defines.
9
+ assert "src/util.py#greet" in expand_seed(graph, "src/util.py")
10
+ # A symbol seed pulls in its defining file.
11
+ assert "src/util.py" in expand_seed(graph, "src/util.py#greet")
12
+
13
+
14
+ def test_dependents_reverse_reachability(graph):
15
+ affected = dependents(graph, expand_seed(graph, "src/util.py"))
16
+ # Direct dependents of util / greet.
17
+ assert affected.get("src/app.py") == 1 # imports util
18
+ assert affected.get("docs/guide.md") == 1 # references util in prose
19
+ assert affected.get("src/app.py#run") == 1 # calls greet
20
+ # Transitive: callers of run are two hops out.
21
+ assert affected.get("src/app.py#App.start") == 2
22
+
23
+
24
+ def test_dependencies_forward(graph):
25
+ deps = dependencies(graph, expand_seed(graph, "src/app.py#run"))
26
+ assert "src/util.py#greet" in deps # run calls greet
27
+ assert "src/util.py" in deps # app.py imports util.py
28
+
29
+
30
+ def test_query_impact_ranked_and_budgeted(graph):
31
+ res = query.impact(graph, "src/util.py", budget_tokens=4000)
32
+ ids = {r["id"] for r in res["nodes"]}
33
+ assert res["total_affected"] >= 5
34
+ assert {"src/app.py", "src/app.py#run", "docs/guide.md"} <= ids
35
+ assert res["used_tokens"] <= 4000
36
+ # Every shown node carries its distance and rank.
37
+ assert all("dist" in r and "score" in r for r in res["nodes"])
38
+
39
+
40
+ def test_query_impact_hops_cap(graph):
41
+ near = query.impact(graph, "src/util.py", hops=1, budget_tokens=4000)
42
+ ids = {r["id"] for r in near["nodes"]}
43
+ assert "src/app.py#run" in ids # distance 1
44
+ assert "src/app.py#App.start" not in ids # distance 2, excluded by hops=1
45
+
46
+
47
+ def test_query_impact_budget_trims(graph):
48
+ small = query.impact(graph, "src/util.py", budget_tokens=15)
49
+ big = query.impact(graph, "src/util.py", budget_tokens=4000)
50
+ assert len(small["nodes"]) < len(big["nodes"])
51
+
52
+
53
+ def test_query_impact_leaf_is_safe(graph):
54
+ # Nothing points at the README, so changing it has no dependents.
55
+ res = query.impact(graph, "README.md")
56
+ assert res["total_affected"] == 0
57
+ assert "safe to change" in res["note"]
58
+
59
+
60
+ def test_query_impact_seed_by_name(graph):
61
+ res = query.impact(graph, "greet", budget_tokens=4000)
62
+ ids = {r["id"] for r in res["nodes"]}
63
+ assert "src/app.py#run" in ids # resolved 'greet' -> its callers
64
+
65
+
66
+ def test_query_impact_explicit_seed_ids_diff_mode(graph):
67
+ # Simulates --diff: seeds handed in directly (e.g. from `git diff --name-only`).
68
+ res = query.impact(graph, "diff", seed_ids=["src/util.py"], budget_tokens=4000)
69
+ assert res["total_affected"] >= 5
70
+ assert any(r["id"] == "src/app.py" for r in res["nodes"])