sqlprism 1.1.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/ci.yml +1 -1
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/docs.yml +2 -2
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.gitignore +1 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/CLAUDE.md +7 -4
- {sqlprism-1.1.0 → sqlprism-1.2.1}/PKG-INFO +17 -6
- {sqlprism-1.1.0 → sqlprism-1.2.1}/README.md +15 -4
- sqlprism-1.2.1/docs/api/conventions.md +23 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/architecture/schema.md +54 -1
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/cli.md +31 -1
- sqlprism-1.2.1/docs/guide/conventions.md +82 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/mcp-tools.md +52 -1
- {sqlprism-1.1.0 → sqlprism-1.2.1}/mkdocs.yml +2 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/pyproject.toml +2 -2
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/cli.py +162 -2
- sqlprism-1.2.1/src/sqlprism/core/conventions.py +1590 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/graph.py +1541 -230
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/indexer.py +322 -54
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/mcp_tools.py +318 -115
- sqlprism-1.2.1/src/sqlprism/core/naming.py +32 -0
- sqlprism-1.2.1/src/sqlprism/languages/dbt.py +1117 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/sql.py +150 -22
- sqlprism-1.2.1/src/sqlprism/languages/sqlmesh.py +645 -0
- sqlprism-1.2.1/tests/test_conventions.py +2847 -0
- sqlprism-1.2.1/tests/test_federation.py +488 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_graph.py +346 -27
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_graph_tools.py +246 -0
- sqlprism-1.2.1/tests/test_indexer.py +3679 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_mcp_tools.py +91 -10
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_renderers.py +1047 -7
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_sql_parser.py +369 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_types.py +1 -1
- {sqlprism-1.1.0 → sqlprism-1.2.1}/uv.lock +36 -6
- sqlprism-1.1.0/src/sqlprism/languages/dbt.py +0 -372
- sqlprism-1.1.0/src/sqlprism/languages/sqlmesh.py +0 -324
- sqlprism-1.1.0/tests/test_federation.py +0 -144
- sqlprism-1.1.0/tests/test_indexer.py +0 -1891
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/creating-branches-and-prs/PR-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/creating-branches-and-prs/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/AGENT-INSTRUCTIONS.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/TASK-FORMAT.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/managing-project-releases/BDD-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/managing-project-releases/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/COMMENT-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/REVIEWERS.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/writing-graph-queries/EXAMPLES.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/writing-graph-queries/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/dependabot.yml +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/publish.yml +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/LICENSE +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/dbt.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/graph.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/indexer.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/mcp-tools.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/sql-parser.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/sqlmesh.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/types.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/architecture/overview.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/getting-started/configuration.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/getting-started/installation.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/dbt.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/sqlmesh.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/index.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/utils.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/types.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_cli.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_config.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_languages.py +0 -0
|
@@ -37,10 +37,10 @@ jobs:
|
|
|
37
37
|
run: uv run mkdocs build
|
|
38
38
|
|
|
39
39
|
- name: Upload Pages artifact
|
|
40
|
-
uses: actions/upload-pages-artifact@
|
|
40
|
+
uses: actions/upload-pages-artifact@v5
|
|
41
41
|
with:
|
|
42
42
|
path: site/
|
|
43
43
|
|
|
44
44
|
- name: Deploy to GitHub Pages
|
|
45
45
|
id: deployment
|
|
46
|
-
uses: actions/deploy-pages@
|
|
46
|
+
uses: actions/deploy-pages@v5
|
|
@@ -18,17 +18,20 @@ src/sqlprism/
|
|
|
18
18
|
core/
|
|
19
19
|
graph.py — DuckDB storage layer (MVCC, repo_type tracking)
|
|
20
20
|
indexer.py — Orchestrates parsing + indexing; file-level reindex with repo-type dispatch
|
|
21
|
-
mcp_tools.py — MCP server tools (19 tools, non-blocking reindex, per-repo debounce)
|
|
21
|
+
mcp_tools.py — MCP server tools (24 tools, non-blocking reindex, per-repo debounce)
|
|
22
|
+
conventions.py — Convention inference engine (layers, naming, references, tags, overrides)
|
|
22
23
|
languages/
|
|
23
24
|
sql.py — sqlglot-based SQL parser
|
|
24
25
|
dbt.py — dbt renderer (full project + selective render_models)
|
|
25
26
|
sqlmesh.py — sqlmesh renderer (full project + selective render_models)
|
|
26
27
|
utils.py — Shared venv/env utilities
|
|
27
28
|
types.py — Data classes (ParseResult, NodeResult, etc.)
|
|
28
|
-
cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, status, init)
|
|
29
|
+
cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init)
|
|
29
30
|
tests/
|
|
30
|
-
test_indexer.py
|
|
31
|
-
test_renderers.py
|
|
31
|
+
test_indexer.py — Indexer + integration tests
|
|
32
|
+
test_renderers.py — dbt/sqlmesh renderer tests
|
|
33
|
+
test_conventions.py — Convention engine + placement + tags tests
|
|
34
|
+
test_sql_parser.py — SQL parser, lineage, and dialect tests
|
|
32
35
|
```
|
|
33
36
|
|
|
34
37
|
## Conventions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlprism
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: SQL codebase indexer with column-level lineage, impact analysis, and MCP server support
|
|
5
5
|
Project-URL: Homepage, https://github.com/darkcofy/sqlprism
|
|
6
6
|
Project-URL: Documentation, https://darkcofy.github.io/sqlprism/
|
|
@@ -23,7 +23,7 @@ Requires-Dist: duckdb>=1.5.0
|
|
|
23
23
|
Requires-Dist: mcp[cli]>=1.0.0
|
|
24
24
|
Requires-Dist: pydantic>=2.0.0
|
|
25
25
|
Requires-Dist: pyyaml>=6.0
|
|
26
|
-
Requires-Dist: sqlglot<30
|
|
26
|
+
Requires-Dist: sqlglot[c]<31,>=30.0.0
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
|
|
29
29
|
# SQLPrism
|
|
@@ -74,6 +74,8 @@ uv run sqlprism reindex # index plain SQL repos
|
|
|
74
74
|
|
|
75
75
|
For [dbt](https://www.getdbt.com/) and [SQLMesh](https://sqlmesh.com/) projects, use `reindex-dbt` and `reindex-sqlmesh` respectively. See the [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/) for full options.
|
|
76
76
|
|
|
77
|
+
> **Prerequisite:** dbt and SQLMesh are **not** dependencies of sqlprism. The renderers shell out to `dbt compile` / `sqlmesh` inside the target project's own virtualenv (via `uv run` by default). Install the renderer in that project — for example `uv add dbt-core dbt-<adapter>` or `uv add sqlmesh` — before running `reindex-dbt` / `reindex-sqlmesh`. If the renderer is missing, sqlprism will raise a clear error pointing at the project directory.
|
|
78
|
+
|
|
77
79
|
### 3. Connect your MCP client
|
|
78
80
|
|
|
79
81
|
**Claude Code:**
|
|
@@ -225,6 +227,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
225
227
|
| `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
|
|
226
228
|
| `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
|
|
227
229
|
| `sqlprism serve` | Start the MCP server (stdio or HTTP). |
|
|
230
|
+
| `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
|
|
231
|
+
| `sqlprism conventions refresh` | Re-run convention inference after reindex. |
|
|
232
|
+
| `sqlprism conventions diff` | Show what changed since the last `conventions init`. |
|
|
228
233
|
| `sqlprism status` | Show index status. |
|
|
229
234
|
| `sqlprism query search` | Find entities by name pattern. |
|
|
230
235
|
| `sqlprism query references` | Find inbound/outbound dependencies. |
|
|
@@ -236,7 +241,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
236
241
|
|
|
237
242
|
Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
|
|
238
243
|
|
|
239
|
-
When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
244
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
|
|
240
245
|
|
|
241
246
|
| Tool | Description |
|
|
242
247
|
|---|---|
|
|
@@ -258,6 +263,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
|
258
263
|
| `reindex_files` | Fast on-save reindex with per-repo debounce. |
|
|
259
264
|
| `reindex_dbt` | Background dbt compile + index. |
|
|
260
265
|
| `reindex_sqlmesh` | Background SQLMesh render + index. |
|
|
266
|
+
| `get_conventions` | Inferred project conventions — naming, references, columns. |
|
|
267
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
268
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
269
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
270
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
261
271
|
| `index_status` | Index stats, cross-repo edges, and name collisions. |
|
|
262
272
|
|
|
263
273
|
## Architecture
|
|
@@ -274,8 +284,9 @@ src/sqlprism/
|
|
|
274
284
|
core/
|
|
275
285
|
graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
|
|
276
286
|
indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
|
|
277
|
-
mcp_tools.py <- FastMCP tool definitions (19 tools, non-blocking reindex, per-repo debounce)
|
|
278
|
-
|
|
287
|
+
mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
|
|
288
|
+
conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
|
|
289
|
+
cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
|
|
279
290
|
```
|
|
280
291
|
|
|
281
292
|
The SQL parser extracts:
|
|
@@ -294,7 +305,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
|
|
|
294
305
|
|
|
295
306
|
```bash
|
|
296
307
|
uv sync
|
|
297
|
-
uv run pytest # run tests (
|
|
308
|
+
uv run pytest # run tests (630+ tests)
|
|
298
309
|
uv run pytest --cov=sqlprism # run with coverage report
|
|
299
310
|
uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
|
|
300
311
|
```
|
|
@@ -46,6 +46,8 @@ uv run sqlprism reindex # index plain SQL repos
|
|
|
46
46
|
|
|
47
47
|
For [dbt](https://www.getdbt.com/) and [SQLMesh](https://sqlmesh.com/) projects, use `reindex-dbt` and `reindex-sqlmesh` respectively. See the [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/) for full options.
|
|
48
48
|
|
|
49
|
+
> **Prerequisite:** dbt and SQLMesh are **not** dependencies of sqlprism. The renderers shell out to `dbt compile` / `sqlmesh` inside the target project's own virtualenv (via `uv run` by default). Install the renderer in that project — for example `uv add dbt-core dbt-<adapter>` or `uv add sqlmesh` — before running `reindex-dbt` / `reindex-sqlmesh`. If the renderer is missing, sqlprism will raise a clear error pointing at the project directory.
|
|
50
|
+
|
|
49
51
|
### 3. Connect your MCP client
|
|
50
52
|
|
|
51
53
|
**Claude Code:**
|
|
@@ -197,6 +199,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
197
199
|
| `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
|
|
198
200
|
| `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
|
|
199
201
|
| `sqlprism serve` | Start the MCP server (stdio or HTTP). |
|
|
202
|
+
| `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
|
|
203
|
+
| `sqlprism conventions refresh` | Re-run convention inference after reindex. |
|
|
204
|
+
| `sqlprism conventions diff` | Show what changed since the last `conventions init`. |
|
|
200
205
|
| `sqlprism status` | Show index status. |
|
|
201
206
|
| `sqlprism query search` | Find entities by name pattern. |
|
|
202
207
|
| `sqlprism query references` | Find inbound/outbound dependencies. |
|
|
@@ -208,7 +213,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
208
213
|
|
|
209
214
|
Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
|
|
210
215
|
|
|
211
|
-
When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
216
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
|
|
212
217
|
|
|
213
218
|
| Tool | Description |
|
|
214
219
|
|---|---|
|
|
@@ -230,6 +235,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
|
230
235
|
| `reindex_files` | Fast on-save reindex with per-repo debounce. |
|
|
231
236
|
| `reindex_dbt` | Background dbt compile + index. |
|
|
232
237
|
| `reindex_sqlmesh` | Background SQLMesh render + index. |
|
|
238
|
+
| `get_conventions` | Inferred project conventions — naming, references, columns. |
|
|
239
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
240
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
241
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
242
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
233
243
|
| `index_status` | Index stats, cross-repo edges, and name collisions. |
|
|
234
244
|
|
|
235
245
|
## Architecture
|
|
@@ -246,8 +256,9 @@ src/sqlprism/
|
|
|
246
256
|
core/
|
|
247
257
|
graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
|
|
248
258
|
indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
|
|
249
|
-
mcp_tools.py <- FastMCP tool definitions (19 tools, non-blocking reindex, per-repo debounce)
|
|
250
|
-
|
|
259
|
+
mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
|
|
260
|
+
conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
|
|
261
|
+
cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
|
|
251
262
|
```
|
|
252
263
|
|
|
253
264
|
The SQL parser extracts:
|
|
@@ -266,7 +277,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
|
|
|
266
277
|
|
|
267
278
|
```bash
|
|
268
279
|
uv sync
|
|
269
|
-
uv run pytest # run tests (
|
|
280
|
+
uv run pytest # run tests (630+ tests)
|
|
270
281
|
uv run pytest --cov=sqlprism # run with coverage report
|
|
271
282
|
uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
|
|
272
283
|
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Convention Engine
|
|
2
|
+
|
|
3
|
+
The convention inference engine detects project patterns from the indexed SQL graph — naming conventions, layer references, required columns, column style, and semantic tags.
|
|
4
|
+
|
|
5
|
+
## ConventionEngine
|
|
6
|
+
|
|
7
|
+
::: sqlprism.core.conventions.ConventionEngine
|
|
8
|
+
|
|
9
|
+
## Data Classes
|
|
10
|
+
|
|
11
|
+
::: sqlprism.core.conventions.Layer
|
|
12
|
+
|
|
13
|
+
::: sqlprism.core.conventions.NamingPattern
|
|
14
|
+
|
|
15
|
+
::: sqlprism.core.conventions.ReferenceRule
|
|
16
|
+
|
|
17
|
+
::: sqlprism.core.conventions.RequiredColumn
|
|
18
|
+
|
|
19
|
+
::: sqlprism.core.conventions.ColumnStyle
|
|
20
|
+
|
|
21
|
+
::: sqlprism.core.conventions.TagAssignment
|
|
22
|
+
|
|
23
|
+
::: sqlprism.core.conventions.Cluster
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The index is stored in a single DuckDB file (default: `~/.sqlprism/graph.duckdb`). Current schema version: **1.0**.
|
|
4
4
|
|
|
5
|
-
## Tables (6)
|
|
5
|
+
## Tables (9)
|
|
6
6
|
|
|
7
7
|
### `repos`
|
|
8
8
|
|
|
@@ -99,6 +99,51 @@ End-to-end column lineage chains.
|
|
|
99
99
|
|
|
100
100
|
A lineage chain traces one output column back to its source. Multiple chains (different `chain_index` values) exist when a column has multiple source paths (e.g. COALESCE of two columns).
|
|
101
101
|
|
|
102
|
+
### `columns`
|
|
103
|
+
|
|
104
|
+
Column definitions extracted from DDL or schema files.
|
|
105
|
+
|
|
106
|
+
| Column | Type | Description |
|
|
107
|
+
|---|---|---|
|
|
108
|
+
| `column_id` | INTEGER PK | Auto-increment ID. |
|
|
109
|
+
| `node_id` | INTEGER | FK to `nodes`. |
|
|
110
|
+
| `column_name` | TEXT | Column name. |
|
|
111
|
+
| `data_type` | TEXT or NULL | Column data type (e.g. `INTEGER`, `TEXT`). |
|
|
112
|
+
| `position` | INTEGER | Column ordinal position in the table. |
|
|
113
|
+
| `source` | TEXT | How the column was discovered: `definition`, `usage`, `lineage`. |
|
|
114
|
+
|
|
115
|
+
### `conventions`
|
|
116
|
+
|
|
117
|
+
Inferred or overridden project conventions per layer.
|
|
118
|
+
|
|
119
|
+
| Column | Type | Description |
|
|
120
|
+
|---|---|---|
|
|
121
|
+
| `convention_id` | INTEGER PK | Auto-increment ID. |
|
|
122
|
+
| `repo_id` | INTEGER | FK to `repos`. |
|
|
123
|
+
| `layer` | TEXT | Layer name (e.g. `staging`, `marts`). |
|
|
124
|
+
| `convention_type` | TEXT | `naming`, `references`, `required_columns`, or `column_style`. |
|
|
125
|
+
| `payload` | JSON | Convention data (pattern, allowed_targets, etc.). |
|
|
126
|
+
| `confidence` | FLOAT | Confidence score (0.0-1.0). |
|
|
127
|
+
| `source` | TEXT | `inferred` or `override`. |
|
|
128
|
+
| `model_count` | INTEGER | Number of models in this layer when inferred. |
|
|
129
|
+
|
|
130
|
+
Unique constraint: `(repo_id, layer, convention_type)`.
|
|
131
|
+
|
|
132
|
+
### `semantic_tags`
|
|
133
|
+
|
|
134
|
+
Semantic tags assigned to models by clustering or explicit override.
|
|
135
|
+
|
|
136
|
+
| Column | Type | Description |
|
|
137
|
+
|---|---|---|
|
|
138
|
+
| `tag_id` | INTEGER PK | Auto-increment ID. |
|
|
139
|
+
| `repo_id` | INTEGER | FK to `repos`. |
|
|
140
|
+
| `tag_name` | TEXT | Tag name (e.g. `customer`, `order`, `revenue`). |
|
|
141
|
+
| `node_id` | INTEGER | FK to `nodes`. |
|
|
142
|
+
| `confidence` | FLOAT | Confidence score (0.0-1.0). |
|
|
143
|
+
| `source` | TEXT | `inferred`, `anchor`, or `explicit`. |
|
|
144
|
+
|
|
145
|
+
Unique constraint: `(repo_id, tag_name, node_id)`.
|
|
146
|
+
|
|
102
147
|
## Indexes
|
|
103
148
|
|
|
104
149
|
```sql
|
|
@@ -117,6 +162,11 @@ CREATE INDEX idx_col_usage_type ON column_usage(usage_type);
|
|
|
117
162
|
CREATE INDEX idx_lineage_output ON column_lineage(output_node, output_column);
|
|
118
163
|
CREATE INDEX idx_lineage_hop ON column_lineage(hop_table, hop_column);
|
|
119
164
|
CREATE INDEX idx_lineage_file ON column_lineage(file_id);
|
|
165
|
+
CREATE INDEX idx_conventions_repo ON conventions(repo_id);
|
|
166
|
+
CREATE INDEX idx_conventions_type ON conventions(convention_type);
|
|
167
|
+
CREATE INDEX idx_tags_name ON semantic_tags(tag_name);
|
|
168
|
+
CREATE INDEX idx_tags_node ON semantic_tags(node_id);
|
|
169
|
+
CREATE INDEX idx_tags_repo ON semantic_tags(repo_id);
|
|
120
170
|
```
|
|
121
171
|
|
|
122
172
|
## Entity Relationship
|
|
@@ -130,4 +180,7 @@ repos 1──* files 1──* nodes
|
|
|
130
180
|
|
|
131
181
|
files 1──* column_usage *──1 nodes
|
|
132
182
|
files 1──* column_lineage
|
|
183
|
+
nodes 1──* columns
|
|
184
|
+
repos 1──* conventions
|
|
185
|
+
repos 1──* semantic_tags *──1 nodes
|
|
133
186
|
```
|
|
@@ -34,7 +34,7 @@ sqlprism status [--config PATH] [--db PATH]
|
|
|
34
34
|
|
|
35
35
|
### `sqlprism serve`
|
|
36
36
|
|
|
37
|
-
Starts the MCP server, exposing all 19 tools to any MCP client.
|
|
37
|
+
Starts the MCP server, exposing all 24 tools to any MCP client.
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
40
|
sqlprism serve [--config PATH] [--db PATH] [--transport stdio|streamable-http] [--port 8000]
|
|
@@ -130,6 +130,36 @@ sqlprism reindex-dbt \
|
|
|
130
130
|
| `--profiles-dir` | No | Path to directory containing `profiles.yml`. Defaults to the project directory. |
|
|
131
131
|
| `--dbt-command` | No | Base command to invoke dbt. `compile` is appended automatically. Default: `uv run dbt`. Use `dbt` if globally installed, or `uvx --with dbt-starrocks dbt` for ephemeral install. |
|
|
132
132
|
|
|
133
|
+
## Convention Commands
|
|
134
|
+
|
|
135
|
+
### `sqlprism conventions init`
|
|
136
|
+
|
|
137
|
+
Generate a `sqlprism.conventions.yml` file from inferred conventions. Includes confidence scores as comments.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
sqlprism conventions init [--config PATH] [--db PATH] [--force]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
| Parameter | Description |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `--force` | Overwrite existing conventions file. Without this flag, init refuses to overwrite. |
|
|
146
|
+
|
|
147
|
+
### `sqlprism conventions refresh`
|
|
148
|
+
|
|
149
|
+
Re-run convention inference after reindex. Preserves explicit overrides (source: 'override').
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
sqlprism conventions refresh [--config PATH] [--db PATH]
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### `sqlprism conventions diff`
|
|
156
|
+
|
|
157
|
+
Show what conventions changed since the last `init`. Compares the YAML file against current inferred conventions.
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
sqlprism conventions diff [--config PATH] [--db PATH]
|
|
161
|
+
```
|
|
162
|
+
|
|
133
163
|
## Query Commands
|
|
134
164
|
|
|
135
165
|
All query commands output JSON to stdout. They share common parameters:
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Conventions
|
|
2
|
+
|
|
3
|
+
The conventions engine automatically infers project patterns from the indexed SQL graph -- naming conventions, layer references, required columns, column naming style, and semantic tags. It runs after every reindex, giving AI agents (and developers) a machine-readable description of how the project is structured.
|
|
4
|
+
|
|
5
|
+
## How It Works
|
|
6
|
+
|
|
7
|
+
- Convention inference runs automatically after reindex.
|
|
8
|
+
- Detects layers from directory structure (`models/staging/`, `models/marts/`, etc.).
|
|
9
|
+
- Infers naming patterns per layer (e.g. `stg_{source}_{entity}`).
|
|
10
|
+
- Infers reference rules (which layers reference which).
|
|
11
|
+
- Detects required columns (columns appearing in >70% of models per layer).
|
|
12
|
+
- Detects column naming style (`snake_case`, `camelCase`, etc.).
|
|
13
|
+
- Assigns semantic tags via structural clustering (no ML, deterministic).
|
|
14
|
+
|
|
15
|
+
## Confidence Scores
|
|
16
|
+
|
|
17
|
+
All conventions have a confidence score (0.0--1.0):
|
|
18
|
+
|
|
19
|
+
| Range | Meaning |
|
|
20
|
+
|---|---|
|
|
21
|
+
| **>0.9** | High confidence. Follow this convention. |
|
|
22
|
+
| **0.7--0.9** | Moderate. Likely correct but worth verifying. |
|
|
23
|
+
| **<0.7** | Low confidence. Consider an explicit override. |
|
|
24
|
+
|
|
25
|
+
## Overrides
|
|
26
|
+
|
|
27
|
+
You can override inferred conventions with a YAML file:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
sqlprism conventions init # generates sqlprism.conventions.yml
|
|
31
|
+
# edit the file to add explicit overrides
|
|
32
|
+
sqlprism conventions refresh # re-runs inference, preserving overrides
|
|
33
|
+
sqlprism conventions diff # shows changes since last init
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Overrides take precedence over inferred conventions (`source: 'override'` vs `source: 'inferred'`).
|
|
37
|
+
|
|
38
|
+
## MCP Tools
|
|
39
|
+
|
|
40
|
+
Five MCP tools expose conventions to AI agents:
|
|
41
|
+
|
|
42
|
+
| Tool | Description |
|
|
43
|
+
|---|---|
|
|
44
|
+
| `get_conventions` | Naming rules, reference rules, required columns per layer. |
|
|
45
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
46
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
47
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
48
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
49
|
+
|
|
50
|
+
See [MCP Tools](mcp-tools.md) for parameter details.
|
|
51
|
+
|
|
52
|
+
## Semantic Tags
|
|
53
|
+
|
|
54
|
+
Tags are assigned by structural clustering -- models that share many upstream references get grouped and auto-labeled based on common name tokens. Tags represent business domain concepts (e.g. "customer", "order", "revenue").
|
|
55
|
+
|
|
56
|
+
Tag sources:
|
|
57
|
+
|
|
58
|
+
| Source | Description |
|
|
59
|
+
|---|---|
|
|
60
|
+
| **inferred** | Automatically assigned via clustering. |
|
|
61
|
+
| **anchor** | Manually specified in the YAML override as cluster anchors. |
|
|
62
|
+
| **explicit** | Manually assigned to specific models in the YAML override. |
|
|
63
|
+
|
|
64
|
+
## Example Workflow
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# 1. Index your project
|
|
68
|
+
sqlprism reindex
|
|
69
|
+
|
|
70
|
+
# 2. Generate conventions file
|
|
71
|
+
sqlprism conventions init
|
|
72
|
+
# -> creates sqlprism.conventions.yml with inferred conventions
|
|
73
|
+
|
|
74
|
+
# 3. Review and adjust
|
|
75
|
+
# Edit the YAML to fix any conventions the engine got wrong
|
|
76
|
+
|
|
77
|
+
# 4. Re-run inference (preserves your overrides)
|
|
78
|
+
sqlprism conventions refresh
|
|
79
|
+
|
|
80
|
+
# 5. Check what changed
|
|
81
|
+
sqlprism conventions diff
|
|
82
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MCP Tools
|
|
2
2
|
|
|
3
|
-
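When running as an MCP server (`sqlprism serve`), 19 tools are exposed. Any MCP client (Claude Code, Claude Desktop, Cursor, Continue.dev) can call these.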
|
|
3
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed. Any MCP client (Claude Code, Claude Desktop, Cursor, Continue.dev) can call these.
|
|
4
4
|
|
|
5
5
|
## Query Tools
|
|
6
6
|
|
|
@@ -237,6 +237,57 @@ Analyze the downstream impact of proposed column changes BEFORE modifying code.
|
|
|
237
237
|
| `changes` | list | Yes | | List of column changes (remove_column, rename_column, add_column). |
|
|
238
238
|
| `repo` | string | No | | Filter by repo name. |
|
|
239
239
|
|
|
240
|
+
## Convention Tools
|
|
241
|
+
|
|
242
|
+
### `get_conventions`
|
|
243
|
+
|
|
244
|
+
Get naming conventions, reference rules, and required columns for a layer. Returns inferred conventions with confidence scores.
|
|
245
|
+
|
|
246
|
+
| Parameter | Type | Required | Default | Description |
|
|
247
|
+
|---|---|---|---|---|
|
|
248
|
+
| `layer` | string | No | | Layer name (e.g. 'staging', 'marts'). Omit for all layers. |
|
|
249
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
250
|
+
|
|
251
|
+
### `search_by_tag`
|
|
252
|
+
|
|
253
|
+
Find models tagged with a business domain concept. Returns models ranked by confidence.
|
|
254
|
+
|
|
255
|
+
| Parameter | Type | Required | Default | Description |
|
|
256
|
+
|---|---|---|---|---|
|
|
257
|
+
| `tag` | string | Yes | | Tag name to search for (e.g. 'customer', 'order'). |
|
|
258
|
+
| `min_confidence` | float | No | | Minimum confidence threshold (0.0-1.0). |
|
|
259
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
260
|
+
|
|
261
|
+
### `list_tags`
|
|
262
|
+
|
|
263
|
+
List all semantic tags with model counts and average confidence. Provides the project's business domain vocabulary.
|
|
264
|
+
|
|
265
|
+
| Parameter | Type | Required | Default | Description |
|
|
266
|
+
|---|---|---|---|---|
|
|
267
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
268
|
+
|
|
269
|
+
### `find_similar_models`
|
|
270
|
+
|
|
271
|
+
Find existing models similar to what you're building. Compares reference overlap, column overlap, and layer placement.
|
|
272
|
+
|
|
273
|
+
| Parameter | Type | Required | Default | Description |
|
|
274
|
+
|---|---|---|---|---|
|
|
275
|
+
| `references` | list[string] | No | | Tables this model will reference. |
|
|
276
|
+
| `output_columns` | list[string] | No | | Columns this model will output. |
|
|
277
|
+
| `model` | string | No | | Existing model name to find similar models to. |
|
|
278
|
+
| `limit` | int | No | 5 | Max results (1-50). |
|
|
279
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
280
|
+
|
|
281
|
+
### `suggest_placement`
|
|
282
|
+
|
|
283
|
+
Recommend where to place a new model based on its references. Uses inferred layer flow rules and naming conventions.
|
|
284
|
+
|
|
285
|
+
| Parameter | Type | Required | Default | Description |
|
|
286
|
+
|---|---|---|---|---|
|
|
287
|
+
| `references` | list[string] | Yes | | Tables this new model will reference. |
|
|
288
|
+
| `name` | string | No | | Proposed model name (for naming validation). |
|
|
289
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
290
|
+
|
|
240
291
|
## DuckPGQ Tools
|
|
241
292
|
|
|
242
293
|
The following tools require the [DuckPGQ](https://github.com/cwida/duckpgq) extension: `find_path`, `find_critical_models`, `find_subgraphs`. The extension is installed automatically on first use. Tools that don't require DuckPGQ (`detect_cycles`, `find_bottlenecks`, `check_impact`) use plain SQL and work everywhere.
|
|
@@ -59,6 +59,7 @@ nav:
|
|
|
59
59
|
- User Guide:
|
|
60
60
|
- CLI Reference: guide/cli.md
|
|
61
61
|
- MCP Tools: guide/mcp-tools.md
|
|
62
|
+
- Conventions: guide/conventions.md
|
|
62
63
|
- SQLMesh Integration: guide/sqlmesh.md
|
|
63
64
|
- dbt Integration: guide/dbt.md
|
|
64
65
|
- Architecture:
|
|
@@ -69,6 +70,7 @@ nav:
|
|
|
69
70
|
- GraphDB (Storage): api/graph.md
|
|
70
71
|
- Indexer (Orchestrator): api/indexer.md
|
|
71
72
|
- SQL Parser: api/sql-parser.md
|
|
73
|
+
- Convention Engine: api/conventions.md
|
|
72
74
|
- SQLMesh Renderer: api/sqlmesh.md
|
|
73
75
|
- dbt Renderer: api/dbt.md
|
|
74
76
|
- MCP Server: api/mcp-tools.md
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlprism"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.1"
|
|
8
8
|
description = "SQL codebase indexer with column-level lineage, impact analysis, and MCP server support"
|
|
9
9
|
license = "Apache-2.0"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -26,7 +26,7 @@ classifiers = [
|
|
|
26
26
|
dependencies = [
|
|
27
27
|
"mcp[cli]>=1.0.0",
|
|
28
28
|
"duckdb>=1.5.0",
|
|
29
|
-
"sqlglot>=
|
|
29
|
+
"sqlglot[c]>=30.0.0,<31",
|
|
30
30
|
"pydantic>=2.0.0",
|
|
31
31
|
"click>=8.0.0",
|
|
32
32
|
"pyyaml>=6.0",
|