sqlprism 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.github/workflows/docs.yml +2 -2
- {sqlprism-1.1.0 → sqlprism-1.2.0}/CLAUDE.md +7 -4
- {sqlprism-1.1.0 → sqlprism-1.2.0}/PKG-INFO +15 -6
- {sqlprism-1.1.0 → sqlprism-1.2.0}/README.md +13 -4
- sqlprism-1.2.0/docs/api/conventions.md +23 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/architecture/schema.md +54 -1
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/guide/cli.md +31 -1
- sqlprism-1.2.0/docs/guide/conventions.md +82 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/guide/mcp-tools.md +52 -1
- {sqlprism-1.1.0 → sqlprism-1.2.0}/mkdocs.yml +2 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/pyproject.toml +2 -2
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/cli.py +155 -1
- sqlprism-1.2.0/src/sqlprism/core/conventions.py +1590 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/core/graph.py +923 -4
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/core/indexer.py +73 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/core/mcp_tools.py +318 -115
- sqlprism-1.2.0/tests/test_conventions.py +2847 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_graph.py +3 -3
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_indexer.py +201 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_mcp_tools.py +11 -6
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_renderers.py +2 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_sql_parser.py +8 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_types.py +1 -1
- {sqlprism-1.1.0 → sqlprism-1.2.0}/uv.lock +36 -6
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/creating-branches-and-prs/PR-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/creating-branches-and-prs/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/implementing-issues/AGENT-INSTRUCTIONS.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/implementing-issues/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/implementing-issues/TASK-FORMAT.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/managing-project-releases/BDD-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/managing-project-releases/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/reviewing-prs/COMMENT-TEMPLATE.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/reviewing-prs/REVIEWERS.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/reviewing-prs/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/writing-graph-queries/EXAMPLES.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.claude/skills/writing-graph-queries/SKILL.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.github/dependabot.yml +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.github/workflows/ci.yml +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.github/workflows/publish.yml +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/.gitignore +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/LICENSE +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/dbt.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/graph.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/indexer.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/mcp-tools.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/sql-parser.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/sqlmesh.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/api/types.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/architecture/overview.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/getting-started/configuration.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/getting-started/installation.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/guide/dbt.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/guide/sqlmesh.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/docs/index.md +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/core/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/languages/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/languages/dbt.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/languages/sql.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/languages/sqlmesh.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/languages/utils.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/src/sqlprism/types.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/__init__.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_cli.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_config.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_federation.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_graph_tools.py +0 -0
- {sqlprism-1.1.0 → sqlprism-1.2.0}/tests/test_languages.py +0 -0
|
@@ -37,10 +37,10 @@ jobs:
|
|
|
37
37
|
run: uv run mkdocs build
|
|
38
38
|
|
|
39
39
|
- name: Upload Pages artifact
|
|
40
|
-
uses: actions/upload-pages-artifact@
|
|
40
|
+
uses: actions/upload-pages-artifact@v4
|
|
41
41
|
with:
|
|
42
42
|
path: site/
|
|
43
43
|
|
|
44
44
|
- name: Deploy to GitHub Pages
|
|
45
45
|
id: deployment
|
|
46
|
-
uses: actions/deploy-pages@
|
|
46
|
+
uses: actions/deploy-pages@v5
|
|
@@ -18,17 +18,20 @@ src/sqlprism/
|
|
|
18
18
|
core/
|
|
19
19
|
graph.py — DuckDB storage layer (MVCC, repo_type tracking)
|
|
20
20
|
indexer.py — Orchestrates parsing + indexing; file-level reindex with repo-type dispatch
|
|
21
|
-
mcp_tools.py — MCP server tools (
|
|
21
|
+
mcp_tools.py — MCP server tools (24 tools, non-blocking reindex, per-repo debounce)
|
|
22
|
+
conventions.py — Convention inference engine (layers, naming, references, tags, overrides)
|
|
22
23
|
languages/
|
|
23
24
|
sql.py — sqlglot-based SQL parser
|
|
24
25
|
dbt.py — dbt renderer (full project + selective render_models)
|
|
25
26
|
sqlmesh.py — sqlmesh renderer (full project + selective render_models)
|
|
26
27
|
utils.py — Shared venv/env utilities
|
|
27
28
|
types.py — Data classes (ParseResult, NodeResult, etc.)
|
|
28
|
-
cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, status, init)
|
|
29
|
+
cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init)
|
|
29
30
|
tests/
|
|
30
|
-
test_indexer.py
|
|
31
|
-
test_renderers.py
|
|
31
|
+
test_indexer.py — Indexer + integration tests
|
|
32
|
+
test_renderers.py — dbt/sqlmesh renderer tests
|
|
33
|
+
test_conventions.py — Convention engine + placement + tags tests
|
|
34
|
+
test_sql_parser.py — SQL parser, lineage, and dialect tests
|
|
32
35
|
```
|
|
33
36
|
|
|
34
37
|
## Conventions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlprism
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: SQL codebase indexer with column-level lineage, impact analysis, and MCP server support
|
|
5
5
|
Project-URL: Homepage, https://github.com/darkcofy/sqlprism
|
|
6
6
|
Project-URL: Documentation, https://darkcofy.github.io/sqlprism/
|
|
@@ -23,7 +23,7 @@ Requires-Dist: duckdb>=1.5.0
|
|
|
23
23
|
Requires-Dist: mcp[cli]>=1.0.0
|
|
24
24
|
Requires-Dist: pydantic>=2.0.0
|
|
25
25
|
Requires-Dist: pyyaml>=6.0
|
|
26
|
-
Requires-Dist: sqlglot<30
|
|
26
|
+
Requires-Dist: sqlglot[c]<31,>=30.0.0
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
|
|
29
29
|
# SQLPrism
|
|
@@ -225,6 +225,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
225
225
|
| `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
|
|
226
226
|
| `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
|
|
227
227
|
| `sqlprism serve` | Start the MCP server (stdio or HTTP). |
|
|
228
|
+
| `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
|
|
229
|
+
| `sqlprism conventions refresh` | Re-run convention inference after reindex. |
|
|
230
|
+
| `sqlprism conventions diff` | Show what changed since the last `sqlprism conventions init`. |
|
|
228
231
|
| `sqlprism status` | Show index status. |
|
|
229
232
|
| `sqlprism query search` | Find entities by name pattern. |
|
|
230
233
|
| `sqlprism query references` | Find inbound/outbound dependencies. |
|
|
@@ -236,7 +239,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
236
239
|
|
|
237
240
|
Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
|
|
238
241
|
|
|
239
|
-
When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
242
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
|
|
240
243
|
|
|
241
244
|
| Tool | Description |
|
|
242
245
|
|---|---|
|
|
@@ -258,6 +261,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
|
258
261
|
| `reindex_files` | Fast on-save reindex with per-repo debounce. |
|
|
259
262
|
| `reindex_dbt` | Background dbt compile + index. |
|
|
260
263
|
| `reindex_sqlmesh` | Background SQLMesh render + index. |
|
|
264
|
+
| `get_conventions` | Inferred project conventions — naming, references, columns. |
|
|
265
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
266
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
267
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
268
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
261
269
|
| `index_status` | Index stats, cross-repo edges, and name collisions. |
|
|
262
270
|
|
|
263
271
|
## Architecture
|
|
@@ -274,8 +282,9 @@ src/sqlprism/
|
|
|
274
282
|
core/
|
|
275
283
|
graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
|
|
276
284
|
indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
|
|
277
|
-
mcp_tools.py <- FastMCP tool definitions (
|
|
278
|
-
|
|
285
|
+
mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
|
|
286
|
+
conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
|
|
287
|
+
cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
|
|
279
288
|
```
|
|
280
289
|
|
|
281
290
|
The SQL parser extracts:
|
|
@@ -294,7 +303,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
|
|
|
294
303
|
|
|
295
304
|
```bash
|
|
296
305
|
uv sync
|
|
297
|
-
uv run pytest # run tests (
|
|
306
|
+
uv run pytest # run tests (510+ tests)
|
|
298
307
|
uv run pytest --cov=sqlprism # run with coverage report
|
|
299
308
|
uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
|
|
300
309
|
```
|
|
@@ -197,6 +197,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
197
197
|
| `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
|
|
198
198
|
| `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
|
|
199
199
|
| `sqlprism serve` | Start the MCP server (stdio or HTTP). |
|
|
200
|
+
| `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
|
|
201
|
+
| `sqlprism conventions refresh` | Re-run convention inference after reindex. |
|
|
202
|
+
| `sqlprism conventions diff` | Show what changed since the last `sqlprism conventions init`. |
|
|
200
203
|
| `sqlprism status` | Show index status. |
|
|
201
204
|
| `sqlprism query search` | Find entities by name pattern. |
|
|
202
205
|
| `sqlprism query references` | Find inbound/outbound dependencies. |
|
|
@@ -208,7 +211,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
|
|
|
208
211
|
|
|
209
212
|
Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
|
|
210
213
|
|
|
211
|
-
When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
214
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
|
|
212
215
|
|
|
213
216
|
| Tool | Description |
|
|
214
217
|
|---|---|
|
|
@@ -230,6 +233,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
|
|
|
230
233
|
| `reindex_files` | Fast on-save reindex with per-repo debounce. |
|
|
231
234
|
| `reindex_dbt` | Background dbt compile + index. |
|
|
232
235
|
| `reindex_sqlmesh` | Background SQLMesh render + index. |
|
|
236
|
+
| `get_conventions` | Inferred project conventions — naming, references, columns. |
|
|
237
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
238
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
239
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
240
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
233
241
|
| `index_status` | Index stats, cross-repo edges, and name collisions. |
|
|
234
242
|
|
|
235
243
|
## Architecture
|
|
@@ -246,8 +254,9 @@ src/sqlprism/
|
|
|
246
254
|
core/
|
|
247
255
|
graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
|
|
248
256
|
indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
|
|
249
|
-
mcp_tools.py <- FastMCP tool definitions (
|
|
250
|
-
|
|
257
|
+
mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
|
|
258
|
+
conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
|
|
259
|
+
cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
|
|
251
260
|
```
|
|
252
261
|
|
|
253
262
|
The SQL parser extracts:
|
|
@@ -266,7 +275,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
|
|
|
266
275
|
|
|
267
276
|
```bash
|
|
268
277
|
uv sync
|
|
269
|
-
uv run pytest # run tests (
|
|
278
|
+
uv run pytest # run tests (510+ tests)
|
|
270
279
|
uv run pytest --cov=sqlprism # run with coverage report
|
|
271
280
|
uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
|
|
272
281
|
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Convention Engine
|
|
2
|
+
|
|
3
|
+
The convention inference engine detects project patterns from the indexed SQL graph — naming conventions, layer references, required columns, column style, and semantic tags.
|
|
4
|
+
|
|
5
|
+
## ConventionEngine
|
|
6
|
+
|
|
7
|
+
::: sqlprism.core.conventions.ConventionEngine
|
|
8
|
+
|
|
9
|
+
## Data Classes
|
|
10
|
+
|
|
11
|
+
::: sqlprism.core.conventions.Layer
|
|
12
|
+
|
|
13
|
+
::: sqlprism.core.conventions.NamingPattern
|
|
14
|
+
|
|
15
|
+
::: sqlprism.core.conventions.ReferenceRule
|
|
16
|
+
|
|
17
|
+
::: sqlprism.core.conventions.RequiredColumn
|
|
18
|
+
|
|
19
|
+
::: sqlprism.core.conventions.ColumnStyle
|
|
20
|
+
|
|
21
|
+
::: sqlprism.core.conventions.TagAssignment
|
|
22
|
+
|
|
23
|
+
::: sqlprism.core.conventions.Cluster
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The index is stored in a single DuckDB file (default: `~/.sqlprism/graph.duckdb`). Current schema version: **1.0**.
|
|
4
4
|
|
|
5
|
-
## Tables (
|
|
5
|
+
## Tables (9)
|
|
6
6
|
|
|
7
7
|
### `repos`
|
|
8
8
|
|
|
@@ -99,6 +99,51 @@ End-to-end column lineage chains.
|
|
|
99
99
|
|
|
100
100
|
A lineage chain traces one output column back to its source. Multiple chains (different `chain_index` values) exist when a column has multiple source paths (e.g. COALESCE of two columns).
|
|
101
101
|
|
|
102
|
+
### `columns`
|
|
103
|
+
|
|
104
|
+
Column definitions extracted from DDL or schema files.
|
|
105
|
+
|
|
106
|
+
| Column | Type | Description |
|
|
107
|
+
|---|---|---|
|
|
108
|
+
| `column_id` | INTEGER PK | Auto-increment ID. |
|
|
109
|
+
| `node_id` | INTEGER | FK to `nodes`. |
|
|
110
|
+
| `column_name` | TEXT | Column name. |
|
|
111
|
+
| `data_type` | TEXT or NULL | Column data type (e.g. `INTEGER`, `TEXT`). |
|
|
112
|
+
| `position` | INTEGER | Column ordinal position in the table. |
|
|
113
|
+
| `source` | TEXT | How the column was discovered: `definition`, `usage`, `lineage`. |
|
|
114
|
+
|
|
115
|
+
### `conventions`
|
|
116
|
+
|
|
117
|
+
Inferred or overridden project conventions per layer.
|
|
118
|
+
|
|
119
|
+
| Column | Type | Description |
|
|
120
|
+
|---|---|---|
|
|
121
|
+
| `convention_id` | INTEGER PK | Auto-increment ID. |
|
|
122
|
+
| `repo_id` | INTEGER | FK to `repos`. |
|
|
123
|
+
| `layer` | TEXT | Layer name (e.g. `staging`, `marts`). |
|
|
124
|
+
| `convention_type` | TEXT | `naming`, `references`, `required_columns`, or `column_style`. |
|
|
125
|
+
| `payload` | JSON | Convention data (pattern, allowed_targets, etc.). |
|
|
126
|
+
| `confidence` | FLOAT | Confidence score (0.0-1.0). |
|
|
127
|
+
| `source` | TEXT | `inferred` or `override`. |
|
|
128
|
+
| `model_count` | INTEGER | Number of models in this layer when inferred. |
|
|
129
|
+
|
|
130
|
+
Unique constraint: `(repo_id, layer, convention_type)`.
|
|
131
|
+
|
|
132
|
+
### `semantic_tags`
|
|
133
|
+
|
|
134
|
+
Semantic tags assigned to models by clustering or explicit override.
|
|
135
|
+
|
|
136
|
+
| Column | Type | Description |
|
|
137
|
+
|---|---|---|
|
|
138
|
+
| `tag_id` | INTEGER PK | Auto-increment ID. |
|
|
139
|
+
| `repo_id` | INTEGER | FK to `repos`. |
|
|
140
|
+
| `tag_name` | TEXT | Tag name (e.g. `customer`, `order`, `revenue`). |
|
|
141
|
+
| `node_id` | INTEGER | FK to `nodes`. |
|
|
142
|
+
| `confidence` | FLOAT | Confidence score (0.0-1.0). |
|
|
143
|
+
| `source` | TEXT | `inferred`, `anchor`, or `explicit`. |
|
|
144
|
+
|
|
145
|
+
Unique constraint: `(repo_id, tag_name, node_id)`.
|
|
146
|
+
|
|
102
147
|
## Indexes
|
|
103
148
|
|
|
104
149
|
```sql
|
|
@@ -117,6 +162,11 @@ CREATE INDEX idx_col_usage_type ON column_usage(usage_type);
|
|
|
117
162
|
CREATE INDEX idx_lineage_output ON column_lineage(output_node, output_column);
|
|
118
163
|
CREATE INDEX idx_lineage_hop ON column_lineage(hop_table, hop_column);
|
|
119
164
|
CREATE INDEX idx_lineage_file ON column_lineage(file_id);
|
|
165
|
+
CREATE INDEX idx_conventions_repo ON conventions(repo_id);
|
|
166
|
+
CREATE INDEX idx_conventions_type ON conventions(convention_type);
|
|
167
|
+
CREATE INDEX idx_tags_name ON semantic_tags(tag_name);
|
|
168
|
+
CREATE INDEX idx_tags_node ON semantic_tags(node_id);
|
|
169
|
+
CREATE INDEX idx_tags_repo ON semantic_tags(repo_id);
|
|
120
170
|
```
|
|
121
171
|
|
|
122
172
|
## Entity Relationship
|
|
@@ -130,4 +180,7 @@ repos 1──* files 1──* nodes
|
|
|
130
180
|
|
|
131
181
|
files 1──* column_usage *──1 nodes
|
|
132
182
|
files 1──* column_lineage
|
|
183
|
+
nodes 1──* columns
|
|
184
|
+
repos 1──* conventions
|
|
185
|
+
repos 1──* semantic_tags *──1 nodes
|
|
133
186
|
```
|
|
@@ -34,7 +34,7 @@ sqlprism status [--config PATH] [--db PATH]
|
|
|
34
34
|
|
|
35
35
|
### `sqlprism serve`
|
|
36
36
|
|
|
37
|
-
Starts the MCP server, exposing all
|
|
37
|
+
Starts the MCP server, exposing all 24 tools to any MCP client.
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
40
|
sqlprism serve [--config PATH] [--db PATH] [--transport stdio|streamable-http] [--port 8000]
|
|
@@ -130,6 +130,36 @@ sqlprism reindex-dbt \
|
|
|
130
130
|
| `--profiles-dir` | No | Path to directory containing `profiles.yml`. Defaults to the project directory. |
|
|
131
131
|
| `--dbt-command` | No | Base command to invoke dbt. `compile` is appended automatically. Default: `uv run dbt`. Use `dbt` if globally installed, or `uvx --with dbt-starrocks dbt` for ephemeral install. |
|
|
132
132
|
|
|
133
|
+
## Convention Commands
|
|
134
|
+
|
|
135
|
+
### `sqlprism conventions init`
|
|
136
|
+
|
|
137
|
+
Generate a `sqlprism.conventions.yml` file from inferred conventions. Includes confidence scores as comments.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
sqlprism conventions init [--config PATH] [--db PATH] [--force]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
| Parameter | Description |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `--force` | Overwrite existing conventions file. Without this flag, init refuses to overwrite. |
|
|
146
|
+
|
|
147
|
+
### `sqlprism conventions refresh`
|
|
148
|
+
|
|
149
|
+
Re-run convention inference after reindex. Preserves explicit overrides (source: 'override').
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
sqlprism conventions refresh [--config PATH] [--db PATH]
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### `sqlprism conventions diff`
|
|
156
|
+
|
|
157
|
+
Show what conventions changed since the last `init`. Compares the YAML file against current inferred conventions.
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
sqlprism conventions diff [--config PATH] [--db PATH]
|
|
161
|
+
```
|
|
162
|
+
|
|
133
163
|
## Query Commands
|
|
134
164
|
|
|
135
165
|
All query commands output JSON to stdout. They share common parameters:
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Conventions
|
|
2
|
+
|
|
3
|
+
The conventions engine automatically infers project patterns from the indexed SQL graph -- naming conventions, layer references, required columns, column naming style, and semantic tags. It runs after every reindex, giving AI agents (and developers) a machine-readable description of how the project is structured.
|
|
4
|
+
|
|
5
|
+
## How It Works
|
|
6
|
+
|
|
7
|
+
- Convention inference runs automatically after reindex.
|
|
8
|
+
- Detects layers from directory structure (`models/staging/`, `models/marts/`, etc.).
|
|
9
|
+
- Infers naming patterns per layer (e.g. `stg_{source}_{entity}`).
|
|
10
|
+
- Infers reference rules (which layers reference which).
|
|
11
|
+
- Detects required columns (columns appearing in >70% of models per layer).
|
|
12
|
+
- Detects column naming style (`snake_case`, `camelCase`, etc.).
|
|
13
|
+
- Assigns semantic tags via structural clustering (no ML, deterministic).
|
|
14
|
+
|
|
15
|
+
## Confidence Scores
|
|
16
|
+
|
|
17
|
+
All conventions have a confidence score (0.0--1.0):
|
|
18
|
+
|
|
19
|
+
| Range | Meaning |
|
|
20
|
+
|---|---|
|
|
21
|
+
| **>0.9** | High confidence. Follow this convention. |
|
|
22
|
+
| **0.7--0.9** | Moderate. Likely correct but worth verifying. |
|
|
23
|
+
| **<0.7** | Low confidence. Consider an explicit override. |
|
|
24
|
+
|
|
25
|
+
## Overrides
|
|
26
|
+
|
|
27
|
+
You can override inferred conventions with a YAML file:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
sqlprism conventions init # generates sqlprism.conventions.yml
|
|
31
|
+
# edit the file to add explicit overrides
|
|
32
|
+
sqlprism conventions refresh # re-runs inference, preserving overrides
|
|
33
|
+
sqlprism conventions diff # shows changes since last init
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Overrides take precedence over inferred conventions (`source: 'override'` vs `source: 'inferred'`).
|
|
37
|
+
|
|
38
|
+
## MCP Tools
|
|
39
|
+
|
|
40
|
+
Five MCP tools expose conventions to AI agents:
|
|
41
|
+
|
|
42
|
+
| Tool | Description |
|
|
43
|
+
|---|---|
|
|
44
|
+
| `get_conventions` | Naming rules, reference rules, required columns per layer. |
|
|
45
|
+
| `find_similar_models` | Find existing models similar to what you're building. |
|
|
46
|
+
| `suggest_placement` | Recommend where to place a new model based on references. |
|
|
47
|
+
| `search_by_tag` | Find models by semantic tag (business domain concept). |
|
|
48
|
+
| `list_tags` | List all semantic tags with model counts and confidence. |
|
|
49
|
+
|
|
50
|
+
See [MCP Tools](mcp-tools.md) for parameter details.
|
|
51
|
+
|
|
52
|
+
## Semantic Tags
|
|
53
|
+
|
|
54
|
+
Tags are assigned by structural clustering -- models that share many upstream references get grouped and auto-labeled based on common name tokens. Tags represent business domain concepts (e.g. "customer", "order", "revenue").
|
|
55
|
+
|
|
56
|
+
Tag sources:
|
|
57
|
+
|
|
58
|
+
| Source | Description |
|
|
59
|
+
|---|---|
|
|
60
|
+
| **inferred** | Automatically assigned via clustering. |
|
|
61
|
+
| **anchor** | Manually specified in the YAML override as cluster anchors. |
|
|
62
|
+
| **explicit** | Manually assigned to specific models in the YAML override. |
|
|
63
|
+
|
|
64
|
+
## Example Workflow
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# 1. Index your project
|
|
68
|
+
sqlprism reindex
|
|
69
|
+
|
|
70
|
+
# 2. Generate conventions file
|
|
71
|
+
sqlprism conventions init
|
|
72
|
+
# -> creates sqlprism.conventions.yml with inferred conventions
|
|
73
|
+
|
|
74
|
+
# 3. Review and adjust
|
|
75
|
+
# Edit the YAML to fix any conventions the engine got wrong
|
|
76
|
+
|
|
77
|
+
# 4. Re-run inference (preserves your overrides)
|
|
78
|
+
sqlprism conventions refresh
|
|
79
|
+
|
|
80
|
+
# 5. Check what changed
|
|
81
|
+
sqlprism conventions diff
|
|
82
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MCP Tools
|
|
2
2
|
|
|
3
|
-
When running as an MCP server (`sqlprism serve`),
|
|
3
|
+
When running as an MCP server (`sqlprism serve`), 24 tools are exposed. Any MCP client (Claude Code, Claude Desktop, Cursor, Continue.dev) can call these.
|
|
4
4
|
|
|
5
5
|
## Query Tools
|
|
6
6
|
|
|
@@ -237,6 +237,57 @@ Analyze the downstream impact of proposed column changes BEFORE modifying code.
|
|
|
237
237
|
| `changes` | list | Yes | | List of column changes (remove_column, rename_column, add_column). |
|
|
238
238
|
| `repo` | string | No | | Filter by repo name. |
|
|
239
239
|
|
|
240
|
+
## Convention Tools
|
|
241
|
+
|
|
242
|
+
### `get_conventions`
|
|
243
|
+
|
|
244
|
+
Get naming conventions, reference rules, and required columns for a layer. Returns inferred conventions with confidence scores.
|
|
245
|
+
|
|
246
|
+
| Parameter | Type | Required | Default | Description |
|
|
247
|
+
|---|---|---|---|---|
|
|
248
|
+
| `layer` | string | No | | Layer name (e.g. 'staging', 'marts'). Omit for all layers. |
|
|
249
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
250
|
+
|
|
251
|
+
### `search_by_tag`
|
|
252
|
+
|
|
253
|
+
Find models tagged with a business domain concept. Returns models ranked by confidence.
|
|
254
|
+
|
|
255
|
+
| Parameter | Type | Required | Default | Description |
|
|
256
|
+
|---|---|---|---|---|
|
|
257
|
+
| `tag` | string | Yes | | Tag name to search for (e.g. 'customer', 'order'). |
|
|
258
|
+
| `min_confidence` | float | No | | Minimum confidence threshold (0.0-1.0). |
|
|
259
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
260
|
+
|
|
261
|
+
### `list_tags`
|
|
262
|
+
|
|
263
|
+
List all semantic tags with model counts and average confidence. Provides the project's business domain vocabulary.
|
|
264
|
+
|
|
265
|
+
| Parameter | Type | Required | Default | Description |
|
|
266
|
+
|---|---|---|---|---|
|
|
267
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
268
|
+
|
|
269
|
+
### `find_similar_models`
|
|
270
|
+
|
|
271
|
+
Find existing models similar to what you're building. Compares reference overlap, column overlap, and layer placement.
|
|
272
|
+
|
|
273
|
+
| Parameter | Type | Required | Default | Description |
|
|
274
|
+
|---|---|---|---|---|
|
|
275
|
+
| `references` | list[string] | No | | Tables this model will reference. |
|
|
276
|
+
| `output_columns` | list[string] | No | | Columns this model will output. |
|
|
277
|
+
| `model` | string | No | | Existing model name to find similar models to. |
|
|
278
|
+
| `limit` | int | No | 5 | Max results (1-50). |
|
|
279
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
280
|
+
|
|
281
|
+
### `suggest_placement`
|
|
282
|
+
|
|
283
|
+
Recommend where to place a new model based on its references. Uses inferred layer flow rules and naming conventions.
|
|
284
|
+
|
|
285
|
+
| Parameter | Type | Required | Default | Description |
|
|
286
|
+
|---|---|---|---|---|
|
|
287
|
+
| `references` | list[string] | Yes | | Tables this new model will reference. |
|
|
288
|
+
| `name` | string | No | | Proposed model name (for naming validation). |
|
|
289
|
+
| `repo` | string | No | | Filter by repo name. |
|
|
290
|
+
|
|
240
291
|
## DuckPGQ Tools
|
|
241
292
|
|
|
242
293
|
The following tools require the [DuckPGQ](https://github.com/cwida/duckpgq) extension: `find_path`, `find_critical_models`, `find_subgraphs`. The extension is installed automatically on first use. Tools that don't require DuckPGQ (`detect_cycles`, `find_bottlenecks`, `check_impact`) use plain SQL and work everywhere.
|
|
@@ -59,6 +59,7 @@ nav:
|
|
|
59
59
|
- User Guide:
|
|
60
60
|
- CLI Reference: guide/cli.md
|
|
61
61
|
- MCP Tools: guide/mcp-tools.md
|
|
62
|
+
- Conventions: guide/conventions.md
|
|
62
63
|
- SQLMesh Integration: guide/sqlmesh.md
|
|
63
64
|
- dbt Integration: guide/dbt.md
|
|
64
65
|
- Architecture:
|
|
@@ -69,6 +70,7 @@ nav:
|
|
|
69
70
|
- GraphDB (Storage): api/graph.md
|
|
70
71
|
- Indexer (Orchestrator): api/indexer.md
|
|
71
72
|
- SQL Parser: api/sql-parser.md
|
|
73
|
+
- Convention Engine: api/conventions.md
|
|
72
74
|
- SQLMesh Renderer: api/sqlmesh.md
|
|
73
75
|
- dbt Renderer: api/dbt.md
|
|
74
76
|
- MCP Server: api/mcp-tools.md
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlprism"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "SQL codebase indexer with column-level lineage, impact analysis, and MCP server support"
|
|
9
9
|
license = "Apache-2.0"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -26,7 +26,7 @@ classifiers = [
|
|
|
26
26
|
dependencies = [
|
|
27
27
|
"mcp[cli]>=1.0.0",
|
|
28
28
|
"duckdb>=1.5.0",
|
|
29
|
-
"sqlglot>=
|
|
29
|
+
"sqlglot[c]>=30.0.0,<31",
|
|
30
30
|
"pydantic>=2.0.0",
|
|
31
31
|
"click>=8.0.0",
|
|
32
32
|
"pyyaml>=6.0",
|
|
@@ -241,7 +241,7 @@ def reindex_file(paths, config_path, db_path):
|
|
|
241
241
|
for name, cfg in repo_configs.items():
|
|
242
242
|
graph.upsert_repo(name, cfg["path"], repo_type=cfg["repo_type"])
|
|
243
243
|
|
|
244
|
-
resolved_paths = [str(Path(p).resolve()) for p in paths]
|
|
244
|
+
resolved_paths: list[str | Path] = [str(Path(p).resolve()) for p in paths]
|
|
245
245
|
stats = indexer.reindex_files(paths=resolved_paths, repo_configs=repo_configs)
|
|
246
246
|
|
|
247
247
|
# Print summary
|
|
@@ -614,6 +614,148 @@ def status(config_path: str, db_path: str | None):
|
|
|
614
614
|
click.echo(json.dumps(info, indent=2, default=str))
|
|
615
615
|
|
|
616
616
|
|
|
617
|
+
@cli.group()
|
|
618
|
+
def conventions():
|
|
619
|
+
"""Manage convention inference and overrides."""
|
|
620
|
+
pass
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
@conventions.command("init")
|
|
624
|
+
@click.option("--config", "config_path", type=click.Path(), default=None)
|
|
625
|
+
@click.option("--db", "db_path", type=click.Path(), default=None)
|
|
626
|
+
@click.option("--force", is_flag=True, help="Overwrite existing conventions file")
|
|
627
|
+
def conventions_init(config_path: str, db_path: str | None, force: bool):
|
|
628
|
+
"""Generate a sqlprism.conventions.yml with inferred conventions.
|
|
629
|
+
|
|
630
|
+
Runs inference and writes a YAML file with confidence scores as comments.
|
|
631
|
+
Review and adjust the file, then re-run reindex to apply overrides.
|
|
632
|
+
"""
|
|
633
|
+
from sqlprism.core.conventions import ConventionEngine
|
|
634
|
+
from sqlprism.core.graph import GraphDB
|
|
635
|
+
|
|
636
|
+
config = _try_load_config(config_path)
|
|
637
|
+
effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))
|
|
638
|
+
|
|
639
|
+
if not Path(effective_db_path).exists():
|
|
640
|
+
click.echo("No index found. Run 'sqlprism reindex' first.")
|
|
641
|
+
sys.exit(1)
|
|
642
|
+
|
|
643
|
+
output_path = Path("sqlprism.conventions.yml")
|
|
644
|
+
if output_path.exists() and not force:
|
|
645
|
+
click.echo(
|
|
646
|
+
f"{output_path} already exists. Use --force to overwrite."
|
|
647
|
+
)
|
|
648
|
+
sys.exit(1)
|
|
649
|
+
|
|
650
|
+
graph = GraphDB(effective_db_path)
|
|
651
|
+
try:
|
|
652
|
+
# Find all repos and run inference for each
|
|
653
|
+
repos = graph._execute_read("SELECT repo_id FROM repos").fetchall()
|
|
654
|
+
if not repos:
|
|
655
|
+
click.echo("No repos indexed. Run 'sqlprism reindex' first.")
|
|
656
|
+
sys.exit(1)
|
|
657
|
+
|
|
658
|
+
repo_id = repos[0][0]
|
|
659
|
+
if len(repos) > 1:
|
|
660
|
+
click.echo(
|
|
661
|
+
f"Warning: {len(repos)} repos found; using first repo. "
|
|
662
|
+
"Use 'conventions refresh' to process all repos.",
|
|
663
|
+
err=True,
|
|
664
|
+
)
|
|
665
|
+
engine = ConventionEngine(graph, repo_id)
|
|
666
|
+
engine.run_inference()
|
|
667
|
+
|
|
668
|
+
yaml_content = engine.generate_yaml()
|
|
669
|
+
output_path.write_text(yaml_content)
|
|
670
|
+
click.echo(f"Wrote {output_path}")
|
|
671
|
+
finally:
|
|
672
|
+
graph.close()
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
@conventions.command("refresh")
|
|
676
|
+
@click.option("--config", "config_path", type=click.Path(), default=None)
|
|
677
|
+
@click.option("--db", "db_path", type=click.Path(), default=None)
|
|
678
|
+
def conventions_refresh(config_path: str, db_path: str | None):
|
|
679
|
+
"""Re-run convention inference without writing YAML.
|
|
680
|
+
|
|
681
|
+
Updates the conventions and semantic_tags tables in the database.
|
|
682
|
+
Useful after reindex to see how conventions changed.
|
|
683
|
+
"""
|
|
684
|
+
from sqlprism.core.conventions import ConventionEngine
|
|
685
|
+
from sqlprism.core.graph import GraphDB
|
|
686
|
+
|
|
687
|
+
config = _try_load_config(config_path)
|
|
688
|
+
effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))
|
|
689
|
+
|
|
690
|
+
if not Path(effective_db_path).exists():
|
|
691
|
+
click.echo("No index found. Run 'sqlprism reindex' first.")
|
|
692
|
+
sys.exit(1)
|
|
693
|
+
|
|
694
|
+
graph = GraphDB(effective_db_path)
|
|
695
|
+
try:
|
|
696
|
+
repos = graph._execute_read("SELECT repo_id FROM repos").fetchall()
|
|
697
|
+
if not repos:
|
|
698
|
+
click.echo("No repos indexed.")
|
|
699
|
+
sys.exit(1)
|
|
700
|
+
|
|
701
|
+
for (repo_id,) in repos:
|
|
702
|
+
engine = ConventionEngine(graph, repo_id)
|
|
703
|
+
result = engine.run_inference()
|
|
704
|
+
click.echo(
|
|
705
|
+
f"Repo {repo_id}: {result['layers_detected']} layers, "
|
|
706
|
+
f"{result['conventions_stored']} conventions stored"
|
|
707
|
+
)
|
|
708
|
+
finally:
|
|
709
|
+
graph.close()
|
|
710
|
+
|
|
711
|
+
click.echo("Done.")
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
@conventions.command("diff")
|
|
715
|
+
@click.option("--config", "config_path", type=click.Path(), default=None)
|
|
716
|
+
@click.option("--db", "db_path", type=click.Path(), default=None)
|
|
717
|
+
@click.option(
|
|
718
|
+
"--file",
|
|
719
|
+
"yaml_file",
|
|
720
|
+
type=click.Path(),
|
|
721
|
+
default="sqlprism.conventions.yml",
|
|
722
|
+
help="Conventions YAML file to compare against",
|
|
723
|
+
)
|
|
724
|
+
def conventions_diff(config_path: str, db_path: str | None, yaml_file: str):
|
|
725
|
+
"""Show what changed since last --init.
|
|
726
|
+
|
|
727
|
+
Compares current conventions in the database against the YAML file.
|
|
728
|
+
"""
|
|
729
|
+
from sqlprism.core.conventions import ConventionEngine
|
|
730
|
+
from sqlprism.core.graph import GraphDB
|
|
731
|
+
|
|
732
|
+
config = _try_load_config(config_path)
|
|
733
|
+
effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))
|
|
734
|
+
|
|
735
|
+
if not Path(effective_db_path).exists():
|
|
736
|
+
click.echo("No index found. Run 'sqlprism reindex' first.")
|
|
737
|
+
sys.exit(1)
|
|
738
|
+
|
|
739
|
+
graph = GraphDB(effective_db_path)
|
|
740
|
+
try:
|
|
741
|
+
repos = graph._execute_read("SELECT repo_id FROM repos").fetchall()
|
|
742
|
+
if not repos:
|
|
743
|
+
click.echo("No repos indexed.")
|
|
744
|
+
sys.exit(1)
|
|
745
|
+
|
|
746
|
+
repo_id = repos[0][0]
|
|
747
|
+
if len(repos) > 1:
|
|
748
|
+
click.echo(
|
|
749
|
+
f"Warning: {len(repos)} repos found; comparing first repo only.",
|
|
750
|
+
err=True,
|
|
751
|
+
)
|
|
752
|
+
engine = ConventionEngine(graph, repo_id)
|
|
753
|
+
diff_output = engine.get_diff(yaml_file)
|
|
754
|
+
click.echo(diff_output)
|
|
755
|
+
finally:
|
|
756
|
+
graph.close()
|
|
757
|
+
|
|
758
|
+
|
|
617
759
|
@cli.command("init")
|
|
618
760
|
@click.option(
|
|
619
761
|
"--format",
|
|
@@ -705,6 +847,18 @@ def _build_repo_configs(config: dict) -> dict:
|
|
|
705
847
|
return result
|
|
706
848
|
|
|
707
849
|
|
|
850
|
+
def _try_load_config(path: str | None) -> dict:
|
|
851
|
+
"""Try to load config, return empty dict if not found.
|
|
852
|
+
|
|
853
|
+
Unlike ``_cli_load_config``, does not raise on missing config.
|
|
854
|
+
Used by commands that work with just ``--db`` and no config.
|
|
855
|
+
"""
|
|
856
|
+
try:
|
|
857
|
+
return load_config(path)
|
|
858
|
+
except FileNotFoundError:
|
|
859
|
+
return {}
|
|
860
|
+
|
|
861
|
+
|
|
708
862
|
def _cli_load_config(path: str | None) -> dict:
|
|
709
863
|
"""CLI wrapper: converts FileNotFoundError to a friendly click error."""
|
|
710
864
|
try:
|