sqlprism 1.1.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/ci.yml +1 -1
  2. {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/docs.yml +2 -2
  3. {sqlprism-1.1.0 → sqlprism-1.2.1}/.gitignore +1 -0
  4. {sqlprism-1.1.0 → sqlprism-1.2.1}/CLAUDE.md +7 -4
  5. {sqlprism-1.1.0 → sqlprism-1.2.1}/PKG-INFO +17 -6
  6. {sqlprism-1.1.0 → sqlprism-1.2.1}/README.md +15 -4
  7. sqlprism-1.2.1/docs/api/conventions.md +23 -0
  8. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/architecture/schema.md +54 -1
  9. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/cli.md +31 -1
  10. sqlprism-1.2.1/docs/guide/conventions.md +82 -0
  11. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/mcp-tools.md +52 -1
  12. {sqlprism-1.1.0 → sqlprism-1.2.1}/mkdocs.yml +2 -0
  13. {sqlprism-1.1.0 → sqlprism-1.2.1}/pyproject.toml +2 -2
  14. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/cli.py +162 -2
  15. sqlprism-1.2.1/src/sqlprism/core/conventions.py +1590 -0
  16. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/graph.py +1541 -230
  17. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/indexer.py +322 -54
  18. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/mcp_tools.py +318 -115
  19. sqlprism-1.2.1/src/sqlprism/core/naming.py +32 -0
  20. sqlprism-1.2.1/src/sqlprism/languages/dbt.py +1117 -0
  21. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/sql.py +150 -22
  22. sqlprism-1.2.1/src/sqlprism/languages/sqlmesh.py +645 -0
  23. sqlprism-1.2.1/tests/test_conventions.py +2847 -0
  24. sqlprism-1.2.1/tests/test_federation.py +488 -0
  25. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_graph.py +346 -27
  26. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_graph_tools.py +246 -0
  27. sqlprism-1.2.1/tests/test_indexer.py +3679 -0
  28. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_mcp_tools.py +91 -10
  29. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_renderers.py +1047 -7
  30. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_sql_parser.py +369 -0
  31. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_types.py +1 -1
  32. {sqlprism-1.1.0 → sqlprism-1.2.1}/uv.lock +36 -6
  33. sqlprism-1.1.0/src/sqlprism/languages/dbt.py +0 -372
  34. sqlprism-1.1.0/src/sqlprism/languages/sqlmesh.py +0 -324
  35. sqlprism-1.1.0/tests/test_federation.py +0 -144
  36. sqlprism-1.1.0/tests/test_indexer.py +0 -1891
  37. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/creating-branches-and-prs/PR-TEMPLATE.md +0 -0
  38. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/creating-branches-and-prs/SKILL.md +0 -0
  39. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/AGENT-INSTRUCTIONS.md +0 -0
  40. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/SKILL.md +0 -0
  41. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/implementing-issues/TASK-FORMAT.md +0 -0
  42. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/managing-project-releases/BDD-TEMPLATE.md +0 -0
  43. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/managing-project-releases/SKILL.md +0 -0
  44. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/COMMENT-TEMPLATE.md +0 -0
  45. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/REVIEWERS.md +0 -0
  46. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/reviewing-prs/SKILL.md +0 -0
  47. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/writing-graph-queries/EXAMPLES.md +0 -0
  48. {sqlprism-1.1.0 → sqlprism-1.2.1}/.claude/skills/writing-graph-queries/SKILL.md +0 -0
  49. {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/dependabot.yml +0 -0
  50. {sqlprism-1.1.0 → sqlprism-1.2.1}/.github/workflows/publish.yml +0 -0
  51. {sqlprism-1.1.0 → sqlprism-1.2.1}/LICENSE +0 -0
  52. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/dbt.md +0 -0
  53. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/graph.md +0 -0
  54. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/indexer.md +0 -0
  55. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/mcp-tools.md +0 -0
  56. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/sql-parser.md +0 -0
  57. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/sqlmesh.md +0 -0
  58. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/api/types.md +0 -0
  59. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/architecture/overview.md +0 -0
  60. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/getting-started/configuration.md +0 -0
  61. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/getting-started/installation.md +0 -0
  62. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/dbt.md +0 -0
  63. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/guide/sqlmesh.md +0 -0
  64. {sqlprism-1.1.0 → sqlprism-1.2.1}/docs/index.md +0 -0
  65. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/__init__.py +0 -0
  66. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/core/__init__.py +0 -0
  67. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/__init__.py +0 -0
  68. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/languages/utils.py +0 -0
  69. {sqlprism-1.1.0 → sqlprism-1.2.1}/src/sqlprism/types.py +0 -0
  70. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/__init__.py +0 -0
  71. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_cli.py +0 -0
  72. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_config.py +0 -0
  73. {sqlprism-1.1.0 → sqlprism-1.2.1}/tests/test_languages.py +0 -0
@@ -41,7 +41,7 @@ jobs:
41
41
 
42
42
  - name: Upload coverage to Codecov
43
43
  if: matrix.python-version == '3.12'
44
- uses: codecov/codecov-action@v5
44
+ uses: codecov/codecov-action@v6
45
45
  with:
46
46
  files: coverage.xml
47
47
  token: ${{ secrets.CODECOV_TOKEN }}
@@ -37,10 +37,10 @@ jobs:
37
37
  run: uv run mkdocs build
38
38
 
39
39
  - name: Upload Pages artifact
40
- uses: actions/upload-pages-artifact@v3
40
+ uses: actions/upload-pages-artifact@v5
41
41
  with:
42
42
  path: site/
43
43
 
44
44
  - name: Deploy to GitHub Pages
45
45
  id: deployment
46
- uses: actions/deploy-pages@v4
46
+ uses: actions/deploy-pages@v5
@@ -1,6 +1,7 @@
1
1
  # Claude
2
2
  .claude/research/
3
3
  .claude/plans/
4
+ .claude/worktrees/
4
5
  .claude/settings.local.json
5
6
  .mcp.json
6
7
 
@@ -18,17 +18,20 @@ src/sqlprism/
18
18
  core/
19
19
  graph.py — DuckDB storage layer (MVCC, repo_type tracking)
20
20
  indexer.py — Orchestrates parsing + indexing; file-level reindex with repo-type dispatch
21
- mcp_tools.py — MCP server tools (19 tools, non-blocking reindex, per-repo debounce)
21
+ mcp_tools.py — MCP server tools (24 tools, non-blocking reindex, per-repo debounce)
22
+ conventions.py — Convention inference engine (layers, naming, references, tags, overrides)
22
23
  languages/
23
24
  sql.py — sqlglot-based SQL parser
24
25
  dbt.py — dbt renderer (full project + selective render_models)
25
26
  sqlmesh.py — sqlmesh renderer (full project + selective render_models)
26
27
  utils.py — Shared venv/env utilities
27
28
  types.py — Data classes (ParseResult, NodeResult, etc.)
28
- cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, status, init)
29
+ cli.py — Click CLI (serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init)
29
30
  tests/
30
- test_indexer.py — Indexer + integration tests
31
- test_renderers.py — dbt/sqlmesh renderer tests
31
+ test_indexer.py — Indexer + integration tests
32
+ test_renderers.py — dbt/sqlmesh renderer tests
33
+ test_conventions.py — Convention engine + placement + tags tests
34
+ test_sql_parser.py — SQL parser, lineage, and dialect tests
32
35
  ```
33
36
 
34
37
  ## Conventions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlprism
3
- Version: 1.1.0
3
+ Version: 1.2.1
4
4
  Summary: SQL codebase indexer with column-level lineage, impact analysis, and MCP server support
5
5
  Project-URL: Homepage, https://github.com/darkcofy/sqlprism
6
6
  Project-URL: Documentation, https://darkcofy.github.io/sqlprism/
@@ -23,7 +23,7 @@ Requires-Dist: duckdb>=1.5.0
23
23
  Requires-Dist: mcp[cli]>=1.0.0
24
24
  Requires-Dist: pydantic>=2.0.0
25
25
  Requires-Dist: pyyaml>=6.0
26
- Requires-Dist: sqlglot<30,>=28.0.0
26
+ Requires-Dist: sqlglot[c]<31,>=30.0.0
27
27
  Description-Content-Type: text/markdown
28
28
 
29
29
  # SQLPrism
@@ -74,6 +74,8 @@ uv run sqlprism reindex # index plain SQL repos
74
74
 
75
75
  For [dbt](https://www.getdbt.com/) and [SQLMesh](https://sqlmesh.com/) projects, use `reindex-dbt` and `reindex-sqlmesh` respectively. See the [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/) for full options.
76
76
 
77
+ > **Prerequisite:** dbt and SQLMesh are **not** dependencies of sqlprism. The renderers shell out to `dbt compile` / `sqlmesh` inside the target project's own virtualenv (via `uv run` by default). Install the renderer in that project — for example `uv add dbt-core dbt-<adapter>` or `uv add sqlmesh` — before running `reindex-dbt` / `reindex-sqlmesh`. If the renderer is missing, sqlprism will raise a clear error pointing at the project directory.
78
+
77
79
  ### 3. Connect your MCP client
78
80
 
79
81
  **Claude Code:**
@@ -225,6 +227,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
225
227
  | `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
226
228
  | `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
227
229
  | `sqlprism serve` | Start the MCP server (stdio or HTTP). |
230
+ | `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
231
+ | `sqlprism conventions refresh` | Re-run convention inference after reindex. |
232
+ | `sqlprism conventions diff` | Show what changed since the last `sqlprism conventions init`. |
228
233
  | `sqlprism status` | Show index status. |
229
234
  | `sqlprism query search` | Find entities by name pattern. |
230
235
  | `sqlprism query references` | Find inbound/outbound dependencies. |
@@ -236,7 +241,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
236
241
 
237
242
  Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
238
243
 
239
- When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
244
+ When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
240
245
 
241
246
  | Tool | Description |
242
247
  |---|---|
@@ -258,6 +263,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
258
263
  | `reindex_files` | Fast on-save reindex with per-repo debounce. |
259
264
  | `reindex_dbt` | Background dbt compile + index. |
260
265
  | `reindex_sqlmesh` | Background SQLMesh render + index. |
266
+ | `get_conventions` | Inferred project conventions — naming, references, columns. |
267
+ | `find_similar_models` | Find existing models similar to what you're building. |
268
+ | `suggest_placement` | Recommend where to place a new model based on references. |
269
+ | `search_by_tag` | Find models by semantic tag (business domain concept). |
270
+ | `list_tags` | List all semantic tags with model counts and confidence. |
261
271
  | `index_status` | Index stats, cross-repo edges, and name collisions. |
262
272
 
263
273
  ## Architecture
@@ -274,8 +284,9 @@ src/sqlprism/
274
284
  core/
275
285
  graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
276
286
  indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
277
- mcp_tools.py <- FastMCP tool definitions (19 tools, non-blocking reindex, per-repo debounce)
278
- cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, status, init
287
+ mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
288
+ conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
289
+ cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
279
290
  ```
280
291
 
281
292
  The SQL parser extracts:
@@ -294,7 +305,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
294
305
 
295
306
  ```bash
296
307
  uv sync
297
- uv run pytest # run tests (416+ tests)
308
+ uv run pytest # run tests (630+ tests)
298
309
  uv run pytest --cov=sqlprism # run with coverage report
299
310
  uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
300
311
  ```
@@ -46,6 +46,8 @@ uv run sqlprism reindex # index plain SQL repos
46
46
 
47
47
  For [dbt](https://www.getdbt.com/) and [SQLMesh](https://sqlmesh.com/) projects, use `reindex-dbt` and `reindex-sqlmesh` respectively. See the [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/) for full options.
48
48
 
49
+ > **Prerequisite:** dbt and SQLMesh are **not** dependencies of sqlprism. The renderers shell out to `dbt compile` / `sqlmesh` inside the target project's own virtualenv (via `uv run` by default). Install the renderer in that project — for example `uv add dbt-core dbt-<adapter>` or `uv add sqlmesh` — before running `reindex-dbt` / `reindex-sqlmesh`. If the renderer is missing, sqlprism will raise a clear error pointing at the project directory.
50
+
49
51
  ### 3. Connect your MCP client
50
52
 
51
53
  **Claude Code:**
@@ -197,6 +199,9 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
197
199
  | `sqlprism reindex-dbt` | Compile and index a [dbt](https://www.getdbt.com/) project. |
198
200
  | `sqlprism reindex-sqlmesh` | Render and index a [SQLMesh](https://sqlmesh.com/) project. |
199
201
  | `sqlprism serve` | Start the MCP server (stdio or HTTP). |
202
+ | `sqlprism conventions init` | Generate `sqlprism.conventions.yml` from inferred conventions. |
203
+ | `sqlprism conventions refresh` | Re-run convention inference after reindex. |
204
+ | `sqlprism conventions diff` | Show what changed since the last `sqlprism conventions init`. |
200
205
  | `sqlprism status` | Show index status. |
201
206
  | `sqlprism query search` | Find entities by name pattern. |
202
207
  | `sqlprism query references` | Find inbound/outbound dependencies. |
@@ -208,7 +213,7 @@ Full reference: [CLI guide](https://darkcofy.github.io/sqlprism/guide/cli/)
208
213
 
209
214
  Full reference: [MCP tools guide](https://darkcofy.github.io/sqlprism/guide/mcp-tools/)
210
215
 
211
- When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
216
+ When running as an MCP server (`sqlprism serve`), 24 tools are exposed:
212
217
 
213
218
  | Tool | Description |
214
219
  |---|---|
@@ -230,6 +235,11 @@ When running as an MCP server (`sqlprism serve`), 19 tools are exposed:
230
235
  | `reindex_files` | Fast on-save reindex with per-repo debounce. |
231
236
  | `reindex_dbt` | Background dbt compile + index. |
232
237
  | `reindex_sqlmesh` | Background SQLMesh render + index. |
238
+ | `get_conventions` | Inferred project conventions — naming, references, columns. |
239
+ | `find_similar_models` | Find existing models similar to what you're building. |
240
+ | `suggest_placement` | Recommend where to place a new model based on references. |
241
+ | `search_by_tag` | Find models by semantic tag (business domain concept). |
242
+ | `list_tags` | List all semantic tags with model counts and confidence. |
233
243
  | `index_status` | Index stats, cross-repo edges, and name collisions. |
234
244
 
235
245
  ## Architecture
@@ -246,8 +256,9 @@ src/sqlprism/
246
256
  core/
247
257
  graph.py <- DuckDB storage layer (MVCC), queries, snippets, repo_type tracking
248
258
  indexer.py <- Orchestrator: scan -> checksum -> parse -> store; file-level reindex with repo-type dispatch
249
- mcp_tools.py <- FastMCP tool definitions (19 tools, non-blocking reindex, per-repo debounce)
250
- cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, status, init
259
+ mcp_tools.py <- FastMCP tool definitions (24 tools, non-blocking reindex, per-repo debounce)
260
+ conventions.py <- Convention inference engine: layers, naming, references, tags, overrides
261
+ cli.py <- Click CLI: serve, reindex, reindex-file, reindex-sqlmesh, reindex-dbt, conventions, status, init
251
262
  ```
252
263
 
253
264
  The SQL parser extracts:
@@ -266,7 +277,7 @@ SQLPrism optionally integrates with [DuckPGQ](https://github.com/cwida/duckpgq)
266
277
 
267
278
  ```bash
268
279
  uv sync
269
- uv run pytest # run tests (416+ tests)
280
+ uv run pytest # run tests (630+ tests)
270
281
  uv run pytest --cov=sqlprism # run with coverage report
271
282
  uv run pytest --cov=sqlprism --cov-report=html:coverage_html # HTML report
272
283
  ```
@@ -0,0 +1,23 @@
1
+ # Convention Engine
2
+
3
+ The convention inference engine detects project patterns from the indexed SQL graph — naming conventions, layer references, required columns, column style, and semantic tags.
4
+
5
+ ## ConventionEngine
6
+
7
+ ::: sqlprism.core.conventions.ConventionEngine
8
+
9
+ ## Data Classes
10
+
11
+ ::: sqlprism.core.conventions.Layer
12
+
13
+ ::: sqlprism.core.conventions.NamingPattern
14
+
15
+ ::: sqlprism.core.conventions.ReferenceRule
16
+
17
+ ::: sqlprism.core.conventions.RequiredColumn
18
+
19
+ ::: sqlprism.core.conventions.ColumnStyle
20
+
21
+ ::: sqlprism.core.conventions.TagAssignment
22
+
23
+ ::: sqlprism.core.conventions.Cluster
@@ -2,7 +2,7 @@
2
2
 
3
3
  The index is stored in a single DuckDB file (default: `~/.sqlprism/graph.duckdb`). Current schema version: **1.0**.
4
4
 
5
- ## Tables (6)
5
+ ## Tables (9)
6
6
 
7
7
  ### `repos`
8
8
 
@@ -99,6 +99,51 @@ End-to-end column lineage chains.
99
99
 
100
100
  A lineage chain traces one output column back to its source. Multiple chains (different `chain_index` values) exist when a column has multiple source paths (e.g. COALESCE of two columns).
101
101
 
102
+ ### `columns`
103
+
104
+ Column definitions extracted from DDL or schema files.
105
+
106
+ | Column | Type | Description |
107
+ |---|---|---|
108
+ | `column_id` | INTEGER PK | Auto-increment ID. |
109
+ | `node_id` | INTEGER | FK to `nodes`. |
110
+ | `column_name` | TEXT | Column name. |
111
+ | `data_type` | TEXT or NULL | Column data type (e.g. `INTEGER`, `TEXT`). |
112
+ | `position` | INTEGER | Column ordinal position in the table. |
113
+ | `source` | TEXT | How the column was discovered: `definition`, `usage`, `lineage`. |
114
+
115
+ ### `conventions`
116
+
117
+ Inferred or overridden project conventions per layer.
118
+
119
+ | Column | Type | Description |
120
+ |---|---|---|
121
+ | `convention_id` | INTEGER PK | Auto-increment ID. |
122
+ | `repo_id` | INTEGER | FK to `repos`. |
123
+ | `layer` | TEXT | Layer name (e.g. `staging`, `marts`). |
124
+ | `convention_type` | TEXT | `naming`, `references`, `required_columns`, or `column_style`. |
125
+ | `payload` | JSON | Convention data (pattern, allowed_targets, etc.). |
126
+ | `confidence` | FLOAT | Confidence score (0.0-1.0). |
127
+ | `source` | TEXT | `inferred` or `override`. |
128
+ | `model_count` | INTEGER | Number of models in this layer when inferred. |
129
+
130
+ Unique constraint: `(repo_id, layer, convention_type)`.
131
+
132
+ ### `semantic_tags`
133
+
134
+ Semantic tags assigned to models by clustering or explicit override.
135
+
136
+ | Column | Type | Description |
137
+ |---|---|---|
138
+ | `tag_id` | INTEGER PK | Auto-increment ID. |
139
+ | `repo_id` | INTEGER | FK to `repos`. |
140
+ | `tag_name` | TEXT | Tag name (e.g. `customer`, `order`, `revenue`). |
141
+ | `node_id` | INTEGER | FK to `nodes`. |
142
+ | `confidence` | FLOAT | Confidence score (0.0-1.0). |
143
+ | `source` | TEXT | `inferred`, `anchor`, or `explicit`. |
144
+
145
+ Unique constraint: `(repo_id, tag_name, node_id)`.
146
+
102
147
  ## Indexes
103
148
 
104
149
  ```sql
@@ -117,6 +162,11 @@ CREATE INDEX idx_col_usage_type ON column_usage(usage_type);
117
162
  CREATE INDEX idx_lineage_output ON column_lineage(output_node, output_column);
118
163
  CREATE INDEX idx_lineage_hop ON column_lineage(hop_table, hop_column);
119
164
  CREATE INDEX idx_lineage_file ON column_lineage(file_id);
165
+ CREATE INDEX idx_conventions_repo ON conventions(repo_id);
166
+ CREATE INDEX idx_conventions_type ON conventions(convention_type);
167
+ CREATE INDEX idx_tags_name ON semantic_tags(tag_name);
168
+ CREATE INDEX idx_tags_node ON semantic_tags(node_id);
169
+ CREATE INDEX idx_tags_repo ON semantic_tags(repo_id);
120
170
  ```
121
171
 
122
172
  ## Entity Relationship
@@ -130,4 +180,7 @@ repos 1──* files 1──* nodes
130
180
 
131
181
  files 1──* column_usage *──1 nodes
132
182
  files 1──* column_lineage
183
+ nodes 1──* columns
184
+ repos 1──* conventions
185
+ repos 1──* semantic_tags *──1 nodes
133
186
  ```
@@ -34,7 +34,7 @@ sqlprism status [--config PATH] [--db PATH]
34
34
 
35
35
  ### `sqlprism serve`
36
36
 
37
- Starts the MCP server, exposing all 19 tools to any MCP client.
37
+ Starts the MCP server, exposing all 24 tools to any MCP client.
38
38
 
39
39
  ```bash
40
40
  sqlprism serve [--config PATH] [--db PATH] [--transport stdio|streamable-http] [--port 8000]
@@ -130,6 +130,36 @@ sqlprism reindex-dbt \
130
130
  | `--profiles-dir` | No | Path to directory containing `profiles.yml`. Defaults to the project directory. |
131
131
  | `--dbt-command` | No | Base command to invoke dbt. `compile` is appended automatically. Default: `uv run dbt`. Use `dbt` if globally installed, or `uvx --with dbt-starrocks dbt` for ephemeral install. |
132
132
 
133
+ ## Convention Commands
134
+
135
+ ### `sqlprism conventions init`
136
+
137
+ Generate a `sqlprism.conventions.yml` file from inferred conventions. Includes confidence scores as comments.
138
+
139
+ ```bash
140
+ sqlprism conventions init [--config PATH] [--db PATH] [--force]
141
+ ```
142
+
143
+ | Parameter | Description |
144
+ |---|---|
145
+ | `--force` | Overwrite existing conventions file. Without this flag, init refuses to overwrite. |
146
+
147
+ ### `sqlprism conventions refresh`
148
+
149
+ Re-run convention inference after reindex. Explicit overrides (`source: 'override'`) are preserved.
150
+
151
+ ```bash
152
+ sqlprism conventions refresh [--config PATH] [--db PATH]
153
+ ```
154
+
155
+ ### `sqlprism conventions diff`
156
+
157
+ Show which conventions have changed since the last `sqlprism conventions init`. Compares the YAML file against current inferred conventions.
158
+
159
+ ```bash
160
+ sqlprism conventions diff [--config PATH] [--db PATH]
161
+ ```
162
+
133
163
  ## Query Commands
134
164
 
135
165
  All query commands output JSON to stdout. They share common parameters:
@@ -0,0 +1,82 @@
1
+ # Conventions
2
+
3
+ The conventions engine automatically infers project patterns from the indexed SQL graph -- naming conventions, layer references, required columns, column naming style, and semantic tags. It runs after every reindex, giving AI agents (and developers) a machine-readable description of how the project is structured.
4
+
5
+ ## How It Works
6
+
7
+ - Convention inference runs automatically after reindex.
8
+ - Detects layers from directory structure (`models/staging/`, `models/marts/`, etc.).
9
+ - Infers naming patterns per layer (e.g. `stg_{source}_{entity}`).
10
+ - Infers reference rules (which layers reference which).
11
+ - Detects required columns (columns appearing in >70% of models per layer).
12
+ - Detects column naming style (`snake_case`, `camelCase`, etc.).
13
+ - Assigns semantic tags via structural clustering (no ML, deterministic).
14
+
15
+ ## Confidence Scores
16
+
17
+ All conventions have a confidence score (0.0--1.0):
18
+
19
+ | Range | Meaning |
20
+ |---|---|
21
+ | **>0.9** | High confidence. Follow this convention. |
22
+ | **0.7--0.9** | Moderate. Likely correct but worth verifying. |
23
+ | **<0.7** | Low confidence. Consider an explicit override. |
24
+
25
+ ## Overrides
26
+
27
+ You can override inferred conventions with a YAML file:
28
+
29
+ ```bash
30
+ sqlprism conventions init # generates sqlprism.conventions.yml
31
+ # edit the file to add explicit overrides
32
+ sqlprism conventions refresh # re-runs inference, preserving overrides
33
+ sqlprism conventions diff # shows changes since last init
34
+ ```
35
+
36
+ Overrides take precedence over inferred conventions (`source: 'override'` vs `source: 'inferred'`).
37
+
38
+ ## MCP Tools
39
+
40
+ Five MCP tools expose conventions to AI agents:
41
+
42
+ | Tool | Description |
43
+ |---|---|
44
+ | `get_conventions` | Naming rules, reference rules, required columns per layer. |
45
+ | `find_similar_models` | Find existing models similar to what you're building. |
46
+ | `suggest_placement` | Recommend where to place a new model based on references. |
47
+ | `search_by_tag` | Find models by semantic tag (business domain concept). |
48
+ | `list_tags` | List all semantic tags with model counts and confidence. |
49
+
50
+ See [MCP Tools](mcp-tools.md) for parameter details.
51
+
52
+ ## Semantic Tags
53
+
54
+ Tags are assigned by structural clustering -- models that share many upstream references get grouped and auto-labeled based on common name tokens. Tags represent business domain concepts (e.g. "customer", "order", "revenue").
55
+
56
+ Tag sources:
57
+
58
+ | Source | Description |
59
+ |---|---|
60
+ | **inferred** | Automatically assigned via clustering. |
61
+ | **anchor** | Manually specified in the YAML override as cluster anchors. |
62
+ | **explicit** | Manually assigned to specific models in the YAML override. |
63
+
64
+ ## Example Workflow
65
+
66
+ ```bash
67
+ # 1. Index your project
68
+ sqlprism reindex
69
+
70
+ # 2. Generate conventions file
71
+ sqlprism conventions init
72
+ # -> creates sqlprism.conventions.yml with inferred conventions
73
+
74
+ # 3. Review and adjust
75
+ # Edit the YAML to fix any conventions the engine got wrong
76
+
77
+ # 4. Re-run inference (preserves your overrides)
78
+ sqlprism conventions refresh
79
+
80
+ # 5. Check what changed
81
+ sqlprism conventions diff
82
+ ```
@@ -1,6 +1,6 @@
1
1
  # MCP Tools
2
2
 
3
- When running as an MCP server (`sqlprism serve`), 19 tools are exposed. Any MCP client (Claude Code, Claude Desktop, Cursor, Continue.dev) can call these.
3
+ When running as an MCP server (`sqlprism serve`), 24 tools are exposed. Any MCP client (Claude Code, Claude Desktop, Cursor, Continue.dev) can call these.
4
4
 
5
5
  ## Query Tools
6
6
 
@@ -237,6 +237,57 @@ Analyze the downstream impact of proposed column changes BEFORE modifying code.
237
237
  | `changes` | list | Yes | | List of column changes (remove_column, rename_column, add_column). |
238
238
  | `repo` | string | No | | Filter by repo name. |
239
239
 
240
+ ## Convention Tools
241
+
242
+ ### `get_conventions`
243
+
244
+ Get naming conventions, reference rules, and required columns for a layer. Returns inferred conventions with confidence scores.
245
+
246
+ | Parameter | Type | Required | Default | Description |
247
+ |---|---|---|---|---|
248
+ | `layer` | string | No | | Layer name (e.g. 'staging', 'marts'). Omit for all layers. |
249
+ | `repo` | string | No | | Filter by repo name. |
250
+
251
+ ### `search_by_tag`
252
+
253
+ Find models tagged with a business domain concept. Returns models ranked by confidence.
254
+
255
+ | Parameter | Type | Required | Default | Description |
256
+ |---|---|---|---|---|
257
+ | `tag` | string | Yes | | Tag name to search for (e.g. 'customer', 'order'). |
258
+ | `min_confidence` | float | No | | Minimum confidence threshold (0.0-1.0). |
259
+ | `repo` | string | No | | Filter by repo name. |
260
+
261
+ ### `list_tags`
262
+
263
+ List all semantic tags with model counts and average confidence. Provides the project's business domain vocabulary.
264
+
265
+ | Parameter | Type | Required | Default | Description |
266
+ |---|---|---|---|---|
267
+ | `repo` | string | No | | Filter by repo name. |
268
+
269
+ ### `find_similar_models`
270
+
271
+ Find existing models similar to what you're building. Compares reference overlap, column overlap, and layer placement.
272
+
273
+ | Parameter | Type | Required | Default | Description |
274
+ |---|---|---|---|---|
275
+ | `references` | list[string] | No | | Tables this model will reference. |
276
+ | `output_columns` | list[string] | No | | Columns this model will output. |
277
+ | `model` | string | No | | Existing model name to find similar models to. |
278
+ | `limit` | int | No | 5 | Max results (1-50). |
279
+ | `repo` | string | No | | Filter by repo name. |
280
+
281
+ ### `suggest_placement`
282
+
283
+ Recommend where to place a new model based on its references. Uses inferred layer flow rules and naming conventions.
284
+
285
+ | Parameter | Type | Required | Default | Description |
286
+ |---|---|---|---|---|
287
+ | `references` | list[string] | Yes | | Tables this new model will reference. |
288
+ | `name` | string | No | | Proposed model name (for naming validation). |
289
+ | `repo` | string | No | | Filter by repo name. |
290
+
240
291
  ## DuckPGQ Tools
241
292
 
242
293
  The following tools require the [DuckPGQ](https://github.com/cwida/duckpgq) extension: `find_path`, `find_critical_models`, `find_subgraphs`. The extension is installed automatically on first use. Tools that don't require DuckPGQ (`detect_cycles`, `find_bottlenecks`, `check_impact`) use plain SQL and work everywhere.
@@ -59,6 +59,7 @@ nav:
59
59
  - User Guide:
60
60
  - CLI Reference: guide/cli.md
61
61
  - MCP Tools: guide/mcp-tools.md
62
+ - Conventions: guide/conventions.md
62
63
  - SQLMesh Integration: guide/sqlmesh.md
63
64
  - dbt Integration: guide/dbt.md
64
65
  - Architecture:
@@ -69,6 +70,7 @@ nav:
69
70
  - GraphDB (Storage): api/graph.md
70
71
  - Indexer (Orchestrator): api/indexer.md
71
72
  - SQL Parser: api/sql-parser.md
73
+ - Convention Engine: api/conventions.md
72
74
  - SQLMesh Renderer: api/sqlmesh.md
73
75
  - dbt Renderer: api/dbt.md
74
76
  - MCP Server: api/mcp-tools.md
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sqlprism"
7
- version = "1.1.0"
7
+ version = "1.2.1"
8
8
  description = "SQL codebase indexer with column-level lineage, impact analysis, and MCP server support"
9
9
  license = "Apache-2.0"
10
10
  requires-python = ">=3.11"
@@ -26,7 +26,7 @@ classifiers = [
26
26
  dependencies = [
27
27
  "mcp[cli]>=1.0.0",
28
28
  "duckdb>=1.5.0",
29
- "sqlglot>=28.0.0,<30",
29
+ "sqlglot[c]>=30.0.0,<31",
30
30
  "pydantic>=2.0.0",
31
31
  "click>=8.0.0",
32
32
  "pyyaml>=6.0",