sql-code-graph 1.1.0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.1.3.dist-info}/METADATA +1 -1
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.1.3.dist-info}/RECORD +15 -15
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +39 -19
- sqlcg/cli/commands/db.py +2 -2
- sqlcg/cli/commands/find.py +3 -3
- sqlcg/cli/commands/gain.py +1 -1
- sqlcg/core/config.py +33 -5
- sqlcg/core/kuzu_backend.py +4 -1
- sqlcg/core/queries.cypher +0 -6
- sqlcg/core/queries.py +0 -1
- sqlcg/indexer/indexer.py +18 -0
- sqlcg/parsers/base.py +7 -1
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.1.3.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.1.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.3
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=YGDRrWVIrONmQholAKWh6hSKxlPd2dLcM1AdHHdBhEA,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
7
|
-
sqlcg/cli/commands/db.py,sha256=
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
9
|
-
sqlcg/cli/commands/gain.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=hiKj0R1m5i4ZmwrXBlVT14xGy6rs9jmv_ZDCLVZj4Tg,14282
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=5VpknLqYaimK6YA516w6iQVX6JmHcD52o6MuW5d088c,7462
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=SsK6q4YRPknrz_lIQ4Gun6HRoAdoVRGClwAYdm_s2OU,3168
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=hz36QmuaXJXutI4vyNMDfcNsBeLTXa6EOw2bWe2AhTQ,8939
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
|
|
11
11
|
sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
|
|
12
12
|
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
@@ -16,21 +16,21 @@ sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,
|
|
|
16
16
|
sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
18
|
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
19
|
+
sqlcg/core/config.py,sha256=LuB8HWPsIt1OsjOshTT1bJdXWXN01w76ABl9M-VB9DM,14777
|
|
20
20
|
sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
|
|
21
21
|
sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
|
|
22
22
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
23
|
-
sqlcg/core/kuzu_backend.py,sha256=
|
|
23
|
+
sqlcg/core/kuzu_backend.py,sha256=3kL8bGEQm70fuxYUdt1p7fsY12lCLQ07x01NYg6FOGA,16821
|
|
24
24
|
sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
|
|
25
|
-
sqlcg/core/queries.cypher,sha256=
|
|
26
|
-
sqlcg/core/queries.py,sha256=
|
|
25
|
+
sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
|
|
26
|
+
sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
|
|
27
27
|
sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
|
|
28
28
|
sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
|
|
29
29
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
30
30
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
31
31
|
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
32
32
|
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
33
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
+
sqlcg/indexer/indexer.py,sha256=KyyowxiSNU3Gm4JE-mj8gVm6D80XERJPd-he59I2sIk,62018
|
|
34
34
|
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
35
35
|
sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
|
|
36
36
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
@@ -41,7 +41,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
|
41
41
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
42
42
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
43
43
|
sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
|
|
44
|
-
sqlcg/parsers/base.py,sha256=
|
|
44
|
+
sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
|
|
45
45
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
46
46
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
47
47
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -59,7 +59,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
|
59
59
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
60
60
|
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
61
61
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
62
|
-
sql_code_graph-1.1.
|
|
63
|
-
sql_code_graph-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
64
|
-
sql_code_graph-1.1.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
65
|
-
sql_code_graph-1.1.0.dist-info/RECORD,,
|
|
62
|
+
sql_code_graph-1.1.3.dist-info/METADATA,sha256=Z_aRnsDOgZ_ngAHkIr3x2XpEjF-x6UMUQwcIkAMlGjo,13615
|
|
63
|
+
sql_code_graph-1.1.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
64
|
+
sql_code_graph-1.1.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
65
|
+
sql_code_graph-1.1.3.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -19,6 +19,29 @@ app = typer.Typer(help="Lineage analysis")
|
|
|
19
19
|
console = Console()
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
def _kind_filter(source_alias: str, include_intermediate: bool) -> str:
|
|
23
|
+
"""Build the Half-B (#38) kind filter for the lineage traversal query.
|
|
24
|
+
|
|
25
|
+
When ``include_intermediate`` is False, the filter uses ``OPTIONAL MATCH`` plus
|
|
26
|
+
``t.kind IS NULL OR t.kind IN ['table', 'external']`` so a source whose SqlTable
|
|
27
|
+
node is ABSENT (a CTE-body source on a graph indexed before the #39 fix, or not yet
|
|
28
|
+
re-indexed) is KEPT rather than silently dropped. Reverting this to an inner
|
|
29
|
+
``MATCH (t:SqlTable {...}) ... WHERE t.kind IN [...]`` is the #38 regression:
|
|
30
|
+
node-less physical sources vanish from results.
|
|
31
|
+
|
|
32
|
+
``source_alias`` is ``src`` for upstream and ``dst`` for downstream — it names both
|
|
33
|
+
the node whose table is looked up and the variable carried through the WITH clause.
|
|
34
|
+
This is the single production source of the filter string; the #40 recall guard
|
|
35
|
+
imports it so reverting Half B here turns the guard red.
|
|
36
|
+
"""
|
|
37
|
+
if include_intermediate:
|
|
38
|
+
return ""
|
|
39
|
+
return (
|
|
40
|
+
f"OPTIONAL MATCH (t:SqlTable {{qualified: {source_alias}.table_qualified}}) "
|
|
41
|
+
f"WITH c, {source_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
22
45
|
@app.command("upstream")
|
|
23
46
|
def upstream( # noqa: B008
|
|
24
47
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
@@ -34,15 +57,14 @@ def upstream( # noqa: B008
|
|
|
34
57
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
35
58
|
raise typer.Exit(1)
|
|
36
59
|
|
|
37
|
-
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
)
|
|
60
|
+
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
|
|
61
|
+
# Half B (#38): use OPTIONAL MATCH so a missing SqlTable node (e.g. CTE-body source not yet
|
|
62
|
+
# re-indexed after #39 fix) is KEPT rather than silently dropped. WHERE t.kind IS NULL OR
|
|
63
|
+
# t.kind IN [...] means: keep when node absent (NULL) OR when kind is a physical source.
|
|
64
|
+
# CTE aliases (kind='cte') and derived tables (kind='derived') are filtered out.
|
|
65
|
+
kind_filter = _kind_filter("src", include_intermediate)
|
|
44
66
|
|
|
45
|
-
with get_backend() as backend:
|
|
67
|
+
with get_backend(read_only=True) as backend:
|
|
46
68
|
results = backend.run_read(
|
|
47
69
|
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
48
70
|
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
@@ -95,15 +117,13 @@ def downstream( # noqa: B008
|
|
|
95
117
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
96
118
|
raise typer.Exit(1)
|
|
97
119
|
|
|
98
|
-
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
"WHERE t.kind IN ['table', 'external'] "
|
|
104
|
-
)
|
|
120
|
+
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
|
|
121
|
+
# Half B (#38): OPTIONAL MATCH keeps sources whose SqlTable node is absent (NULL) or is a
|
|
122
|
+
# physical kind. WITH c, dst, t carries the three variables in scope at this interpolation
|
|
123
|
+
# point; direct and q are bound later in the query.
|
|
124
|
+
kind_filter = _kind_filter("dst", include_intermediate)
|
|
105
125
|
|
|
106
|
-
with get_backend() as backend:
|
|
126
|
+
with get_backend(read_only=True) as backend:
|
|
107
127
|
results = backend.run_read(
|
|
108
128
|
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
109
129
|
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
@@ -171,7 +191,7 @@ def impact( # noqa: B008
|
|
|
171
191
|
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
172
192
|
) -> None:
|
|
173
193
|
"""Show all queries impacted by a table."""
|
|
174
|
-
with get_backend() as backend:
|
|
194
|
+
with get_backend(read_only=True) as backend:
|
|
175
195
|
results = backend.run_read(
|
|
176
196
|
f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
|
|
177
197
|
f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
|
|
@@ -207,7 +227,7 @@ def failures(
|
|
|
207
227
|
with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
|
|
208
228
|
an earlier version.
|
|
209
229
|
"""
|
|
210
|
-
with get_backend() as backend:
|
|
230
|
+
with get_backend(read_only=True) as backend:
|
|
211
231
|
cypher = (
|
|
212
232
|
f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
|
|
213
233
|
"AND ($cause IS NULL OR f.parse_cause = $cause) "
|
|
@@ -224,7 +244,7 @@ def unused(
|
|
|
224
244
|
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
225
245
|
) -> None:
|
|
226
246
|
"""Find tables with no query references."""
|
|
227
|
-
with get_backend() as backend:
|
|
247
|
+
with get_backend(read_only=True) as backend:
|
|
228
248
|
results = backend.run_read(
|
|
229
249
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
|
|
230
250
|
"RETURN DISTINCT t.qualified AS qualified LIMIT 100",
|
sqlcg/cli/commands/db.py
CHANGED
|
@@ -75,7 +75,7 @@ def db_reset( # noqa: B008
|
|
|
75
75
|
@app.command("info")
|
|
76
76
|
def db_info() -> None:
|
|
77
77
|
"""Show database stats."""
|
|
78
|
-
with get_backend() as backend:
|
|
78
|
+
with get_backend(read_only=True) as backend:
|
|
79
79
|
version = backend.get_schema_version() or "unknown"
|
|
80
80
|
console.print(f"Schema version: {version}")
|
|
81
81
|
|
|
@@ -167,7 +167,7 @@ def db_info() -> None:
|
|
|
167
167
|
@app.command("list-repos")
|
|
168
168
|
def list_repos() -> None:
|
|
169
169
|
"""List all indexed repositories."""
|
|
170
|
-
with get_backend() as backend:
|
|
170
|
+
with get_backend(read_only=True) as backend:
|
|
171
171
|
result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
|
|
172
172
|
|
|
173
173
|
if not result:
|
sqlcg/cli/commands/find.py
CHANGED
|
@@ -18,7 +18,7 @@ def find_table( # noqa: B008
|
|
|
18
18
|
) -> None:
|
|
19
19
|
"""Find a table by name."""
|
|
20
20
|
name = name.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
21
|
-
with get_backend() as backend:
|
|
21
|
+
with get_backend(read_only=True) as backend:
|
|
22
22
|
results = backend.run_read(
|
|
23
23
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
|
|
24
24
|
"RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
|
|
@@ -42,7 +42,7 @@ def find_column( # noqa: B008
|
|
|
42
42
|
) -> None:
|
|
43
43
|
"""Find a column by table.column reference."""
|
|
44
44
|
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
45
|
-
with get_backend() as backend:
|
|
45
|
+
with get_backend(read_only=True) as backend:
|
|
46
46
|
results = backend.run_read(
|
|
47
47
|
f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
|
|
48
48
|
{"ref": ref},
|
|
@@ -61,7 +61,7 @@ def find_pattern( # noqa: B008
|
|
|
61
61
|
pattern: str = typer.Argument(..., help="SQL pattern to search for"), # noqa: B008
|
|
62
62
|
) -> None:
|
|
63
63
|
"""Find queries containing a SQL pattern."""
|
|
64
|
-
with get_backend() as backend:
|
|
64
|
+
with get_backend(read_only=True) as backend:
|
|
65
65
|
results = backend.run_read(
|
|
66
66
|
f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
|
|
67
67
|
"RETURN q.id AS id, q.kind AS kind LIMIT 50",
|
sqlcg/cli/commands/gain.py
CHANGED
|
@@ -123,7 +123,7 @@ def gain_cmd(
|
|
|
123
123
|
# Section F: parse quality from graph
|
|
124
124
|
parse_quality: dict[str, int] | None = None
|
|
125
125
|
try:
|
|
126
|
-
with get_backend() as backend:
|
|
126
|
+
with get_backend(read_only=True) as backend:
|
|
127
127
|
mode_rows = backend.run_read(
|
|
128
128
|
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
|
|
129
129
|
" COUNT(q) AS cnt ORDER BY cnt DESC",
|
sqlcg/core/config.py
CHANGED
|
@@ -346,9 +346,25 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
|
|
|
346
346
|
return []
|
|
347
347
|
|
|
348
348
|
|
|
349
|
-
def get_backend() -> "GraphBackend":
|
|
349
|
+
def get_backend(read_only: bool = False) -> "GraphBackend":
|
|
350
350
|
"""Get a graph backend instance respecting the SQLCG_BACKEND env var.
|
|
351
351
|
|
|
352
|
+
Args:
|
|
353
|
+
read_only: Open in read-only mode. When ``True``, the KùzuDB open
|
|
354
|
+
does not take an exclusive write lock, enabling *multiple concurrent
|
|
355
|
+
read-only opens* (reader/reader concurrency). CLI read commands
|
|
356
|
+
pass ``True`` so they do not hold the exclusive write lock and
|
|
357
|
+
therefore do not block other concurrent readers or a pending reindex.
|
|
358
|
+
Note: this does NOT allow reads while a read-write writer already
|
|
359
|
+
holds the exclusive lock — KùzuDB's exclusive write lock is
|
|
360
|
+
process-level; a ``read_only=True`` open still fails with
|
|
361
|
+
"Database is locked" when a writer is active. Reads during an
|
|
362
|
+
active writer remain a known limitation (future work: route reads
|
|
363
|
+
through the live MCP server).
|
|
364
|
+
Neo4j has no single-writer lock; this flag is a no-op there.
|
|
365
|
+
All writer call sites (index, reindex, db init/reset, server
|
|
366
|
+
init_backend) use the default ``False``.
|
|
367
|
+
|
|
352
368
|
Returns:
|
|
353
369
|
A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
|
|
354
370
|
|
|
@@ -361,14 +377,26 @@ def get_backend() -> "GraphBackend":
|
|
|
361
377
|
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
362
378
|
|
|
363
379
|
kuzu_cfg = KuzuConfig.from_env()
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
380
|
+
try:
|
|
381
|
+
return KuzuBackend(
|
|
382
|
+
str(kuzu_cfg.db_path),
|
|
383
|
+
buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
|
|
384
|
+
read_only=read_only,
|
|
385
|
+
)
|
|
386
|
+
except RuntimeError as exc:
|
|
387
|
+
if read_only and "READ ONLY" in str(exc):
|
|
388
|
+
# KùzuDB refuses to open a non-existent or empty DB in read-only
|
|
389
|
+
# mode ("Cannot create an empty database under READ ONLY mode").
|
|
390
|
+
# Surface the same empty-DB guidance the user sees from `db info`.
|
|
391
|
+
raise RuntimeError(
|
|
392
|
+
"Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
|
|
393
|
+
) from exc
|
|
394
|
+
raise
|
|
368
395
|
elif backend_type == "neo4j":
|
|
369
396
|
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
370
397
|
|
|
371
398
|
neo4j_cfg = Neo4jConfig.from_env()
|
|
399
|
+
# Neo4j has no single-writer lock; read_only is a no-op here.
|
|
372
400
|
return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
|
|
373
401
|
else:
|
|
374
402
|
raise ValueError(f"Unknown backend type: {backend_type}")
|
sqlcg/core/kuzu_backend.py
CHANGED
|
@@ -58,7 +58,10 @@ class KuzuBackend(GraphBackend):
|
|
|
58
58
|
Args:
|
|
59
59
|
db_path: Path to the KùzuDB database file (or ':memory:' for in-memory)
|
|
60
60
|
buffer_pool_size_mb: Buffer pool size in MB (0 = use KuzuDB default)
|
|
61
|
-
read_only: Open in read-only mode
|
|
61
|
+
read_only: Open in read-only mode. Enables concurrent read-only
|
|
62
|
+
opens (reader/reader concurrency) by not taking the exclusive
|
|
63
|
+
write lock. Does NOT allow reads while a read-write writer
|
|
64
|
+
holds the lock — KùzuDB's exclusive lock is process-level.
|
|
62
65
|
|
|
63
66
|
Raises:
|
|
64
67
|
RuntimeError: If the database is locked or cannot be opened.
|
sqlcg/core/queries.cypher
CHANGED
|
@@ -38,12 +38,6 @@ RETURN dst.id AS id, dst.col_name AS col_name, dst.table_qualified AS table_qual
|
|
|
38
38
|
MATCH (dst:SqlColumn {id: $id})<-[:COLUMN_LINEAGE]-(src:SqlColumn)
|
|
39
39
|
RETURN src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
|
|
40
40
|
|
|
41
|
-
-- GET_UPSTREAM_DEPENDENCIES_FILTERED
|
|
42
|
-
MATCH (dst:SqlColumn {id: $id})<-[:COLUMN_LINEAGE]-(src:SqlColumn)
|
|
43
|
-
MATCH (t:SqlTable {qualified: src.table_qualified})
|
|
44
|
-
WHERE t.kind IN ['table', 'external']
|
|
45
|
-
RETURN src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
|
|
46
|
-
|
|
47
41
|
-- SEARCH_SQL_PATTERN
|
|
48
42
|
MATCH (q:SqlQuery)-[:QUERY_DEFINED_IN]->(f:File)
|
|
49
43
|
WHERE contains(q.sql, $query)
|
sqlcg/core/queries.py
CHANGED
|
@@ -28,7 +28,6 @@ TRACE_COLUMN_LINEAGE_QUERY = _Q["TRACE_COLUMN_LINEAGE"]
|
|
|
28
28
|
FIND_TABLE_USAGES_QUERY = _Q["FIND_TABLE_USAGES"]
|
|
29
29
|
GET_DOWNSTREAM_DEPENDENCIES_QUERY = _Q["GET_DOWNSTREAM_DEPENDENCIES"]
|
|
30
30
|
GET_UPSTREAM_DEPENDENCIES_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES"]
|
|
31
|
-
GET_UPSTREAM_DEPENDENCIES_FILTERED_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES_FILTERED"]
|
|
32
31
|
SEARCH_SQL_PATTERN_QUERY = _Q["SEARCH_SQL_PATTERN"]
|
|
33
32
|
LIST_DIALECTS_AND_REPOS_QUERY = _Q["LIST_DIALECTS_AND_REPOS"]
|
|
34
33
|
EXPAND_STAR_SOURCES_QUERY = _Q["EXPAND_STAR_SOURCES"]
|
sqlcg/indexer/indexer.py
CHANGED
|
@@ -1126,6 +1126,24 @@ class Indexer:
|
|
|
1126
1126
|
"table_name": edge.src.table.name,
|
|
1127
1127
|
}
|
|
1128
1128
|
)
|
|
1129
|
+
# Half A (#39): emit a SqlTable node for the source table.
|
|
1130
|
+
# CTE-body-only sources are not in stmt.sources (which only covers
|
|
1131
|
+
# tables reachable via the parser's top-level FROM list), so they were
|
|
1132
|
+
# previously missing from the graph. edge.src.table is a frozen
|
|
1133
|
+
# TableRef with schema-aliasing already applied at parse time — the
|
|
1134
|
+
# qualified value is guaranteed to match edge.src.table_qualified.
|
|
1135
|
+
# key set is identical to other table_rows entries → upsert_nodes_bulk
|
|
1136
|
+
# homogeneity preserved; MERGE on primary key deduplicates re-emits.
|
|
1137
|
+
rows.table_rows.append(
|
|
1138
|
+
{
|
|
1139
|
+
"qualified": edge.src.table.full_id,
|
|
1140
|
+
"name": edge.src.table.name,
|
|
1141
|
+
"catalog": edge.src.table.catalog or "",
|
|
1142
|
+
"db": edge.src.table.db or "",
|
|
1143
|
+
"kind": edge.src.table.role,
|
|
1144
|
+
"defined_in_file": "",
|
|
1145
|
+
}
|
|
1146
|
+
)
|
|
1129
1147
|
rows.column_rows.append(
|
|
1130
1148
|
{
|
|
1131
1149
|
"id": dst_id,
|
sqlcg/parsers/base.py
CHANGED
|
@@ -967,10 +967,16 @@ class SqlParser(ABC):
|
|
|
967
967
|
if not isinstance(cte_body, (exp.Select, exp.Union)):
|
|
968
968
|
continue
|
|
969
969
|
|
|
970
|
-
# For Union bodies, use the left
|
|
970
|
+
# For Union bodies, use the deepest left-branch Select's projections.
|
|
971
971
|
# Union.expressions is always empty; projections are on Union.this.
|
|
972
|
+
# For N=2: cte_body.this is a Select — the while loop is a no-op.
|
|
973
|
+
# For N≥3: cte_body.this is a nested Union (A UNION ALL B UNION ALL C
|
|
974
|
+
# parses as Union(Union(A,B),C)), so we walk down to the deepest
|
|
975
|
+
# left-branch Select (whose star qualify() already expanded in place).
|
|
972
976
|
if isinstance(cte_body, exp.Union):
|
|
973
977
|
projection_source = cte_body.this
|
|
978
|
+
while isinstance(projection_source, exp.Union):
|
|
979
|
+
projection_source = projection_source.this
|
|
974
980
|
else:
|
|
975
981
|
projection_source = cte_body
|
|
976
982
|
|
|
File without changes
|
|
File without changes
|