sql-code-graph 1.35.2__py3-none-any.whl → 1.35.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.35.2.dist-info → sql_code_graph-1.35.3.dist-info}/METADATA +1 -1
- {sql_code_graph-1.35.2.dist-info → sql_code_graph-1.35.3.dist-info}/RECORD +7 -7
- sqlcg/__init__.py +1 -1
- sqlcg/indexer/indexer.py +61 -9
- sqlcg/parsers/bigquery_parser.py +5 -1
- {sql_code_graph-1.35.2.dist-info → sql_code_graph-1.35.3.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.35.2.dist-info → sql_code_graph-1.35.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.35.2
|
|
3
|
+
Version: 1.35.3
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=iuiB1QBl9EVtoW0aNt89z6gr_yDaZ5RV7phaFO8zX0Y,116
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/coverage.py,sha256=Xm9ITzZDHv2mJ70Q5jCacVuhDStVrE3gq12_-Ypvtd8,43823
|
|
@@ -34,7 +34,7 @@ sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
|
34
34
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
35
35
|
sqlcg/indexer/error_classify.py,sha256=-sp8cRmuOBHu_CxnCtaXf34YxHFYwIFNjIrn4LaEv6M,7142
|
|
36
36
|
sqlcg/indexer/git_delta.py,sha256=zYdH5q-jV7w_ne8Oxdywsy0N3rwUjpd5RjEDurlrMSA,5026
|
|
37
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
37
|
+
sqlcg/indexer/indexer.py,sha256=LNc5pI3_WwIMT2iTnCu9VB1Kk21T1qAmN7fupyiQu2s,105315
|
|
38
38
|
sqlcg/indexer/pool.py,sha256=iMmCQtpDRKBTQBep2_EUq9THcsE18Zgk0hdaFB_CwiA,19006
|
|
39
39
|
sqlcg/indexer/walker.py,sha256=Cft6JiJtdBFy0HR6L9pJdr5Fg0eRR3XBW1OMtM2apto,1947
|
|
40
40
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
@@ -46,7 +46,7 @@ sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
|
|
|
46
46
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
47
47
|
sqlcg/parsers/ansi_parser.py,sha256=RX6eVj7gt1qmsHNJLAF_a4jyW3RCI5W2oF4rd53cKNg,39336
|
|
48
48
|
sqlcg/parsers/base.py,sha256=d5s5_LSv96jrww9vx52GujjrLHwpxy_UOhmIlWcKglw,106489
|
|
49
|
-
sqlcg/parsers/bigquery_parser.py,sha256=
|
|
49
|
+
sqlcg/parsers/bigquery_parser.py,sha256=g0B6aIpMyxLMVQ3ohAAjzR4nEmMh-WGkFcYLMiKdLxs,3177
|
|
50
50
|
sqlcg/parsers/dynamic_name.py,sha256=q0QAa9iAcmRW4e_0G2b2j-xTbI3VR1-Wwa-nJRLtrQw,6836
|
|
51
51
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
52
52
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -73,7 +73,7 @@ sqlcg/viz/render.py,sha256=BINkGbJbbb_iqhrkN795RaQsdg8nqCiJtsEFF1yo22Y,2737
|
|
|
73
73
|
sqlcg/viz/tags.py,sha256=6zRnGlHjuGmEeB6yN1uhzm8rqL7ZGoyL1Ki7jI5oM6A,5368
|
|
74
74
|
sqlcg/viz/assets/force-graph.min.js,sha256=jNdYdDdrYiUdUlElxRkolPBt30rstQk2q15Q32VVdzc,177272
|
|
75
75
|
sqlcg/viz/assets/template.html,sha256=9_j-mvo1ZxwgiJPDdVrNmca37dTrTjjYVd3977u-DxE,12294
|
|
76
|
-
sql_code_graph-1.35.2.dist-info/METADATA,sha256=
|
|
77
|
-
sql_code_graph-1.35.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
78
|
-
sql_code_graph-1.35.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
79
|
-
sql_code_graph-1.35.2.dist-info/RECORD,,
|
|
76
|
+
sql_code_graph-1.35.3.dist-info/METADATA,sha256=bR0GUYuujbDEYNj4602aE5Olejev4X6hp7KYlaezZjg,17791
|
|
77
|
+
sql_code_graph-1.35.3.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
78
|
+
sql_code_graph-1.35.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
79
|
+
sql_code_graph-1.35.3.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/indexer/indexer.py
CHANGED
|
@@ -388,7 +388,7 @@ def _flush_row_batch(
|
|
|
388
388
|
)
|
|
389
389
|
|
|
390
390
|
|
|
391
|
-
def _subprocess_parse_worker(parser_cls, dialect, path, sql, q):
|
|
391
|
+
def _subprocess_parse_worker(parser_cls, dialect, path, sql, q, rel_path=None):
|
|
392
392
|
"""Parse a single file in a subprocess; queue the ParsedFile (or exception).
|
|
393
393
|
|
|
394
394
|
parser_cls must be the *class* (pickleable), not an instance. The worker
|
|
@@ -398,10 +398,16 @@ def _subprocess_parse_worker(parser_cls, dialect, path, sql, q):
|
|
|
398
398
|
T-09-04: Parser constructors require a SchemaResolver. The subprocess gets a
|
|
399
399
|
fresh empty resolver; column resolution runs in infer-only mode, the same as
|
|
400
400
|
small-repo mode.
|
|
401
|
+
|
|
402
|
+
#171: rel_path is the repo-relative posix path used for CTE/temp namespace
|
|
403
|
+
keying. It MUST be forwarded so the incremental path produces the same keys
|
|
404
|
+
as index_repo (which threads rel_path through its task dict); without it the
|
|
405
|
+
namespace falls back to the absolute OS path, creating duplicate CTE/temp
|
|
406
|
+
nodes after an incremental reindex.
|
|
401
407
|
"""
|
|
402
408
|
try:
|
|
403
409
|
parser = parser_cls(SchemaResolver(dialect=str(dialect) if dialect else None))
|
|
404
|
-
out = parser.parse_file(path, sql)
|
|
410
|
+
out = parser.parse_file(path, sql, rel_path=rel_path)
|
|
405
411
|
q.put(out)
|
|
406
412
|
except BaseException as exc:
|
|
407
413
|
# Send the exception back; parent will re-raise.
|
|
@@ -1129,6 +1135,22 @@ class Indexer:
|
|
|
1129
1135
|
schema_resolver = SchemaResolver(dialect=dialect)
|
|
1130
1136
|
parser = get_parser(dialect, schema_resolver)
|
|
1131
1137
|
|
|
1138
|
+
# #170/#171: load schema_aliases and compute repo-relative posix paths the
|
|
1139
|
+
# SAME way index_repo does, so the incremental path applies the same alias
|
|
1140
|
+
# normalisation (#170) and CTE/temp namespace keys (#171). Without these,
|
|
1141
|
+
# a branch-switch resync produced phantom *_tmp.* nodes (aliases unapplied)
|
|
1142
|
+
# and duplicate CTE/temp nodes (absolute-path keys).
|
|
1143
|
+
from sqlcg.core.config import get_schema_aliases
|
|
1144
|
+
|
|
1145
|
+
schema_aliases = get_schema_aliases(root)
|
|
1146
|
+
root_resolved = Path(root).resolve()
|
|
1147
|
+
|
|
1148
|
+
def _rel_posix(fp: Path) -> str:
|
|
1149
|
+
try:
|
|
1150
|
+
return fp.resolve().relative_to(root_resolved).as_posix()
|
|
1151
|
+
except ValueError:
|
|
1152
|
+
return fp.as_posix()
|
|
1153
|
+
|
|
1132
1154
|
pass1_results: list[ParsedFile] = []
|
|
1133
1155
|
for file_path in reparse_set:
|
|
1134
1156
|
try:
|
|
@@ -1141,7 +1163,9 @@ class Indexer:
|
|
|
1141
1163
|
pass1_results.append(placeholder)
|
|
1142
1164
|
continue
|
|
1143
1165
|
try:
|
|
1144
|
-
parsed = self._index_single_file(parser, file_path, sql, timeout_per_file)
|
|
1166
|
+
parsed = self._index_single_file(
|
|
1167
|
+
parser, file_path, sql, timeout_per_file, rel_path=_rel_posix(file_path)
|
|
1168
|
+
)
|
|
1145
1169
|
except Exception as exc:
|
|
1146
1170
|
logger.warning("resync_changed: parse failed %s: %s", file_path, exc)
|
|
1147
1171
|
parsed = ParsedFile(path=file_path, dialect=dialect)
|
|
@@ -1287,7 +1311,11 @@ class Indexer:
|
|
|
1287
1311
|
def_path = Path(definer_fp)
|
|
1288
1312
|
def_sql = def_path.read_text(encoding="utf-8")
|
|
1289
1313
|
def_parsed = self._index_single_file(
|
|
1290
|
-
parser, def_path, def_sql, timeout_per_file
|
|
1314
|
+
parser,
|
|
1315
|
+
def_path,
|
|
1316
|
+
def_sql,
|
|
1317
|
+
timeout_per_file,
|
|
1318
|
+
rel_path=_rel_posix(def_path),
|
|
1291
1319
|
)
|
|
1292
1320
|
# Harvest only — register for cross_file_sources but do NOT upsert
|
|
1293
1321
|
aggregator.register_pass1(def_parsed)
|
|
@@ -1315,7 +1343,7 @@ class Indexer:
|
|
|
1315
1343
|
)
|
|
1316
1344
|
continue
|
|
1317
1345
|
try:
|
|
1318
|
-
cl_parsed = parser.parse_file(cl_path, cl_sql)
|
|
1346
|
+
cl_parsed = parser.parse_file(cl_path, cl_sql, rel_path=_rel_posix(cl_path))
|
|
1319
1347
|
except Exception as exc:
|
|
1320
1348
|
logger.warning("resync_changed: parse failed for closure file %s: %s", cl_path, exc)
|
|
1321
1349
|
cl_parsed = ParsedFile(path=cl_path, dialect=dialect)
|
|
@@ -1325,7 +1353,19 @@ class Indexer:
|
|
|
1325
1353
|
# ---- Step 7: Batched bulk upsert (same _flush_batch path as index_repo) ----
|
|
1326
1354
|
all_results = pass1_results + closure_results
|
|
1327
1355
|
|
|
1328
|
-
#
|
|
1356
|
+
# #170: key-normalisation choke point — apply schema_aliases + empty-identity
|
|
1357
|
+
# guard to EVERY parse result BEFORE the defined_table_registry is built and
|
|
1358
|
+
# before _upsert_file_batch. index_repo (line ~797) and reindex_file
|
|
1359
|
+
# (line ~1421) both do this; resync_changed previously did not, so a
|
|
1360
|
+
# branch-switch incremental reindex left staging-alias schemas (e.g. ba_tmp)
|
|
1361
|
+
# un-normalised, producing phantom *_tmp.* nodes that a from-scratch index
|
|
1362
|
+
# never creates. O(edges) per file, once per resync — outside the hot loop.
|
|
1363
|
+
from sqlcg.parsers.base import normalize_keys as _normalize_keys
|
|
1364
|
+
|
|
1365
|
+
for pf in all_results:
|
|
1366
|
+
_normalize_keys(pf, schema_aliases)
|
|
1367
|
+
|
|
1368
|
+
# Build a registry for duplicate DDL detection (post-normalisation full_ids)
|
|
1329
1369
|
defined_table_registry: dict[str, str] = {}
|
|
1330
1370
|
for pf in all_results:
|
|
1331
1371
|
for table in pf.defined_tables:
|
|
@@ -1432,7 +1472,14 @@ class Indexer:
|
|
|
1432
1472
|
# join-column edges until the next full index (plan-review BLOCKER).
|
|
1433
1473
|
self._resolve_join_columns(db)
|
|
1434
1474
|
|
|
1435
|
-
def _index_single_file(self, parser, path: Path, sql: str, timeout: int) -> ParsedFile:
|
|
1475
|
+
def _index_single_file(
|
|
1476
|
+
self,
|
|
1477
|
+
parser,
|
|
1478
|
+
path: Path,
|
|
1479
|
+
sql: str,
|
|
1480
|
+
timeout: int,
|
|
1481
|
+
rel_path: str | None = None,
|
|
1482
|
+
) -> ParsedFile:
|
|
1436
1483
|
"""Parse one file, with optional timeout via subprocess isolation.
|
|
1437
1484
|
|
|
1438
1485
|
T-09-04: Subprocess isolation via multiprocessing.Process + spawn context.
|
|
@@ -1446,12 +1493,17 @@ class Indexer:
|
|
|
1446
1493
|
path: Path to the file
|
|
1447
1494
|
sql: SQL text
|
|
1448
1495
|
timeout: Timeout in seconds (0 = no timeout)
|
|
1496
|
+
rel_path: Repo-relative posix path for CTE/temp namespace keying
|
|
1497
|
+
(#171). Threaded through to parse_file (both the in-process and
|
|
1498
|
+
subprocess branch) so the incremental path produces the same
|
|
1499
|
+
namespace keys as index_repo. Falls back to str(path) inside the
|
|
1500
|
+
parser when None.
|
|
1449
1501
|
|
|
1450
1502
|
Returns:
|
|
1451
1503
|
ParsedFile with parse_failed flag set if timeout occurs
|
|
1452
1504
|
"""
|
|
1453
1505
|
if timeout <= 0:
|
|
1454
|
-
return parser.parse_file(path, sql)
|
|
1506
|
+
return parser.parse_file(path, sql, rel_path=rel_path)
|
|
1455
1507
|
|
|
1456
1508
|
ctx = mp.get_context("spawn") # avoid fork-inherit pitfalls (KuzuDB connection FD etc.)
|
|
1457
1509
|
# Unbounded queue: the child writes one large ParsedFile (192–552 KB pickled).
|
|
@@ -1462,7 +1514,7 @@ class Indexer:
|
|
|
1462
1514
|
q: mp.Queue = ctx.Queue()
|
|
1463
1515
|
proc = ctx.Process(
|
|
1464
1516
|
target=_subprocess_parse_worker,
|
|
1465
|
-
args=(parser.__class__, parser.DIALECT, path, sql, q),
|
|
1517
|
+
args=(parser.__class__, parser.DIALECT, path, sql, q, rel_path),
|
|
1466
1518
|
daemon=True,
|
|
1467
1519
|
)
|
|
1468
1520
|
proc.start()
|
sqlcg/parsers/bigquery_parser.py
CHANGED
|
@@ -35,12 +35,16 @@ class BigQueryParser(AnsiParser):
|
|
|
35
35
|
"""
|
|
36
36
|
super().__init__(schema_resolver, schema_aliases=schema_aliases)
|
|
37
37
|
|
|
38
|
-
def parse_file(self, path: Path, sql: str) -> ParsedFile:
|
|
38
|
+
def parse_file(self, path: Path, sql: str, rel_path: str | None = None) -> ParsedFile:
|
|
39
39
|
"""Parse BigQuery SQL file with scripting block detection.
|
|
40
40
|
|
|
41
41
|
Args:
|
|
42
42
|
path: Path to the source file
|
|
43
43
|
sql: SQL text to parse
|
|
44
|
+
rel_path: Repo-relative posix path for CTE/temp namespace keying.
|
|
45
|
+
Accepted for signature parity with the other parsers (the pool and
|
|
46
|
+
resync paths always pass it); scripting-fallback BigQuery files do
|
|
47
|
+
not register CTE/temp nodes, so it is currently unused here.
|
|
44
48
|
|
|
45
49
|
Returns:
|
|
46
50
|
ParsedFile with parsed statements and metadata
|
|
File without changes
|
|
File without changes
|