sql-code-graph 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.1.dist-info}/METADATA +1 -1
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.1.dist-info}/RECORD +9 -9
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/index.py +29 -0
- sqlcg/indexer/indexer.py +5 -13
- sqlcg/indexer/pool.py +55 -1
- sqlcg/parsers/base.py +67 -48
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.1.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=rhzbVCLAOlBWTlliY-J8bh3jG3Hn7-5PPLDJwujXW_g,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
@@ -8,7 +8,7 @@ sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
|
|
|
8
8
|
sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
|
|
9
9
|
sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=b6ns4_1ZVnHE5GeIb2N8YogjUvhjkzn_F9HrwCqrt_Y,8253
|
|
12
12
|
sqlcg/cli/commands/install.py,sha256=mNVXdGlQ4JtCaaibuzU-inf519T97mC-Nj9K-G2gMQY,7525
|
|
13
13
|
sqlcg/cli/commands/mcp.py,sha256=H1j6b5Tqr5VXja2GafgD5sJD6hZ5rsgfPwIikK1PZqc,1903
|
|
14
14
|
sqlcg/cli/commands/reindex.py,sha256=iZXxYGI2m2wxkvIA1mB9uvOEp66QaT5zF5TGd0OpqlU,6275
|
|
@@ -29,8 +29,8 @@ sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
|
29
29
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
30
30
|
sqlcg/indexer/error_classify.py,sha256=eWmc9WdOFe9kY_DMgKL0vv9gfcKnFw8e8U7cpUUw9wU,5139
|
|
31
31
|
sqlcg/indexer/git_delta.py,sha256=V7WiNgiYPRo97K_mB3ymkJDZGoFExqwTZ2ut0Nqua5o,4383
|
|
32
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
-
sqlcg/indexer/pool.py,sha256=
|
|
32
|
+
sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
|
|
33
|
+
sqlcg/indexer/pool.py,sha256=n8u_z2IjW-rX1m0wlJ9-N-jxQby_Y4J9blMEPYaf19Q,18360
|
|
34
34
|
sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
|
|
35
35
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
36
36
|
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
|
40
40
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
41
41
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
42
42
|
sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
|
|
43
|
-
sqlcg/parsers/base.py,sha256=
|
|
43
|
+
sqlcg/parsers/base.py,sha256=nkhl2jVBFRPKHtr2PKfYy6vTdW64v7KKUnfMwVG2ZMU,43941
|
|
44
44
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
45
45
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
46
46
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -57,7 +57,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
|
57
57
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
58
58
|
sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
|
|
59
59
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
60
|
-
sql_code_graph-1.0.
|
|
61
|
-
sql_code_graph-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
-
sql_code_graph-1.0.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
-
sql_code_graph-1.0.0.dist-info/RECORD,,
|
|
60
|
+
sql_code_graph-1.0.1.dist-info/METADATA,sha256=vFhNG1uWAym_RQ21vDWG0tlogTOe2DDjCmrJp8X1txg,12806
|
|
61
|
+
sql_code_graph-1.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
+
sql_code_graph-1.0.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
+
sql_code_graph-1.0.1.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/index.py
CHANGED
|
@@ -84,6 +84,35 @@ def index_cmd( # noqa: B008
|
|
|
84
84
|
db_path = get_db_path()
|
|
85
85
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
86
|
|
|
87
|
+
try:
|
|
88
|
+
_run_index(
|
|
89
|
+
path=path,
|
|
90
|
+
dialect=dialect,
|
|
91
|
+
dbt_manifest=dbt_manifest,
|
|
92
|
+
timeout_per_file=timeout_per_file,
|
|
93
|
+
no_ddl=no_ddl,
|
|
94
|
+
quiet=quiet,
|
|
95
|
+
batch_size=batch_size,
|
|
96
|
+
profile=profile,
|
|
97
|
+
)
|
|
98
|
+
except KeyboardInterrupt:
|
|
99
|
+
# The backend context manager (inside _run_index) has already closed the
|
|
100
|
+
# KuzuDB connection and released the lock by the time we get here.
|
|
101
|
+
console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
|
|
102
|
+
raise typer.Exit(130) from None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _run_index(
|
|
106
|
+
*,
|
|
107
|
+
path: Path,
|
|
108
|
+
dialect: str | None,
|
|
109
|
+
dbt_manifest: Path | None,
|
|
110
|
+
timeout_per_file: int,
|
|
111
|
+
no_ddl: bool,
|
|
112
|
+
quiet: bool,
|
|
113
|
+
batch_size: int,
|
|
114
|
+
profile: bool,
|
|
115
|
+
) -> None:
|
|
87
116
|
with get_backend() as backend:
|
|
88
117
|
backend.init_schema()
|
|
89
118
|
|
sqlcg/indexer/indexer.py
CHANGED
|
@@ -251,9 +251,11 @@ class Indexer:
|
|
|
251
251
|
_t_pass2_end = time.perf_counter()
|
|
252
252
|
|
|
253
253
|
except KeyboardInterrupt:
|
|
254
|
-
|
|
255
|
-
#
|
|
256
|
-
|
|
254
|
+
# Kill workers and abort immediately. A partial pass-1-only result is
|
|
255
|
+
# an incomplete graph (no cross-file resolution, no star expansion);
|
|
256
|
+
# writing it would leave a misleading half-index. Re-run `sqlcg index`
|
|
257
|
+
# to index — re-indexing is the migration path.
|
|
258
|
+
logger.warning("Interrupted — workers killed; no partial graph written.")
|
|
257
259
|
raise
|
|
258
260
|
|
|
259
261
|
# Assemble final pass-2 results: start from pass-1, overlay pass-2 where available
|
|
@@ -1104,16 +1106,6 @@ class Indexer:
|
|
|
1104
1106
|
|
|
1105
1107
|
return counts
|
|
1106
1108
|
|
|
1107
|
-
def _upsert_all(self, results: list[ParsedFile], db: GraphBackend) -> None:
|
|
1108
|
-
"""Upsert all parsed files.
|
|
1109
|
-
|
|
1110
|
-
Args:
|
|
1111
|
-
results: List of ParsedFile objects
|
|
1112
|
-
db: GraphBackend instance
|
|
1113
|
-
"""
|
|
1114
|
-
for parsed in results:
|
|
1115
|
-
self._upsert_parsed_file(parsed, db)
|
|
1116
|
-
|
|
1117
1109
|
def _expand_star_sources(self, db: GraphBackend) -> int:
|
|
1118
1110
|
"""Run the post-ingestion star expansion query.
|
|
1119
1111
|
|
sqlcg/indexer/pool.py
CHANGED
|
@@ -194,7 +194,14 @@ class HardKillPool:
|
|
|
194
194
|
) -> None:
|
|
195
195
|
self._dialect = dialect
|
|
196
196
|
self._schema_aliases: dict[str, str] = schema_aliases or {}
|
|
197
|
-
|
|
197
|
+
# Leave 2 logical cores of headroom rather than spawning one worker per
|
|
198
|
+
# logical core. Parsing is CPU-bound, and the main process also does work
|
|
199
|
+
# between passes (closure resolution, batched upserts); saturating every
|
|
200
|
+
# core makes the largest files miss the per-file wall-clock timeout.
|
|
201
|
+
# Measured on the 1,453-file DWH corpus (after the once-per-statement parser
|
|
202
|
+
# fixes): cpu_count → 2 timeouts / 186s; cpu_count-2 → 0 timeouts / 131s
|
|
203
|
+
# (fewer timeouts AND faster, since timed-out files waste work + respawn churn).
|
|
204
|
+
self._n = n_workers or max(1, (os.cpu_count() or 4) - 2)
|
|
198
205
|
self._ctx = mp.get_context("spawn")
|
|
199
206
|
self._workers: list[_WorkerState] = []
|
|
200
207
|
|
|
@@ -322,6 +329,28 @@ class HardKillPool:
|
|
|
322
329
|
w.task_start = time.monotonic()
|
|
323
330
|
busy.add(slot)
|
|
324
331
|
|
|
332
|
+
try:
|
|
333
|
+
return self._run_map_loop(
|
|
334
|
+
tasks, results, busy, kill_counts, _assign, per_task_timeout, on_result, n_tasks
|
|
335
|
+
)
|
|
336
|
+
except KeyboardInterrupt:
|
|
337
|
+
# Workers ignore SIGINT and are CPU-bound, so they will not notice a
|
|
338
|
+
# graceful SHUTDOWN sentinel until their current parse finishes. On
|
|
339
|
+
# interrupt the user wants the process gone now — hard-kill outright.
|
|
340
|
+
self.terminate()
|
|
341
|
+
raise
|
|
342
|
+
|
|
343
|
+
def _run_map_loop(
|
|
344
|
+
self,
|
|
345
|
+
tasks: list[dict],
|
|
346
|
+
results: list[ParsedFile | None],
|
|
347
|
+
busy: set[int],
|
|
348
|
+
kill_counts: dict[str, int],
|
|
349
|
+
_assign: Callable[[int], None],
|
|
350
|
+
per_task_timeout: float,
|
|
351
|
+
on_result: Callable[[], None] | None,
|
|
352
|
+
n_tasks: int,
|
|
353
|
+
) -> list[ParsedFile | None]:
|
|
325
354
|
# Initial dispatch: fill all worker slots
|
|
326
355
|
for i in range(min(self._n, n_tasks)):
|
|
327
356
|
_assign(i)
|
|
@@ -405,6 +434,31 @@ class HardKillPool:
|
|
|
405
434
|
# Shutdown
|
|
406
435
|
# ------------------------------------------------------------------
|
|
407
436
|
|
|
437
|
+
def terminate(self) -> None:
|
|
438
|
+
"""Immediately SIGKILL every worker without a graceful handshake.
|
|
439
|
+
|
|
440
|
+
Unlike :meth:`shutdown`, this sends no ``_SHUTDOWN`` sentinel and does
|
|
441
|
+
not wait for in-flight parses. Workers ignore SIGINT and are CPU-bound,
|
|
442
|
+
so a graceful stop would block on the longest running parse; on
|
|
443
|
+
interrupt we kill outright so the process dies promptly.
|
|
444
|
+
"""
|
|
445
|
+
for w in self._workers:
|
|
446
|
+
try:
|
|
447
|
+
w.conn.close()
|
|
448
|
+
except Exception:
|
|
449
|
+
pass
|
|
450
|
+
try:
|
|
451
|
+
if w.process.is_alive():
|
|
452
|
+
w.process.kill()
|
|
453
|
+
except Exception:
|
|
454
|
+
pass
|
|
455
|
+
for w in self._workers:
|
|
456
|
+
try:
|
|
457
|
+
w.process.join(timeout=1)
|
|
458
|
+
except Exception:
|
|
459
|
+
pass
|
|
460
|
+
self._workers.clear()
|
|
461
|
+
|
|
408
462
|
def shutdown(self) -> None:
|
|
409
463
|
"""Gracefully stop all workers, then force-kill any that linger."""
|
|
410
464
|
for w in self._workers:
|
sqlcg/parsers/base.py
CHANGED
|
@@ -619,10 +619,6 @@ class SqlParser(ABC):
|
|
|
619
619
|
else:
|
|
620
620
|
return LineageExtraction(edges=edges, star_sources=star_sources)
|
|
621
621
|
|
|
622
|
-
# Build scope once from the body for all-column reuse (T-05 optimization)
|
|
623
|
-
# Defer scope building to just before the column loop to ensure sources
|
|
624
|
-
# are expanded first (avoid rebuilding for each column, but only build
|
|
625
|
-
# after sources are known)
|
|
626
622
|
body_scope = None
|
|
627
623
|
combined_sources = {**(sources or {})}
|
|
628
624
|
|
|
@@ -644,6 +640,54 @@ class SqlParser(ABC):
|
|
|
644
640
|
key = cte_alias.lower()
|
|
645
641
|
combined_sources[key] = cte.this
|
|
646
642
|
|
|
643
|
+
# Build body_scope ONCE per statement, before the column loop, and reuse
|
|
644
|
+
# it for every column (CLAUDE.md invariant: "body_scope built once per
|
|
645
|
+
# statement"). If schema-qualify fails, retry schema-free so we STILL get
|
|
646
|
+
# a scope for the copy=False fast path; only if both fail do we fall back
|
|
647
|
+
# to the per-column sources= path. Building this lazily inside the loop
|
|
648
|
+
# (regressed in 4234e5d) meant a single qualify failure re-ran
|
|
649
|
+
# expand+qualify+build_scope for EVERY column → O(N_cols) full-body
|
|
650
|
+
# deepcopies per statement (measured: 229 qualify calls on one 460-line file).
|
|
651
|
+
if scope is None:
|
|
652
|
+
expanded_body = body
|
|
653
|
+
expand_sources = {
|
|
654
|
+
k: v for k, v in (sources or {}).items() if isinstance(v, exp.Query)
|
|
655
|
+
}
|
|
656
|
+
if expand_sources:
|
|
657
|
+
try:
|
|
658
|
+
expanded_body = exp.expand(
|
|
659
|
+
body,
|
|
660
|
+
expand_sources, # type: ignore
|
|
661
|
+
dialect=self.DIALECT,
|
|
662
|
+
copy=True,
|
|
663
|
+
)
|
|
664
|
+
except Exception:
|
|
665
|
+
expanded_body = body
|
|
666
|
+
try:
|
|
667
|
+
qualified_body = qualify(
|
|
668
|
+
expanded_body,
|
|
669
|
+
dialect=self.DIALECT,
|
|
670
|
+
schema=schema,
|
|
671
|
+
validate_qualify_columns=False,
|
|
672
|
+
identify=False,
|
|
673
|
+
)
|
|
674
|
+
body_scope = build_scope(qualified_body)
|
|
675
|
+
except Exception as _qualify_exc:
|
|
676
|
+
out.errors.append(
|
|
677
|
+
f"col_lineage_skip:qualify_failed:{type(_qualify_exc).__name__}"
|
|
678
|
+
)
|
|
679
|
+
# Schema-free retry: still yields a scope for the copy=False path.
|
|
680
|
+
try:
|
|
681
|
+
qualified_body = qualify(
|
|
682
|
+
expanded_body,
|
|
683
|
+
dialect=self.DIALECT,
|
|
684
|
+
validate_qualify_columns=False,
|
|
685
|
+
identify=False,
|
|
686
|
+
)
|
|
687
|
+
body_scope = build_scope(qualified_body)
|
|
688
|
+
except Exception:
|
|
689
|
+
body_scope = None
|
|
690
|
+
|
|
647
691
|
# Extract output columns
|
|
648
692
|
for col_expr in col_expressions:
|
|
649
693
|
# Skip star projections — sg_lineage requires a concrete column name.
|
|
@@ -723,53 +767,23 @@ class SqlParser(ABC):
|
|
|
723
767
|
continue
|
|
724
768
|
|
|
725
769
|
try:
|
|
726
|
-
# Build scope on first column for reuse across all columns (T-05 optimization).
|
|
727
|
-
# NOTE: We build body_scope locally from the extracted body rather than
|
|
728
|
-
# using a pre-built scope from the statement, because CREATE/INSERT statements
|
|
729
|
-
# have their scope rooted at the outer statement, but the body passed here
|
|
730
|
-
# is the inner SELECT. Reusing the outer scope would produce incorrect
|
|
731
|
-
# qualification. The pre-built scope from parse_file would only be useful
|
|
732
|
-
# if we had a mechanism to extract the matching inner scope, which is
|
|
733
|
-
# complex and not yet implemented (see sprint_06 T-05 deviation for details).
|
|
734
|
-
if body_scope is None and scope is None:
|
|
735
|
-
try:
|
|
736
|
-
# Expand only file-level sources (CTEs, temp tables, CTAS bodies).
|
|
737
|
-
expanded_body = body
|
|
738
|
-
expand_sources = {
|
|
739
|
-
k: v for k, v in (sources or {}).items() if isinstance(v, exp.Query)
|
|
740
|
-
}
|
|
741
|
-
if expand_sources:
|
|
742
|
-
expanded_body = exp.expand(
|
|
743
|
-
body,
|
|
744
|
-
expand_sources, # type: ignore
|
|
745
|
-
dialect=self.DIALECT,
|
|
746
|
-
copy=True,
|
|
747
|
-
)
|
|
748
|
-
|
|
749
|
-
# Qualify the expanded body to prepare for scope building
|
|
750
|
-
qualified_body = qualify(
|
|
751
|
-
expanded_body,
|
|
752
|
-
dialect=self.DIALECT,
|
|
753
|
-
schema=schema,
|
|
754
|
-
validate_qualify_columns=False,
|
|
755
|
-
identify=False,
|
|
756
|
-
)
|
|
757
|
-
body_scope = build_scope(qualified_body)
|
|
758
|
-
except Exception as _qualify_exc:
|
|
759
|
-
# qualify() failure is non-fatal: sg_lineage falls back to
|
|
760
|
-
# its own qualification. Record for observability.
|
|
761
|
-
out.errors.append(
|
|
762
|
-
f"col_lineage_skip:qualify_failed:{type(_qualify_exc).__name__}:{_qualify_exc}"
|
|
763
|
-
)
|
|
764
|
-
body_scope = None
|
|
765
|
-
|
|
766
770
|
# When a scope is available it embeds full column→table resolution.
|
|
767
771
|
# On the qualify-failed fallback path (no scope), pass only the small
|
|
768
772
|
# set of file-level sources so sg_lineage can resolve CTEs/CTAS bodies.
|
|
769
773
|
active_scope = scope if scope is not None else body_scope
|
|
770
774
|
sg_kwargs: dict = {"dialect": self.DIALECT}
|
|
771
775
|
if active_scope is not None:
|
|
776
|
+
# scope= path: the pre-built scope already embeds full
|
|
777
|
+
# column→table resolution. copy=False + trim_selects=False
|
|
778
|
+
# suppress sqlglot's per-call AST deepcopy and per-column
|
|
779
|
+
# trim — neither is needed when the scope is built once and
|
|
780
|
+
# reused across all columns. Dropping these (regressed in
|
|
781
|
+
# 4234e5d) makes lineage() deepcopy the whole scope per
|
|
782
|
+
# column → O(columns × scope_size) (measured: 3.2M deepcopy
|
|
783
|
+
# calls / ~3.8s on a 359-line file).
|
|
772
784
|
sg_kwargs["scope"] = active_scope
|
|
785
|
+
sg_kwargs["copy"] = False
|
|
786
|
+
sg_kwargs["trim_selects"] = False
|
|
773
787
|
else:
|
|
774
788
|
sg_kwargs["sources"] = sources or {}
|
|
775
789
|
root = sg_lineage(col_name, body, **sg_kwargs)
|
|
@@ -912,6 +926,13 @@ class SqlParser(ABC):
|
|
|
912
926
|
# stops sg_lineage at the CTE name boundary (doesn't expand into bodies).
|
|
913
927
|
if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
|
|
914
928
|
insert_cols = [c.name for c in stmt.this.expressions]
|
|
929
|
+
# Build the WITH-stripped body ONCE before the loop and only swap its
|
|
930
|
+
# single projection per column (regressed in 4234e5d, which moved the
|
|
931
|
+
# full-body body.copy() inside the loop → O(N_cols) full-body deepcopies
|
|
932
|
+
# for wide INSERT ... SELECT). Stripping WITH stops sg_lineage at the CTE
|
|
933
|
+
# name boundary.
|
|
934
|
+
body_no_with = body.copy()
|
|
935
|
+
body_no_with.set("with_", None)
|
|
915
936
|
for idx, col_expr in enumerate(col_expressions):
|
|
916
937
|
if idx >= len(insert_cols):
|
|
917
938
|
break
|
|
@@ -920,10 +941,8 @@ class SqlParser(ABC):
|
|
|
920
941
|
insert_col = insert_cols[idx]
|
|
921
942
|
if not insert_col:
|
|
922
943
|
continue
|
|
923
|
-
#
|
|
924
|
-
#
|
|
925
|
-
body_no_with = body.copy()
|
|
926
|
-
body_no_with.set("with_", None)
|
|
944
|
+
# Patch the shared body with this column's aliased expression so
|
|
945
|
+
# sg_lineage can trace it to the INSERT column name.
|
|
927
946
|
aliased = exp.Alias(this=col_expr.copy(), alias=insert_col)
|
|
928
947
|
body_no_with.set("expressions", [aliased])
|
|
929
948
|
patched_sql = body_no_with.sql(dialect=self.DIALECT)
|
|
File without changes
|
|
File without changes
|