sql-code-graph 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_code_graph-1.0.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
sql_code_graph-1.0.1.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- sqlcg/__init__.py,sha256=2lT2oiKX19arg1oTOFf13dXA3qyyQNpRevdvKHZIOp4,115
1
+ sqlcg/__init__.py,sha256=rhzbVCLAOlBWTlliY-J8bh3jG3Hn7-5PPLDJwujXW_g,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
@@ -8,7 +8,7 @@ sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
8
8
  sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
9
9
  sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
10
  sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
11
- sqlcg/cli/commands/index.py,sha256=6f-kaoY5roY4DDvEOi_HrDnBG9Jrqy0_A47gsxZsNUQ,7421
11
+ sqlcg/cli/commands/index.py,sha256=b6ns4_1ZVnHE5GeIb2N8YogjUvhjkzn_F9HrwCqrt_Y,8253
12
12
  sqlcg/cli/commands/install.py,sha256=mNVXdGlQ4JtCaaibuzU-inf519T97mC-Nj9K-G2gMQY,7525
13
13
  sqlcg/cli/commands/mcp.py,sha256=H1j6b5Tqr5VXja2GafgD5sJD6hZ5rsgfPwIikK1PZqc,1903
14
14
  sqlcg/cli/commands/reindex.py,sha256=iZXxYGI2m2wxkvIA1mB9uvOEp66QaT5zF5TGd0OpqlU,6275
@@ -29,8 +29,8 @@ sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
29
29
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
30
30
  sqlcg/indexer/error_classify.py,sha256=eWmc9WdOFe9kY_DMgKL0vv9gfcKnFw8e8U7cpUUw9wU,5139
31
31
  sqlcg/indexer/git_delta.py,sha256=V7WiNgiYPRo97K_mB3ymkJDZGoFExqwTZ2ut0Nqua5o,4383
32
- sqlcg/indexer/indexer.py,sha256=Jes0SybIDXLWQlWbRrDAbxVfJ7OsdS3PDAVSoRcv3Tc,50605
33
- sqlcg/indexer/pool.py,sha256=Q9DQmgUsSeKL1S-gNAzMbCNPGI9WsG6Nmt_noh_O8M8,16069
32
+ sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
33
+ sqlcg/indexer/pool.py,sha256=n8u_z2IjW-rX1m0wlJ9-N-jxQby_Y4J9blMEPYaf19Q,18360
34
34
  sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
35
35
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
36
36
  sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
40
40
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
41
41
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
42
42
  sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
43
- sqlcg/parsers/base.py,sha256=aw-gueAMdt551peUY0g7lWbswQLPWx0FDCK4RDfUjDE,43205
43
+ sqlcg/parsers/base.py,sha256=nkhl2jVBFRPKHtr2PKfYy6vTdW64v7KKUnfMwVG2ZMU,43941
44
44
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
45
45
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
46
46
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
@@ -57,7 +57,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
57
57
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
58
58
  sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
59
59
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
60
- sql_code_graph-1.0.0.dist-info/METADATA,sha256=HQdFHBzEKTlPlqnwRCT9n0iKrmWqkmM5mhM3fOi5lvo,12806
61
- sql_code_graph-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
- sql_code_graph-1.0.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
- sql_code_graph-1.0.0.dist-info/RECORD,,
60
+ sql_code_graph-1.0.1.dist-info/METADATA,sha256=vFhNG1uWAym_RQ21vDWG0tlogTOe2DDjCmrJp8X1txg,12806
61
+ sql_code_graph-1.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
+ sql_code_graph-1.0.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
+ sql_code_graph-1.0.1.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.0.1"
4
4
 
5
5
  __all__ = ["__version__"]
sqlcg/cli/commands/index.py CHANGED
@@ -84,6 +84,35 @@ def index_cmd( # noqa: B008
84
84
  db_path = get_db_path()
85
85
  db_path.parent.mkdir(parents=True, exist_ok=True)
86
86
 
87
+ try:
88
+ _run_index(
89
+ path=path,
90
+ dialect=dialect,
91
+ dbt_manifest=dbt_manifest,
92
+ timeout_per_file=timeout_per_file,
93
+ no_ddl=no_ddl,
94
+ quiet=quiet,
95
+ batch_size=batch_size,
96
+ profile=profile,
97
+ )
98
+ except KeyboardInterrupt:
99
+ # The backend context manager (inside _run_index) has already closed the
100
+ # KuzuDB connection and released the lock by the time we get here.
101
+ console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
102
+ raise typer.Exit(130) from None
103
+
104
+
105
+ def _run_index(
106
+ *,
107
+ path: Path,
108
+ dialect: str | None,
109
+ dbt_manifest: Path | None,
110
+ timeout_per_file: int,
111
+ no_ddl: bool,
112
+ quiet: bool,
113
+ batch_size: int,
114
+ profile: bool,
115
+ ) -> None:
87
116
  with get_backend() as backend:
88
117
  backend.init_schema()
89
118
 
sqlcg/indexer/indexer.py CHANGED
@@ -251,9 +251,11 @@ class Indexer:
251
251
  _t_pass2_end = time.perf_counter()
252
252
 
253
253
  except KeyboardInterrupt:
254
- logger.info("SIGINT received flushing pass-1 progress")
255
- # pass1_results may be partial; upsert what we have
256
- self._upsert_all(pass1_results, db)
254
+ # Kill workers and abort immediately. A partial pass-1-only result is
255
+ # an incomplete graph (no cross-file resolution, no star expansion);
256
+ # writing it would leave a misleading half-index. Re-run `sqlcg index`
257
+ # to index — re-indexing is the migration path.
258
+ logger.warning("Interrupted — workers killed; no partial graph written.")
257
259
  raise
258
260
 
259
261
  # Assemble final pass-2 results: start from pass-1, overlay pass-2 where available
@@ -1104,16 +1106,6 @@ class Indexer:
1104
1106
 
1105
1107
  return counts
1106
1108
 
1107
- def _upsert_all(self, results: list[ParsedFile], db: GraphBackend) -> None:
1108
- """Upsert all parsed files.
1109
-
1110
- Args:
1111
- results: List of ParsedFile objects
1112
- db: GraphBackend instance
1113
- """
1114
- for parsed in results:
1115
- self._upsert_parsed_file(parsed, db)
1116
-
1117
1109
  def _expand_star_sources(self, db: GraphBackend) -> int:
1118
1110
  """Run the post-ingestion star expansion query.
1119
1111
 
sqlcg/indexer/pool.py CHANGED
@@ -194,7 +194,14 @@ class HardKillPool:
194
194
  ) -> None:
195
195
  self._dialect = dialect
196
196
  self._schema_aliases: dict[str, str] = schema_aliases or {}
197
- self._n = n_workers or os.cpu_count() or 4
197
+ # Leave 2 logical cores of headroom rather than spawning one worker per
198
+ # logical core. Parsing is CPU-bound, and the main process also does work
199
+ # between passes (closure resolution, batched upserts); saturating every
200
+ # core makes the largest files miss the per-file wall-clock timeout.
201
+ # Measured on the 1,453-file DWH corpus (after the once-per-statement parser
202
+ # fixes): cpu_count → 2 timeouts / 186s; cpu_count-2 → 0 timeouts / 131s
203
+ # (fewer timeouts AND faster, since timed-out files waste work + respawn churn).
204
+ self._n = n_workers or max(1, (os.cpu_count() or 4) - 2)
198
205
  self._ctx = mp.get_context("spawn")
199
206
  self._workers: list[_WorkerState] = []
200
207
 
@@ -322,6 +329,28 @@ class HardKillPool:
322
329
  w.task_start = time.monotonic()
323
330
  busy.add(slot)
324
331
 
332
+ try:
333
+ return self._run_map_loop(
334
+ tasks, results, busy, kill_counts, _assign, per_task_timeout, on_result, n_tasks
335
+ )
336
+ except KeyboardInterrupt:
337
+ # Workers ignore SIGINT and are CPU-bound, so they will not notice a
338
+ # graceful SHUTDOWN sentinel until their current parse finishes. On
339
+ # interrupt the user wants the process gone now — hard-kill outright.
340
+ self.terminate()
341
+ raise
342
+
343
+ def _run_map_loop(
344
+ self,
345
+ tasks: list[dict],
346
+ results: list[ParsedFile | None],
347
+ busy: set[int],
348
+ kill_counts: dict[str, int],
349
+ _assign: Callable[[int], None],
350
+ per_task_timeout: float,
351
+ on_result: Callable[[], None] | None,
352
+ n_tasks: int,
353
+ ) -> list[ParsedFile | None]:
325
354
  # Initial dispatch: fill all worker slots
326
355
  for i in range(min(self._n, n_tasks)):
327
356
  _assign(i)
@@ -405,6 +434,31 @@ class HardKillPool:
405
434
  # Shutdown
406
435
  # ------------------------------------------------------------------
407
436
 
437
+ def terminate(self) -> None:
438
+ """Immediately SIGKILL every worker without a graceful handshake.
439
+
440
+ Unlike :meth:`shutdown`, this sends no ``_SHUTDOWN`` sentinel and does
441
+ not wait for in-flight parses. Workers ignore SIGINT and are CPU-bound,
442
+ so a graceful stop would block on the longest running parse; on
443
+ interrupt we kill outright so the process dies promptly.
444
+ """
445
+ for w in self._workers:
446
+ try:
447
+ w.conn.close()
448
+ except Exception:
449
+ pass
450
+ try:
451
+ if w.process.is_alive():
452
+ w.process.kill()
453
+ except Exception:
454
+ pass
455
+ for w in self._workers:
456
+ try:
457
+ w.process.join(timeout=1)
458
+ except Exception:
459
+ pass
460
+ self._workers.clear()
461
+
408
462
  def shutdown(self) -> None:
409
463
  """Gracefully stop all workers, then force-kill any that linger."""
410
464
  for w in self._workers:
sqlcg/parsers/base.py CHANGED
@@ -619,10 +619,6 @@ class SqlParser(ABC):
619
619
  else:
620
620
  return LineageExtraction(edges=edges, star_sources=star_sources)
621
621
 
622
- # Build scope once from the body for all-column reuse (T-05 optimization)
623
- # Defer scope building to just before the column loop to ensure sources
624
- # are expanded first (avoid rebuilding for each column, but only build
625
- # after sources are known)
626
622
  body_scope = None
627
623
  combined_sources = {**(sources or {})}
628
624
 
@@ -644,6 +640,54 @@ class SqlParser(ABC):
644
640
  key = cte_alias.lower()
645
641
  combined_sources[key] = cte.this
646
642
 
643
+ # Build body_scope ONCE per statement, before the column loop, and reuse
644
+ # it for every column (CLAUDE.md invariant: "body_scope built once per
645
+ # statement"). If schema-qualify fails, retry schema-free so we STILL get
646
+ # a scope for the copy=False fast path; only if both fail do we fall back
647
+ # to the per-column sources= path. Building this lazily inside the loop
648
+ # (regressed in 4234e5d) meant a single qualify failure re-ran
649
+ # expand+qualify+build_scope for EVERY column → O(N_cols) full-body
650
+ # deepcopies per statement (measured: 229 qualify calls on one 460-line file).
651
+ if scope is None:
652
+ expanded_body = body
653
+ expand_sources = {
654
+ k: v for k, v in (sources or {}).items() if isinstance(v, exp.Query)
655
+ }
656
+ if expand_sources:
657
+ try:
658
+ expanded_body = exp.expand(
659
+ body,
660
+ expand_sources, # type: ignore
661
+ dialect=self.DIALECT,
662
+ copy=True,
663
+ )
664
+ except Exception:
665
+ expanded_body = body
666
+ try:
667
+ qualified_body = qualify(
668
+ expanded_body,
669
+ dialect=self.DIALECT,
670
+ schema=schema,
671
+ validate_qualify_columns=False,
672
+ identify=False,
673
+ )
674
+ body_scope = build_scope(qualified_body)
675
+ except Exception as _qualify_exc:
676
+ out.errors.append(
677
+ f"col_lineage_skip:qualify_failed:{type(_qualify_exc).__name__}"
678
+ )
679
+ # Schema-free retry: still yields a scope for the copy=False path.
680
+ try:
681
+ qualified_body = qualify(
682
+ expanded_body,
683
+ dialect=self.DIALECT,
684
+ validate_qualify_columns=False,
685
+ identify=False,
686
+ )
687
+ body_scope = build_scope(qualified_body)
688
+ except Exception:
689
+ body_scope = None
690
+
647
691
  # Extract output columns
648
692
  for col_expr in col_expressions:
649
693
  # Skip star projections — sg_lineage requires a concrete column name.
@@ -723,53 +767,23 @@ class SqlParser(ABC):
723
767
  continue
724
768
 
725
769
  try:
726
- # Build scope on first column for reuse across all columns (T-05 optimization).
727
- # NOTE: We build body_scope locally from the extracted body rather than
728
- # using a pre-built scope from the statement, because CREATE/INSERT statements
729
- # have their scope rooted at the outer statement, but the body passed here
730
- # is the inner SELECT. Reusing the outer scope would produce incorrect
731
- # qualification. The pre-built scope from parse_file would only be useful
732
- # if we had a mechanism to extract the matching inner scope, which is
733
- # complex and not yet implemented (see sprint_06 T-05 deviation for details).
734
- if body_scope is None and scope is None:
735
- try:
736
- # Expand only file-level sources (CTEs, temp tables, CTAS bodies).
737
- expanded_body = body
738
- expand_sources = {
739
- k: v for k, v in (sources or {}).items() if isinstance(v, exp.Query)
740
- }
741
- if expand_sources:
742
- expanded_body = exp.expand(
743
- body,
744
- expand_sources, # type: ignore
745
- dialect=self.DIALECT,
746
- copy=True,
747
- )
748
-
749
- # Qualify the expanded body to prepare for scope building
750
- qualified_body = qualify(
751
- expanded_body,
752
- dialect=self.DIALECT,
753
- schema=schema,
754
- validate_qualify_columns=False,
755
- identify=False,
756
- )
757
- body_scope = build_scope(qualified_body)
758
- except Exception as _qualify_exc:
759
- # qualify() failure is non-fatal: sg_lineage falls back to
760
- # its own qualification. Record for observability.
761
- out.errors.append(
762
- f"col_lineage_skip:qualify_failed:{type(_qualify_exc).__name__}:{_qualify_exc}"
763
- )
764
- body_scope = None
765
-
766
770
  # When a scope is available it embeds full column→table resolution.
767
771
  # On the qualify-failed fallback path (no scope), pass only the small
768
772
  # set of file-level sources so sg_lineage can resolve CTEs/CTAS bodies.
769
773
  active_scope = scope if scope is not None else body_scope
770
774
  sg_kwargs: dict = {"dialect": self.DIALECT}
771
775
  if active_scope is not None:
776
+ # scope= path: the pre-built scope already embeds full
777
+ # column→table resolution. copy=False + trim_selects=False
778
+ # suppress sqlglot's per-call AST deepcopy and per-column
779
+ # trim — neither is needed when the scope is built once and
780
+ # reused across all columns. Dropping these (regressed in
781
+ # 4234e5d) makes lineage() deepcopy the whole scope per
782
+ # column → O(columns × scope_size) (measured: 3.2M deepcopy
783
+ # calls / ~3.8s on a 359-line file).
772
784
  sg_kwargs["scope"] = active_scope
785
+ sg_kwargs["copy"] = False
786
+ sg_kwargs["trim_selects"] = False
773
787
  else:
774
788
  sg_kwargs["sources"] = sources or {}
775
789
  root = sg_lineage(col_name, body, **sg_kwargs)
@@ -912,6 +926,13 @@ class SqlParser(ABC):
912
926
  # stops sg_lineage at the CTE name boundary (doesn't expand into bodies).
913
927
  if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
914
928
  insert_cols = [c.name for c in stmt.this.expressions]
929
+ # Build the WITH-stripped body ONCE before the loop and only swap its
930
+ # single projection per column (regressed in 4234e5d, which moved the
931
+ # full-body body.copy() inside the loop → O(N_cols) full-body deepcopies
932
+ # for wide INSERT ... SELECT). Stripping WITH stops sg_lineage at the CTE
933
+ # name boundary.
934
+ body_no_with = body.copy()
935
+ body_no_with.set("with_", None)
915
936
  for idx, col_expr in enumerate(col_expressions):
916
937
  if idx >= len(insert_cols):
917
938
  break
@@ -920,10 +941,8 @@ class SqlParser(ABC):
920
941
  insert_col = insert_cols[idx]
921
942
  if not insert_col:
922
943
  continue
923
- # Build a patched SELECT: strip WITH, alias the expression with the
924
- # INSERT column name so sg_lineage can trace it.
925
- body_no_with = body.copy()
926
- body_no_with.set("with_", None)
944
+ # Patch the shared body with this column's aliased expression so
945
+ # sg_lineage can trace it to the INSERT column name.
927
946
  aliased = exp.Alias(this=col_expr.copy(), alias=insert_col)
928
947
  body_no_with.set("expressions", [aliased])
929
948
  patched_sql = body_no_with.sql(dialect=self.DIALECT)