sql-code-graph 1.1.3__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/cli/commands/db.py CHANGED
@@ -10,6 +10,7 @@ from rich.console import Console
10
10
  from sqlcg.core.config import get_backend, get_db_path
11
11
  from sqlcg.core.freshness import compute_freshness, render_freshness_line
12
12
  from sqlcg.core.schema import NodeLabel
13
+ from sqlcg.server.read_client import run_read_routed
13
14
  from sqlcg.utils.logging import getLogger
14
15
 
15
16
  logger = getLogger(__name__)
@@ -75,107 +76,112 @@ def db_reset( # noqa: B008
75
76
  @app.command("info")
76
77
  def db_info() -> None:
77
78
  """Show database stats."""
78
- with get_backend(read_only=True) as backend:
79
- version = backend.get_schema_version() or "unknown"
80
- console.print(f"Schema version: {version}")
81
-
82
- # Freshness block only shown when the DB has been indexed from a git repo
79
+ # db info is a read-only command. All Cypher reads route through the live
80
+ # server (run_read_routed) to avoid "Database is locked" while the MCP server
81
+ # holds the write lock. get_schema_version / get_indexed_sha are inlined as
82
+ # run_read_routed calls using their known Cypher so they too route through the
83
+ # socket when a server is live; this avoids a direct-open that would hit the lock.
84
+
85
+ # Schema version
86
+ schema_rows = run_read_routed("MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {})
87
+ version = (schema_rows[0]["version"] if schema_rows else None) or "unknown"
88
+ console.print(f"Schema version: {version}")
89
+
90
+ # Freshness block — only shown when the DB has been indexed from a git repo
91
+ try:
92
+ sha_rows = run_read_routed(
93
+ "MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
94
+ )
95
+ indexed_sha = sha_rows[0]["sha"] if sha_rows else None
96
+ repo_rows = run_read_routed("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
97
+ if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
98
+ repo_root = Path(repo_rows[0]["path"])
99
+ f = compute_freshness(repo_root, indexed_sha)
100
+ console.print(render_freshness_line(f))
101
+ except NotImplementedError:
102
+ # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
103
+ pass
104
+ except Exception as e:
105
+ # Any unexpected error in the freshness block must not crash db info
106
+ logger.debug(f"Freshness check skipped: {e}")
107
+
108
+ # Show node counts for all labels
109
+ for label in NodeLabel:
83
110
  try:
84
- indexed_sha = backend.get_indexed_sha()
85
- repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
86
- if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
87
- repo_root = Path(repo_rows[0]["path"])
88
- f = compute_freshness(repo_root, indexed_sha)
89
- console.print(render_freshness_line(f))
90
- except NotImplementedError:
91
- # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
92
- pass
111
+ result = run_read_routed(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
112
+ count = result[0]["count"] if result else 0
113
+ console.print(f" {label}: {count}")
93
114
  except Exception as e:
94
- # Any unexpected error in the freshness block must not crash db info
95
- logger.debug(f"Freshness check skipped: {e}")
96
-
97
- # Show node counts for all labels
98
- for label in NodeLabel:
99
- try:
100
- result = backend.run_read(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
101
- count = result[0]["count"] if result else 0
102
- console.print(f" {label}: {count}")
103
- except Exception as e:
104
- # Log unexpected exceptions instead of silently skipping
105
- logger.error(f"Error getting count for {label}: {e}")
106
- console.print(f" [red]{label}: error[/red]")
107
-
108
- # Health check section
109
- repo_count_result = backend.run_read("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
110
- repo_count = repo_count_result[0]["count"] if repo_count_result else 0
111
-
112
- if repo_count == 0:
113
- console.print( # noqa: E501
114
- "[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
115
+ # Log unexpected exceptions instead of silently skipping
116
+ logger.error(f"Error getting count for {label}: {e}")
117
+ console.print(f" [red]{label}: error[/red]")
118
+
119
+ # Health check section
120
+ repo_count_result = run_read_routed("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
121
+ repo_count = repo_count_result[0]["count"] if repo_count_result else 0
122
+
123
+ if repo_count == 0:
124
+ console.print( # noqa: E501
125
+ "[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
126
+ )
127
+ else:
128
+ query_count_result = run_read_routed("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
129
+ query_count = query_count_result[0]["count"] if query_count_result else 0
130
+
131
+ if query_count == 0:
132
+ console.print(
133
+ "[yellow]No queries indexed. Run 'sqlcg index <path>' to populate "
134
+ "the graph.[/yellow]"
115
135
  )
116
136
  else:
117
- query_count_result = backend.run_read("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
118
- query_count = query_count_result[0]["count"] if query_count_result else 0
137
+ col_count_result = run_read_routed("MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {})
138
+ col_count = col_count_result[0]["count"] if col_count_result else 0
119
139
 
120
- if query_count == 0:
140
+ if col_count == 0:
121
141
  console.print(
122
- "[yellow]No queries indexed. Run 'sqlcg index <path>' to populate "
123
- "the graph.[/yellow]"
142
+ "[yellow]Column lineage not available. Tools trace_column_lineage, "
143
+ "get_downstream_dependencies, and get_upstream_dependencies "
144
+ "will return empty results.[/yellow]"
124
145
  )
125
- else:
126
- col_count_result = backend.run_read(
127
- "MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {}
128
- )
129
- col_count = col_count_result[0]["count"] if col_count_result else 0
130
-
131
- if col_count == 0:
132
- console.print(
133
- "[yellow]Column lineage not available. Tools trace_column_lineage, "
134
- "get_downstream_dependencies, and get_upstream_dependencies "
135
- "will return empty results.[/yellow]"
136
- )
137
-
138
- # Print COLUMN_LINEAGE edges count
139
- edges_result = backend.run_read(
140
- "MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {}
141
- )
142
- edges_count = edges_result[0]["count"] if edges_result else 0
143
- console.print(f" COLUMN_LINEAGE edges: {edges_count}")
144
146
 
145
- # Print star resolution metrics (T-07)
146
- from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
147
+ # Print COLUMN_LINEAGE edges count
148
+ edges_result = run_read_routed("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
149
+ edges_count = edges_result[0]["count"] if edges_result else 0
150
+ console.print(f" COLUMN_LINEAGE edges: {edges_count}")
147
151
 
148
- star_source_result = backend.run_read(COUNT_STAR_SOURCES_QUERY, {})
149
- star_source_count = star_source_result[0]["n"] if star_source_result else 0
150
- console.print(f" STAR_SOURCE edges: {star_source_count}")
152
+ # Print star resolution metrics (T-07)
153
+ from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
151
154
 
152
- star_expansion_result = backend.run_read(COUNT_STAR_EXPANSIONS_QUERY, {})
153
- star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
154
- console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
155
+ star_source_result = run_read_routed(COUNT_STAR_SOURCES_QUERY, {})
156
+ star_source_count = star_source_result[0]["n"] if star_source_result else 0
157
+ console.print(f" STAR_SOURCE edges: {star_source_count}")
155
158
 
156
- # Print parsing mode distribution
157
- mode_query = (
158
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
159
- )
160
- mode_rows = backend.run_read(mode_query, {})
161
- if mode_rows and "mode" in mode_rows[0]:
162
- console.print("\n Parsing mode distribution:")
163
- for row in mode_rows:
164
- console.print(f" {row['mode']}: {row['cnt']}")
159
+ star_expansion_result = run_read_routed(COUNT_STAR_EXPANSIONS_QUERY, {})
160
+ star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
161
+ console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
162
+
163
+ # Print parsing mode distribution
164
+ mode_query = (
165
+ "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
166
+ )
167
+ mode_rows = run_read_routed(mode_query, {})
168
+ if mode_rows and "mode" in mode_rows[0]:
169
+ console.print("\n Parsing mode distribution:")
170
+ for row in mode_rows:
171
+ console.print(f" {row['mode']}: {row['cnt']}")
165
172
 
166
173
 
167
174
  @app.command("list-repos")
168
175
  def list_repos() -> None:
169
176
  """List all indexed repositories."""
170
- with get_backend(read_only=True) as backend:
171
- result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
177
+ result = run_read_routed("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
172
178
 
173
- if not result:
174
- console.print("[yellow]No repositories indexed[/yellow]")
175
- else:
176
- from rich.table import Table
179
+ if not result:
180
+ console.print("[yellow]No repositories indexed[/yellow]")
181
+ else:
182
+ from rich.table import Table
177
183
 
178
- table = Table("Path", "Name")
179
- for row in result:
180
- table.add_row(str(row.get("path", "")), str(row.get("name", "")))
181
- console.print(table)
184
+ table = Table("Path", "Name")
185
+ for row in result:
186
+ table.add_row(str(row.get("path", "")), str(row.get("name", "")))
187
+ console.print(table)
@@ -4,8 +4,8 @@ import typer
4
4
  from rich.console import Console
5
5
  from rich.table import Table
6
6
 
7
- from sqlcg.core.config import get_backend
8
7
  from sqlcg.core.schema import NodeLabel
8
+ from sqlcg.server.read_client import run_read_routed
9
9
 
10
10
  app = typer.Typer(help="Search the graph")
11
11
  console = Console()
@@ -18,21 +18,20 @@ def find_table( # noqa: B008
18
18
  ) -> None:
19
19
  """Find a table by name."""
20
20
  name = name.lower() # graph keys are lowercased at index time (C2 normalization)
21
- with get_backend(read_only=True) as backend:
22
- results = backend.run_read(
23
- f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
24
- "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
25
- {"name": name},
26
- )
27
- if not raw:
28
- from sqlcg.server.noise_filter import NoiseFilter
21
+ results = run_read_routed(
22
+ f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
23
+ "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
24
+ {"name": name},
25
+ )
26
+ if not raw:
27
+ from sqlcg.server.noise_filter import NoiseFilter
29
28
 
30
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
31
- ids = [r["qualified"] for r in results]
32
- kept, _ = nf.filter_nodes(ids)
33
- kept_set = set(kept)
34
- results = [r for r in results if r["qualified"] in kept_set]
35
- _print_table(results, ["qualified", "kind"])
29
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
30
+ ids = [r["qualified"] for r in results]
31
+ kept, _ = nf.filter_nodes(ids)
32
+ kept_set = set(kept)
33
+ results = [r for r in results if r["qualified"] in kept_set]
34
+ _print_table(results, ["qualified", "kind"])
36
35
 
37
36
 
38
37
  @app.command("column")
@@ -42,18 +41,17 @@ def find_column( # noqa: B008
42
41
  ) -> None:
43
42
  """Find a column by table.column reference."""
44
43
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
45
- with get_backend(read_only=True) as backend:
46
- results = backend.run_read(
47
- f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
48
- {"ref": ref},
49
- )
50
- if not raw:
51
- from sqlcg.server.noise_filter import NoiseFilter
44
+ results = run_read_routed(
45
+ f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
46
+ {"ref": ref},
47
+ )
48
+ if not raw:
49
+ from sqlcg.server.noise_filter import NoiseFilter
52
50
 
53
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
54
- # Filter on the schema.table portion of each column id (schema.table.column)
55
- results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
56
- _print_table(results, ["id"])
51
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
52
+ # Filter on the schema.table portion of each column id (schema.table.column)
53
+ results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
54
+ _print_table(results, ["id"])
57
55
 
58
56
 
59
57
  @app.command("pattern")
@@ -61,13 +59,12 @@ def find_pattern( # noqa: B008
61
59
  pattern: str = typer.Argument(..., help="SQL pattern to search for"), # noqa: B008
62
60
  ) -> None:
63
61
  """Find queries containing a SQL pattern."""
64
- with get_backend(read_only=True) as backend:
65
- results = backend.run_read(
66
- f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
67
- "RETURN q.id AS id, q.kind AS kind LIMIT 50",
68
- {"pattern": pattern},
69
- )
70
- _print_table(results, ["id", "kind"])
62
+ results = run_read_routed(
63
+ f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
64
+ "RETURN q.id AS id, q.kind AS kind LIMIT 50",
65
+ {"pattern": pattern},
66
+ )
67
+ _print_table(results, ["id", "kind"])
71
68
 
72
69
 
73
70
  def _print_table(rows: list[dict], columns: list[str]) -> None:
@@ -7,8 +7,8 @@ from pathlib import Path
7
7
  import typer
8
8
  from rich.console import Console
9
9
 
10
- from sqlcg.core.config import get_backend
11
10
  from sqlcg.metrics import store as metrics_module
11
+ from sqlcg.server.read_client import run_read_routed
12
12
  from sqlcg.utils.logging import getLogger
13
13
 
14
14
  logger = getLogger(__name__)
@@ -120,19 +120,21 @@ def gain_cmd(
120
120
  )
121
121
  execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
122
122
 
123
- # Section F: parse quality from graph
123
+ # Section F: parse quality from graph.
124
+ # run_read_routed raises typer.Exit (Exception-derived, NOT SystemExit) on
125
+ # server-busy timeout, so the except-Exception block degrades gracefully
126
+ # (skips the parse-quality section) instead of crashing gain (WARNING 3).
124
127
  parse_quality: dict[str, int] | None = None
125
128
  try:
126
- with get_backend(read_only=True) as backend:
127
- mode_rows = backend.run_read(
128
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
129
- " COUNT(q) AS cnt ORDER BY cnt DESC",
130
- {},
131
- )
132
- if mode_rows and "mode" in mode_rows[0]:
133
- parse_quality = {str(r["mode"]): int(r["cnt"]) for r in mode_rows}
129
+ mode_rows = run_read_routed(
130
+ "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
131
+ " COUNT(q) AS cnt ORDER BY cnt DESC",
132
+ {},
133
+ )
134
+ if mode_rows and "mode" in mode_rows[0]:
135
+ parse_quality = {str(r["mode"]): int(r["cnt"]) for r in mode_rows}
134
136
  except Exception:
135
- pass # graph not available — skip quality section
137
+ pass # graph not available or server busy — skip quality section
136
138
 
137
139
  if json_output:
138
140
  payload: dict = {
sqlcg/core/config.py CHANGED
@@ -350,18 +350,13 @@ def get_backend(read_only: bool = False) -> "GraphBackend":
350
350
  """Get a graph backend instance respecting the SQLCG_BACKEND env var.
351
351
 
352
352
  Args:
353
- read_only: Open in read-only mode. When ``True``, the KùzuDB open
354
- does not take an exclusive write lock, enabling *multiple concurrent
355
- read-only opens* (reader/reader concurrency). CLI read commands
356
- pass ``True`` so they do not hold the exclusive write lock and
357
- therefore do not block other concurrent readers or a pending reindex.
358
- Note: this does NOT allow reads while a read-write writer already
359
- holds the exclusive lock — KùzuDB's exclusive write lock is
360
- process-level; a ``read_only=True`` open still fails with
361
- "Database is locked" when a writer is active. Reads during an
362
- active writer remain a known limitation (future work: route reads
363
- through the live MCP server).
364
- Neo4j has no single-writer lock; this flag is a no-op there.
353
+ read_only: Open the database in read-only mode. For KuzuBackend this
354
+ enables multiple concurrent read-only opens (reader/reader
355
+ concurrency), but does NOT allow reads while a read-write writer
356
+ holds the exclusive process lock — that requires routing through the
357
+ live MCP server via ``read_client.run_read_routed`` (v1.2.0).
358
+ Ignored for Neo4jBackend (Neo4j has no single-writer process lock;
359
+ the flag is a no-op and the normal connection is opened).
365
360
  All writer call sites (index, reindex, db init/reset, server
366
361
  init_backend) use the default ``False``.
367
362
 
@@ -370,6 +365,13 @@ def get_backend(read_only: bool = False) -> "GraphBackend":
370
365
 
371
366
  Raises:
372
367
  ValueError: If backend type is not recognized
368
+
369
+ Note:
370
+ CLI read commands (find, analyze, db info, gain) route through a live
371
+ MCP server via ``read_client.run_read_routed`` (v1.2.0) when a server
372
+ is live, falling back to ``get_backend(read_only=True)`` when no server
373
+ is present. The fallback path still contends for the process lock under
374
+ an active writer (Windows / no-server fallback only).
373
375
  """
374
376
  backend_type = os.getenv("SQLCG_BACKEND", "kuzu")
375
377
 
@@ -396,7 +398,7 @@ def get_backend(read_only: bool = False) -> "GraphBackend":
396
398
  from sqlcg.core.neo4j_backend import Neo4jBackend
397
399
 
398
400
  neo4j_cfg = Neo4jConfig.from_env()
399
- # Neo4j has no single-writer lock; read_only is a no-op here.
401
+ # read_only is ignored for Neo4j — no single-writer process lock.
400
402
  return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
401
403
  else:
402
404
  raise ValueError(f"Unknown backend type: {backend_type}")
sqlcg/indexer/indexer.py CHANGED
@@ -93,8 +93,11 @@ def _flush_row_batch(db: GraphBackend, buf: BatchRowBuffer) -> None:
93
93
  This is the v1.1.1 batch-flush core: called once per batch (not once per file).
94
94
  Dedup keys mirror the graph's MERGE cardinality:
95
95
  - file_rows: path (primary key)
96
- - table_rows: qualified (primary key); prefers row with non-empty
97
- defined_in_file so DEFINED_IN provenance is preserved.
96
+ - table_rows: qualified (primary key); prefers (1) row with non-empty
97
+ defined_in_file so DEFINED_IN provenance is preserved;
98
+ (2) structural kind ('cte','derived','external') over
99
+ default 'table' so CTE aliases keep kind='cte' even when
100
+ also seen as source references with the default kind.
98
101
  - column_rows: id (primary key)
99
102
  - query_rows: id (primary key, globally unique path:index)
100
103
  - edge rows: (src_key, dst_key) only — matches MERGE (src)-[r]->(dst)
@@ -108,12 +111,27 @@ def _flush_row_batch(db: GraphBackend, buf: BatchRowBuffer) -> None:
108
111
  # --- Phase B: batch-scoped dedup ---
109
112
  # For table_rows, prefer defined rows (non-empty defined_in_file) so provenance
110
113
  # is not lost when a shared table is referenced by multiple files.
114
+ # Also prefer structurally-assigned kinds ('cte', 'derived', 'external') over the
115
+ # default 'table' kind: a CTE alias emitted first as a source reference (kind='table'
116
+ # default) and later confirmed as a CTE destination (kind='cte') must keep 'cte' so
117
+ # the kind filter correctly excludes it from default filtered output.
118
+ _structural_kinds = {"cte", "derived", "external"}
111
119
  table_dedup: dict[str, dict] = {}
112
120
  for r in buf.table_rows:
113
121
  key = r["qualified"]
114
122
  existing = table_dedup.get(key)
115
- if existing is None or (not existing.get("defined_in_file") and r.get("defined_in_file")):
123
+ if existing is None:
116
124
  table_dedup[key] = r
125
+ else:
126
+ # Rule 1: prefer rows with defined_in_file (DDL provenance)
127
+ if not existing.get("defined_in_file") and r.get("defined_in_file"):
128
+ table_dedup[key] = r
129
+ # Rule 2: prefer structural kind over default 'table'
130
+ elif (
131
+ existing.get("kind", "table") not in _structural_kinds
132
+ and r.get("kind", "table") in _structural_kinds
133
+ ):
134
+ table_dedup[key] = r
117
135
  table_rows = list(table_dedup.values())
118
136
 
119
137
  column_rows = list({r["id"]: r for r in buf.column_rows}.values())
@@ -432,7 +450,14 @@ class Indexer:
432
450
  Note: sqlcg watch's reindex_file uses a separate code path with
433
451
  its own short per-file transaction. PERF-BATCH only affects index_repo.
434
452
  """
435
- self._upsert_file_batch(batch, db, defined_table_registry, nonlocal_counts)
453
+ self._upsert_file_batch(
454
+ batch,
455
+ db,
456
+ defined_table_registry,
457
+ nonlocal_counts,
458
+ canonical_by_bare=aggregator.canonical_by_bare,
459
+ ambiguous_bare=aggregator._ambiguous_bare,
460
+ )
436
461
 
437
462
  if profile:
438
463
  _t_upsert_start = time.perf_counter()
@@ -975,6 +1000,8 @@ class Indexer:
975
1000
  self,
976
1001
  parsed: ParsedFile,
977
1002
  defined_table_registry: dict[str, str] | None = None,
1003
+ canonical_by_bare: dict[str, str] | None = None,
1004
+ ambiguous_bare: set[str] | None = None,
978
1005
  ) -> FileRowSet:
979
1006
  """Build all row dicts for one ParsedFile — Phase A (pure, no db access).
980
1007
 
@@ -986,6 +1013,13 @@ class Indexer:
986
1013
  Args:
987
1014
  parsed: ParsedFile to build rows for
988
1015
  defined_table_registry: Optional cross-file DDL dedup registry
1016
+ canonical_by_bare: Optional #44 bare-name → canonical full_id index
1017
+ (populated by CrossFileAggregator.register_pass1 from DDL tables).
1018
+ When provided, unqualified INSERT targets whose bare name maps to
1019
+ exactly one DDL canonical are rewritten to use the canonical full_id.
1020
+ When None, the rewrite is skipped (single-file / reindex_file path).
1021
+ ambiguous_bare: Optional set of bare names defined in >1 schema.
1022
+ These are never rewritten — the existing _bare_ref CLI hint handles them.
989
1023
 
990
1024
  Returns:
991
1025
  FileRowSet with all row lists and per-file counts/quality key
@@ -1201,15 +1235,53 @@ class Indexer:
1201
1235
  rows.counts["star_sources"] += 1
1202
1236
 
1203
1237
  # Upsert target table node (if not already a defined_table)
1204
- # so that star expansion can create destination columns
1238
+ # so that star expansion can create destination columns.
1239
+ # #44: when a canonical_by_bare index is available, attempt to resolve
1240
+ # an unqualified / wrong-schema INSERT target to its DDL-canonical full_id
1241
+ # so that INSERT-target nodes share identity with the DDL node.
1205
1242
  if stmt.target and stmt.target.full_id not in defined_table_ids:
1243
+ target_qualified = stmt.target.full_id
1244
+ target_name = stmt.target.name
1245
+ target_db = stmt.target.db or ""
1246
+ target_catalog = stmt.target.catalog or ""
1247
+ target_kind = "table"
1248
+
1249
+ # #44 canonical-name resolution: if bare name maps unambiguously to a
1250
+ # DDL-defined table, use the canonical full_id for the emitted row.
1251
+ # Degrading to no-op when canonical_by_bare is None (single-file path).
1252
+ if canonical_by_bare is not None:
1253
+ bare = (stmt.target.name or "").lower()
1254
+ if bare and bare not in (ambiguous_bare or set()) and bare in canonical_by_bare:
1255
+ # Resolve to the sole DDL canonical — keeps kind='table'
1256
+ canonical_id = canonical_by_bare[bare]
1257
+ target_qualified = canonical_id
1258
+ # Derive name/db/catalog from the canonical full_id parts.
1259
+ # full_id format: "db.name" or "catalog.db.name" or "name"
1260
+ parts = canonical_id.split(".")
1261
+ if len(parts) >= 3:
1262
+ target_name = parts[-1]
1263
+ target_db = parts[-2]
1264
+ target_catalog = parts[-3]
1265
+ elif len(parts) == 2:
1266
+ target_name = parts[-1]
1267
+ target_db = parts[-2]
1268
+ target_catalog = ""
1269
+ else:
1270
+ target_name = canonical_id
1271
+ target_db = ""
1272
+ target_catalog = ""
1273
+ else:
1274
+ # Not resolved to a DDL canonical — mark as derived so the
1275
+ # kind filter excludes it from default (non-raw) output (#45.2)
1276
+ target_kind = "derived"
1277
+
1206
1278
  rows.table_rows.append(
1207
1279
  {
1208
- "qualified": stmt.target.full_id,
1209
- "name": stmt.target.name,
1210
- "catalog": stmt.target.catalog or "",
1211
- "db": stmt.target.db or "",
1212
- "kind": "table",
1280
+ "qualified": target_qualified,
1281
+ "name": target_name,
1282
+ "catalog": target_catalog,
1283
+ "db": target_db,
1284
+ "kind": target_kind,
1213
1285
  "defined_in_file": "",
1214
1286
  }
1215
1287
  )
@@ -1223,6 +1295,8 @@ class Indexer:
1223
1295
  defined_table_registry: dict[str, str],
1224
1296
  nonlocal_counts: dict,
1225
1297
  warning_prefix: str = "",
1298
+ canonical_by_bare: dict[str, str] | None = None,
1299
+ ambiguous_bare: set[str] | None = None,
1226
1300
  ) -> None:
1227
1301
  """Accumulate rows for all files in batch, then flush once in one transaction.
1228
1302
 
@@ -1237,13 +1311,19 @@ class Indexer:
1237
1311
  defined_table_registry: Cross-file DDL dedup registry
1238
1312
  nonlocal_counts: Mutable summary dict updated in place (tables/edges/quality/…)
1239
1313
  warning_prefix: Optional prefix for warning log messages (e.g. "resync_changed: ")
1314
+ canonical_by_bare: Optional #44 bare-name → canonical full_id index.
1315
+ When provided, unqualified INSERT targets are rewritten to their DDL
1316
+ canonical full_id. None on single-file / reindex_file paths.
1317
+ ambiguous_bare: Optional set of bare names defined in >1 schema.
1240
1318
  """
1241
1319
  if not batch:
1242
1320
  return
1243
1321
  buf = BatchRowBuffer()
1244
1322
  for parsed_in_batch in batch:
1245
1323
  try:
1246
- file_rows = self._build_file_rows(parsed_in_batch, defined_table_registry)
1324
+ file_rows = self._build_file_rows(
1325
+ parsed_in_batch, defined_table_registry, canonical_by_bare, ambiguous_bare
1326
+ )
1247
1327
  except Exception as exc:
1248
1328
  logger.warning(
1249
1329
  "%sFailed to build rows for %s: %s — skipping",