sql-code-graph 0.3.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.1.dist-info}/METADATA +87 -9
  2. sql_code_graph-1.0.1.dist-info/RECORD +63 -0
  3. sqlcg/__init__.py +1 -1
  4. sqlcg/cli/commands/analyze.py +24 -0
  5. sqlcg/cli/commands/db.py +40 -7
  6. sqlcg/cli/commands/gain.py +5 -17
  7. sqlcg/cli/commands/git.py +71 -40
  8. sqlcg/cli/commands/index.py +151 -17
  9. sqlcg/cli/commands/install.py +147 -8
  10. sqlcg/cli/commands/mcp.py +12 -0
  11. sqlcg/cli/commands/reindex.py +170 -0
  12. sqlcg/cli/commands/uninstall.py +94 -39
  13. sqlcg/cli/commands/watch.py +14 -1
  14. sqlcg/cli/main.py +8 -0
  15. sqlcg/core/config.py +185 -2
  16. sqlcg/core/graph_db.py +65 -0
  17. sqlcg/core/kuzu_backend.py +177 -6
  18. sqlcg/core/neo4j_backend.py +38 -0
  19. sqlcg/core/queries.cypher +114 -0
  20. sqlcg/core/queries.py +44 -82
  21. sqlcg/core/schema.cypher +15 -3
  22. sqlcg/core/schema.py +2 -1
  23. sqlcg/indexer/error_classify.py +140 -0
  24. sqlcg/indexer/git_delta.py +121 -0
  25. sqlcg/indexer/indexer.py +951 -132
  26. sqlcg/indexer/pool.py +500 -0
  27. sqlcg/indexer/walker.py +1 -3
  28. sqlcg/indexer/watcher.py +68 -18
  29. sqlcg/lineage/aggregator.py +58 -2
  30. sqlcg/lineage/schema_resolver.py +26 -14
  31. sqlcg/parsers/ansi_parser.py +195 -26
  32. sqlcg/parsers/base.py +627 -58
  33. sqlcg/parsers/bigquery_parser.py +7 -2
  34. sqlcg/parsers/postgres_parser.py +7 -2
  35. sqlcg/parsers/registry.py +7 -2
  36. sqlcg/parsers/snowflake_parser.py +170 -8
  37. sqlcg/parsers/tsql_parser.py +7 -2
  38. sqlcg/server/models.py +297 -4
  39. sqlcg/server/noise_filter.py +167 -0
  40. sqlcg/server/skill.py +256 -0
  41. sqlcg/server/tools.py +934 -178
  42. sql_code_graph-0.3.0.dist-info/RECORD +0 -56
  43. {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.1.dist-info}/WHEEL +0 -0
  44. {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.1.dist-info}/entry_points.txt +0 -0
@@ -28,6 +28,8 @@ def uninstall_cmd( # noqa: B008
28
28
  Step 1: Remove MCP registration from ~/.claude/settings.json
29
29
  Step 2: Optionally delete the KùzuDB graph database
30
30
  Step 3: Remove git hook sentinel block from .git/hooks/post-checkout
31
+ Step 4: Remove sqlcg skill directory from ~/.claude/skills/sqlcg/ and
32
+ <repo>/.claude/skills/sqlcg/
31
33
  """
32
34
  # Step 1: Remove MCP entry from settings.json
33
35
  _step1_remove_mcp_entry()
@@ -44,6 +46,9 @@ def uninstall_cmd( # noqa: B008
44
46
  repo_path = repo if repo else Path.cwd()
45
47
  _step3_remove_git_hook(repo_path)
46
48
 
49
+ # Step 4: Remove sqlcg skill directory
50
+ _step4_remove_skill(repo_path)
51
+
47
52
 
48
53
  def _step1_remove_mcp_entry() -> None:
49
54
  """Remove the 'sql-code-graph' entry from ~/.claude/settings.json."""
@@ -130,84 +135,134 @@ def _step2_delete_database(force: bool) -> None:
130
135
  console.print(f"[yellow]Warning:[/yellow] Failed to delete metrics store: {e}")
131
136
 
132
137
 
133
- def _step3_remove_git_hook(repo_path: Path) -> None:
134
- """Remove the git hook sentinel block from .git/hooks/post-checkout."""
135
- hook_file = repo_path / ".git" / "hooks" / "post-checkout"
138
+ def _strip_sentinel_block(content: str, sentinel: str) -> str:
139
+ """Strip the block introduced by sentinel from hook file content.
136
140
 
137
- if not hook_file.exists():
138
- console.print(f"[yellow]No git hook found in {repo_path}[/yellow]")
139
- return
141
+ The block starts at the sentinel line and extends until an empty line that is followed
142
+ by non-empty content (end-of-block), or until the end of file.
140
143
 
141
- # Read the file
142
- content = hook_file.read_text()
143
-
144
- # Strip the sentinel block: from "# sqlcg post-checkout hook" to the end of the block
145
- # The block ends when we encounter a line that doesn't start with whitespace/# or is empty
146
- # followed by non-empty content
144
+ Returns the stripped content (may be empty if the sentinel was the only content).
145
+ """
147
146
  lines = content.split("\n")
148
147
  filtered_lines = []
149
148
  skip_mode = False
150
149
 
151
150
  for i, line in enumerate(lines):
152
- if "# sqlcg post-checkout hook" in line:
151
+ if sentinel in line:
153
152
  skip_mode = True
154
153
  continue
155
154
 
156
155
  if skip_mode:
157
- # Skip all lines that are part of the hook block
158
- # The block extends from the sentinel comment until we hit an empty line
159
- # followed by non-hook content, or until the end of file
160
156
  if line.strip() == "":
161
- # Check if there's content after this blank line that's not the hook
162
157
  remaining = "\n".join(lines[i + 1 :]).strip()
163
158
  if remaining:
164
- # There's content after this blank line, so end the skip mode
165
159
  skip_mode = False
166
- filtered_lines.append("") # Preserve the blank line separator
167
- # else: blank line is at end of file, just skip it
168
- # else: continue skipping
160
+ filtered_lines.append("") # Preserve blank-line separator
161
+ # else: trailing blank line at end of file, skip it
162
+ # else: continue skipping body lines
169
163
  continue
170
164
 
171
165
  filtered_lines.append(line)
172
166
 
173
- # Reconstruct the content
174
167
  if filtered_lines:
175
- new_content = "\n".join(filtered_lines).strip() + "\n"
176
- else:
177
- new_content = ""
168
+ return "\n".join(filtered_lines).strip() + "\n"
169
+ return ""
170
+
171
+
172
+ def _remove_single_hook(repo_path: Path, filename: str, sentinel: str) -> None:
173
+ """Strip the sqlcg sentinel block from one git hook file.
174
+
175
+ If the file becomes empty after stripping, delete it.
176
+ If the file does not exist, emit a notice and return.
177
+ """
178
+ hook_file = repo_path / ".git" / "hooks" / filename
179
+
180
+ if not hook_file.exists():
181
+ console.print(f"[yellow]No {filename} hook found in {repo_path}[/yellow]")
182
+ return
183
+
184
+ content = hook_file.read_text()
185
+
186
+ if sentinel not in content:
187
+ # Nothing to strip
188
+ return
189
+
190
+ new_content = _strip_sentinel_block(content, sentinel)
178
191
 
179
192
  if not new_content.strip():
180
- # File became empty, delete it
181
193
  try:
182
194
  hook_file.unlink()
183
- console.print(
184
- f"[green]Removed git hook from {repo_path}/.git/hooks/post-checkout[/green]"
185
- )
195
+ console.print(f"[green]Removed git hook from {repo_path}/.git/hooks/{filename}[/green]")
186
196
  except Exception as e:
187
197
  console.print(f"[yellow]Warning:[/yellow] Failed to delete hook file: {e}")
188
198
  else:
189
- # Write back the filtered content
190
199
  try:
191
200
  hook_file.write_text(new_content)
192
- console.print(
193
- f"[green]Removed git hook from {repo_path}/.git/hooks/post-checkout[/green]"
194
- )
201
+ console.print(f"[green]Removed git hook from {repo_path}/.git/hooks/{filename}[/green]")
195
202
  except Exception as e:
196
203
  console.print(f"[yellow]Warning:[/yellow] Failed to update hook file: {e}")
197
204
 
198
205
 
206
+ # (filename, sentinel) pairs for all sqlcg-managed hooks
207
+ _HOOK_SENTINELS: list[tuple[str, str]] = [
208
+ ("post-checkout", "# sqlcg post-checkout hook"),
209
+ ("post-merge", "# sqlcg post-merge hook"),
210
+ ]
211
+
212
+
213
+ def _step3_remove_git_hook(repo_path: Path) -> None:
214
+ """Remove sqlcg sentinel blocks from all managed git hook files.
215
+
216
+ Strips both post-checkout and post-merge hooks. Deletes a hook file if it
217
+ becomes empty after stripping; preserves foreign content otherwise.
218
+ """
219
+ for filename, sentinel in _HOOK_SENTINELS:
220
+ _remove_single_hook(repo_path, filename, sentinel)
221
+
222
+
199
223
  def _get_db_path() -> str | None:
200
224
  """Get the configured database path from environment or default."""
201
- db_path = os.getenv("SQLCG_DB_PATH")
202
- if db_path:
203
- return db_path
225
+ from sqlcg.core.config import KuzuConfig
204
226
 
205
- # Default path for kuzu
206
- default_path = str(Path.home() / ".sqlcg" / "kuzu.db")
207
- return default_path if Path(default_path).exists() else None
227
+ db_path = str(KuzuConfig.from_env().db_path)
228
+ return db_path if Path(db_path).exists() else None
208
229
 
209
230
 
210
231
  def _is_kuzu_backend(db_path: str) -> bool:
211
232
  """Check if the database is a KùzuDB backend (not Neo4j)."""
212
233
  backend = os.getenv("SQLCG_BACKEND", "kuzu").lower()
213
234
  return backend in ("kuzu", "") # Default to kuzu if unset
235
+
236
+
237
+ # Candidate skill directory locations to remove (global first, then project-relative)
238
+ # Each entry is a callable(repo_path) -> Path resolving to the sqlcg skill dir.
239
+ _SKILL_DIR_TARGETS = [
240
+ lambda repo_path: Path.home() / ".claude" / "skills" / "sqlcg",
241
+ lambda repo_path: repo_path / ".claude" / "skills" / "sqlcg",
242
+ ]
243
+
244
+
245
+ def _step4_remove_skill(repo_path: Path) -> None:
246
+ """Remove the sqlcg-owned skill directory at all candidate locations.
247
+
248
+ Iterates over the global (~/.claude/skills/sqlcg/) and project-relative
249
+ (<repo>/.claude/skills/sqlcg/) directories. For each:
250
+ - If the directory exists, removes it with shutil.rmtree (ignoring errors)
251
+ and prints a green "Removed" notice.
252
+ - If the directory does not exist, prints a yellow "not found" notice.
253
+
254
+ Only the sqlcg/ subdirectory is ever removed — the parent skills/ dir and
255
+ any sibling skill directories are left untouched.
256
+ """
257
+ any_found = False
258
+ for target_fn in _SKILL_DIR_TARGETS:
259
+ skill_dir = target_fn(repo_path)
260
+ if skill_dir.exists():
261
+ any_found = True
262
+ shutil.rmtree(skill_dir, ignore_errors=True)
263
+ console.print(f"[green]Removed skill directory:[/green] {skill_dir}")
264
+ else:
265
+ console.print(f"[yellow]Skill directory not found:[/yellow] {skill_dir}")
266
+
267
+ if not any_found:
268
+ console.print("[yellow]No skill directories found — nothing to remove.[/yellow]")
@@ -29,6 +29,17 @@ def watch_cmd( # noqa: B008
29
29
  with get_backend() as backend:
30
30
  backend.init_schema()
31
31
 
32
+ # Check schema version — must match current build
33
+ from sqlcg.core.schema import SCHEMA_VERSION
34
+
35
+ stored = backend.get_schema_version()
36
+ if stored != SCHEMA_VERSION:
37
+ console.print(
38
+ f"[red]Database schema is v{stored}; this build requires v{SCHEMA_VERSION}. "
39
+ "Run 'sqlcg db reset && sqlcg db init' to re-initialize.[/red]"
40
+ )
41
+ raise typer.Exit(1)
42
+
32
43
  indexer = Indexer()
33
44
 
34
45
  # Initial full index
@@ -37,7 +48,9 @@ def watch_cmd( # noqa: B008
37
48
 
38
49
  spec = load_ignore_spec(path)
39
50
  job_manager = WatchJobManager(indexer, backend, dialect)
40
- handler = SqlFileEventHandler(job_manager, backend, spec, path, indexer=indexer)
51
+ handler = SqlFileEventHandler(
52
+ job_manager, backend, spec, path, indexer=indexer, dialect=dialect
53
+ )
41
54
  observer = Observer()
42
55
  observer.schedule(handler, str(path), recursive=True)
43
56
  observer.start()
sqlcg/cli/main.py CHANGED
@@ -12,6 +12,7 @@ from sqlcg.cli.commands import (
12
12
  index,
13
13
  install,
14
14
  mcp,
15
+ reindex,
15
16
  report,
16
17
  uninstall,
17
18
  watch,
@@ -23,6 +24,12 @@ QUICK START:
23
24
  1. sqlcg db init
24
25
  2. sqlcg index <path> --dialect snowflake
25
26
  3. sqlcg git install-hooks
27
+ 4. sqlcg install --scope project # also provisions a Claude skill (SKILL.md)
28
+
29
+ USING THE MCP TOOLS:
30
+ Read `sqlcg mcp best-practices` first — it explains the fact/heuristic
31
+ boundary so heuristic output (dead-code, risk) is never reported as fact.
32
+ See `sqlcg mcp --help` for all MCP commands.
26
33
 
27
34
  Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.
28
35
  """
@@ -38,6 +45,7 @@ app.add_typer(git.app, name="git")
38
45
 
39
46
  # Register single commands
40
47
  app.command("index")(index.index_cmd)
48
+ app.command("reindex")(reindex.reindex_cmd)
41
49
  app.command("watch")(watch.watch_cmd)
42
50
  app.command("gain")(gain.gain_cmd)
43
51
  app.command("report")(report.report_cmd)
sqlcg/core/config.py CHANGED
@@ -15,6 +15,10 @@ class KuzuConfig(BaseModel):
15
15
  """Configuration for KùzuDB backend."""
16
16
 
17
17
  db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")
18
+ buffer_pool_size_mb: int = Field(
19
+ default=0,
20
+ description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
21
+ )
18
22
 
19
23
  @classmethod
20
24
  def from_env(cls) -> "KuzuConfig":
@@ -24,7 +28,11 @@ class KuzuConfig(BaseModel):
24
28
  KuzuConfig instance with environment-overridden values if present.
25
29
  """
26
30
  env_path = os.getenv("SQLCG_DB_PATH")
27
- return cls(db_path=Path(env_path)) if env_path else cls()
31
+ env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
32
+ return cls(
33
+ db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
34
+ buffer_pool_size_mb=int(env_buf) if env_buf else 0,
35
+ )
28
36
 
29
37
 
30
38
  class Neo4jConfig(BaseModel):
@@ -79,6 +87,178 @@ def get_dialect(path: Path) -> str:
79
87
  return "snowflake"
80
88
 
81
89
 
90
+ def get_schema_aliases(path: Path) -> dict[str, str]:
91
+ """Get schema alias mappings from .sqlcg.toml.
92
+
93
+ Reads [sqlcg.schema_aliases] and returns a lowercased staging-schema →
94
+ canonical-schema dict. Use this when a staging area sits under a different
95
+ schema but the table names are identical, e.g.::
96
+
97
+ [sqlcg.schema_aliases]
98
+ da_tmp = "da"
99
+ ba_tmp = "ba"
100
+
101
+ Any table reference whose schema part matches a key is rewritten to use the
102
+ canonical schema instead, so ``da_tmp.my_table`` is traced as ``da.my_table``.
103
+
104
+ Args:
105
+ path: Root directory to search for .sqlcg.toml
106
+
107
+ Returns:
108
+ Dict mapping staging schema name (lowercase) to its canonical replacement
109
+ """
110
+ config_file = Path(path) / ".sqlcg.toml"
111
+ if config_file.exists():
112
+ try:
113
+ with open(config_file, "rb") as f:
114
+ config = tomllib.load(f)
115
+ raw = config.get("sqlcg", {}).get("schema_aliases", {})
116
+ if isinstance(raw, dict):
117
+ return {k.lower(): v for k, v in raw.items() if isinstance(v, str)}
118
+ except Exception:
119
+ pass
120
+ return {}
121
+
122
+
123
+ def get_noise_filter_patterns(path: Path) -> list[str]:
124
+ """Get backup table ignore patterns from .sqlcg.toml.
125
+
126
+ Reads [sqlcg.noise_filter] -> ignore_table_patterns (a list of glob strings)
127
+ from .sqlcg.toml. Returns the list lowercased. When the key is absent,
128
+ returns a built-in default list::
129
+
130
+ [sqlcg.noise_filter]
131
+ ignore_table_patterns = ["*_bck", "*_bck_us", "*_bck_[0-9]*", "*_backup", "*_backup_[0-9]*"]
132
+
133
+ Args:
134
+ path: Root directory to search for .sqlcg.toml
135
+
136
+ Returns:
137
+ List of glob patterns (all lowercased). Defaults to built-in backup patterns.
138
+ """
139
+ default_patterns = [
140
+ "*_bck",
141
+ "*_bck_us",
142
+ "*_bck_[0-9]*",
143
+ "*_backup",
144
+ "*_backup_[0-9]*",
145
+ ]
146
+ config_file = Path(path) / ".sqlcg.toml"
147
+ if config_file.exists():
148
+ try:
149
+ with open(config_file, "rb") as f:
150
+ config = tomllib.load(f)
151
+ raw = config.get("sqlcg", {}).get("noise_filter", {}).get("ignore_table_patterns")
152
+ if isinstance(raw, list):
153
+ return [p.lower() if isinstance(p, str) else p for p in raw]
154
+ except Exception:
155
+ pass
156
+ return default_patterns
157
+
158
+
159
+ def get_ignored_tables(path: Path) -> list[str]:
160
+ """Get explicitly-ignored qualified table names from .sqlcg.toml.
161
+
162
+ Complements ``get_noise_filter_patterns`` (glob patterns) with an exact
163
+ qualified-name list, for specific tables that do not follow a backup naming
164
+ convention but should still be dropped from tool answers — e.g. a
165
+ load-control / delta-bookkeeping table::
166
+
167
+ [sqlcg.noise_filter]
168
+ ignored_tables = ["ma.rtetl_delta", "ctl.load_log"]
169
+
170
+ Names are matched exactly (case-insensitive) against ``schema.table``. The
171
+ lineage engine still records these as real edges; this only lets a user
172
+ declare them noise in config rather than baking the judgment into code.
173
+
174
+ Args:
175
+ path: Root directory to search for .sqlcg.toml
176
+
177
+ Returns:
178
+ List of qualified table names (all lowercased). Defaults to an empty list.
179
+ """
180
+ config_file = Path(path) / ".sqlcg.toml"
181
+ if config_file.exists():
182
+ try:
183
+ with open(config_file, "rb") as f:
184
+ config = tomllib.load(f)
185
+ raw = config.get("sqlcg", {}).get("noise_filter", {}).get("ignored_tables")
186
+ if isinstance(raw, list):
187
+ return [t.lower() for t in raw if isinstance(t, str)]
188
+ except Exception:
189
+ pass
190
+ return []
191
+
192
+
193
+ def get_ignore_table_regexes(path: Path) -> list[str]:
194
+ """Get table-exclusion regexes from .sqlcg.toml.
195
+
196
+ Complements ``get_noise_filter_patterns`` (anchored fnmatch globs) and
197
+ ``get_ignored_tables`` (exact names) with full regular expressions, for
198
+ backup conventions the globs cannot express — e.g. a ``_bck`` marker that
199
+ can appear anywhere in the name, not just as a suffix::
200
+
201
+ [sqlcg.noise_filter]
202
+ ignore_table_regexes = ["_bck", "_tmp_[0-9]{8}"]
203
+
204
+ Each pattern is matched (``re.search``, case-insensitive) against the full
205
+ qualified ``schema.table`` name, so an unanchored ``_bck`` excludes
206
+ ``ba.foo_bck`` and ``da.bar_bck_archive`` alike (the latter is missed by the
207
+ suffix-anchored ``*_bck`` glob). The
208
+ lineage engine still records these as real edges; this only lets a user
209
+ declare them noise in config rather than baking the judgment into code.
210
+
211
+ Args:
212
+ path: Root directory to search for .sqlcg.toml
213
+
214
+ Returns:
215
+ List of regex strings (kept verbatim — not lowercased, so character
216
+ classes survive). Defaults to an empty list.
217
+ """
218
+ config_file = Path(path) / ".sqlcg.toml"
219
+ if config_file.exists():
220
+ try:
221
+ with open(config_file, "rb") as f:
222
+ config = tomllib.load(f)
223
+ raw = config.get("sqlcg", {}).get("noise_filter", {}).get("ignore_table_regexes")
224
+ if isinstance(raw, list):
225
+ return [r for r in raw if isinstance(r, str)]
226
+ except Exception:
227
+ pass
228
+ return []
229
+
230
+
231
+ def get_presentation_prefixes(path: Path) -> list[str]:
232
+ """Get presentation-facing schema prefixes from .sqlcg.toml.
233
+
234
+ Reads [sqlcg.presentation] -> schema_prefixes (a list of strings) from
235
+ .sqlcg.toml. Returns the list lowercased. **Defaults to an empty list** when
236
+ the key is absent — when unset, presentation-facing detection is simply off
237
+ (correct generic behaviour for any user). No schema prefix is hardcoded in
238
+ shipped code; a DWH that wants ``ia_`` flagged must declare it::
239
+
240
+ [sqlcg.presentation]
241
+ schema_prefixes = ["ia_"]
242
+
243
+ Args:
244
+ path: Root directory to search for .sqlcg.toml
245
+
246
+ Returns:
247
+ List of schema prefixes (all lowercased). Defaults to an empty list.
248
+ """
249
+ config_file = Path(path) / ".sqlcg.toml"
250
+ if config_file.exists():
251
+ try:
252
+ with open(config_file, "rb") as f:
253
+ config = tomllib.load(f)
254
+ raw = config.get("sqlcg", {}).get("presentation", {}).get("schema_prefixes")
255
+ if isinstance(raw, list):
256
+ return [p.lower() for p in raw if isinstance(p, str)]
257
+ except Exception:
258
+ pass
259
+ return []
260
+
261
+
82
262
  def get_backend() -> "GraphBackend":
83
263
  """Get a graph backend instance respecting the SQLCG_BACKEND env var.
84
264
 
@@ -94,7 +274,10 @@ def get_backend() -> "GraphBackend":
94
274
  from sqlcg.core.kuzu_backend import KuzuBackend
95
275
 
96
276
  kuzu_cfg = KuzuConfig.from_env()
97
- return KuzuBackend(str(kuzu_cfg.db_path))
277
+ return KuzuBackend(
278
+ str(kuzu_cfg.db_path),
279
+ buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
280
+ )
98
281
  elif backend_type == "neo4j":
99
282
  from sqlcg.core.neo4j_backend import Neo4jBackend
100
283
 
sqlcg/core/graph_db.py CHANGED
@@ -67,6 +67,52 @@ class GraphBackend(ABC):
67
67
  properties: Dict of properties to set/update on the relationship
68
68
  """
69
69
 
70
+ @abstractmethod
71
+ def upsert_nodes_bulk(
72
+ self,
73
+ label: str,
74
+ rows: list[dict[str, Any]],
75
+ ) -> None:
76
+ """Bulk-upsert nodes of one label in a single backend round-trip.
77
+
78
+ Each row dict must contain the primary-key field for `label` (see _pk_field)
79
+ plus any other properties to SET. All rows must share the same property-key
80
+ set; backends MAY raise if rows are heterogeneous (KuzuBackend does).
81
+
82
+ Idempotent MERGE semantics, identical to upsert_node per row.
83
+
84
+ Args:
85
+ label: Node label (e.g., NodeLabel.COLUMN)
86
+ rows: List of property dicts. Empty list is a no-op.
87
+ """
88
+
89
+ @abstractmethod
90
+ def upsert_edges_bulk(
91
+ self,
92
+ src_label: str,
93
+ dst_label: str,
94
+ rel_type: str,
95
+ rows: list[dict[str, Any]],
96
+ ) -> None:
97
+ """Bulk-upsert edges of one (src_label, rel_type, dst_label) triple.
98
+
99
+ Each row dict must contain:
100
+ - "src_key": source primary-key value (matches src_label _pk_field)
101
+ - "dst_key": destination primary-key value (matches dst_label _pk_field)
102
+ - Any additional keys are set as edge properties.
103
+
104
+ Idempotent MERGE semantics, identical to upsert_edge per row. Rows whose
105
+ src or dst node does not exist are silently skipped by KuzuDB's MERGE
106
+ semantics — callers must ensure node upserts happen first within the same
107
+ transaction (see indexer ordering rules in _upsert_parsed_file).
108
+
109
+ Args:
110
+ src_label: Source node label
111
+ dst_label: Destination node label
112
+ rel_type: Relationship type
113
+ rows: List of edge property dicts. Empty list is a no-op.
114
+ """
115
+
70
116
  @abstractmethod
71
117
  def run_read(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
72
118
  """Execute a read-only query and return results.
@@ -112,6 +158,25 @@ class GraphBackend(ABC):
112
158
  The schema version string, or None if not set.
113
159
  """
114
160
 
161
+ @abstractmethod
162
+ def set_indexed_sha(self, sha: str) -> None:
163
+ """Persist the git SHA of the last successful index.
164
+
165
+ Written by index_repo on success and by resync_changed on success.
166
+
167
+ Args:
168
+ sha: Git commit SHA string (e.g. from git rev-parse HEAD).
169
+ """
170
+
171
+ @abstractmethod
172
+ def get_indexed_sha(self) -> str | None:
173
+ """Retrieve the git SHA of the last successful index.
174
+
175
+ Returns:
176
+ The stored SHA string, or None if never set (repo pre-dates this
177
+ feature, or the DB was freshly initialised).
178
+ """
179
+
115
180
  @abstractmethod
116
181
  def close(self) -> None:
117
182
  """Close the database connection."""