sql-code-graph 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. sql_code_graph-0.2.1.dist-info/METADATA +171 -0
  2. sql_code_graph-0.2.1.dist-info/RECORD +55 -0
  3. sql_code_graph-0.2.1.dist-info/WHEEL +4 -0
  4. sql_code_graph-0.2.1.dist-info/entry_points.txt +2 -0
  5. sqlcg/__init__.py +5 -0
  6. sqlcg/__main__.py +6 -0
  7. sqlcg/cli/__init__.py +1 -0
  8. sqlcg/cli/commands/__init__.py +1 -0
  9. sqlcg/cli/commands/analyze.py +93 -0
  10. sqlcg/cli/commands/db.py +83 -0
  11. sqlcg/cli/commands/find.py +63 -0
  12. sqlcg/cli/commands/gain.py +169 -0
  13. sqlcg/cli/commands/git.py +73 -0
  14. sqlcg/cli/commands/index.py +92 -0
  15. sqlcg/cli/commands/install.py +60 -0
  16. sqlcg/cli/commands/mcp.py +54 -0
  17. sqlcg/cli/commands/report.py +135 -0
  18. sqlcg/cli/commands/watch.py +57 -0
  19. sqlcg/cli/main.py +40 -0
  20. sqlcg/core/__init__.py +8 -0
  21. sqlcg/core/config.py +104 -0
  22. sqlcg/core/graph_db.py +179 -0
  23. sqlcg/core/jobs.py +105 -0
  24. sqlcg/core/kuzu_backend.py +269 -0
  25. sqlcg/core/neo4j_backend.py +195 -0
  26. sqlcg/core/queries.py +82 -0
  27. sqlcg/core/schema.cypher +104 -0
  28. sqlcg/core/schema.py +48 -0
  29. sqlcg/indexer/__init__.py +1 -0
  30. sqlcg/indexer/dbt_adapter.py +23 -0
  31. sqlcg/indexer/indexer.py +317 -0
  32. sqlcg/indexer/walker.py +55 -0
  33. sqlcg/indexer/watcher.py +195 -0
  34. sqlcg/lineage/__init__.py +1 -0
  35. sqlcg/lineage/aggregator.py +58 -0
  36. sqlcg/lineage/schema_resolver.py +198 -0
  37. sqlcg/metrics/__init__.py +5 -0
  38. sqlcg/metrics/store.py +273 -0
  39. sqlcg/parsers/__init__.py +30 -0
  40. sqlcg/parsers/ansi_parser.py +215 -0
  41. sqlcg/parsers/base.py +414 -0
  42. sqlcg/parsers/bigquery_parser.py +77 -0
  43. sqlcg/parsers/postgres_parser.py +27 -0
  44. sqlcg/parsers/registry.py +46 -0
  45. sqlcg/parsers/snowflake_parser.py +148 -0
  46. sqlcg/parsers/tsql_parser.py +27 -0
  47. sqlcg/server/__init__.py +1 -0
  48. sqlcg/server/exceptions.py +20 -0
  49. sqlcg/server/models.py +83 -0
  50. sqlcg/server/server.py +57 -0
  51. sqlcg/server/tools.py +663 -0
  52. sqlcg/utils/__init__.py +6 -0
  53. sqlcg/utils/hashing.py +18 -0
  54. sqlcg/utils/ignore.py +36 -0
  55. sqlcg/utils/logging.py +29 -0
@@ -0,0 +1,198 @@
1
+ """SchemaResolver for managing table schema information and resolving table/view references.
2
+
3
+ Thread-safety: a non-reentrant threading.Lock guards all cache mutations, so a
4
+ method holding the lock must not call another locking method. Do not share a
5
+ SchemaResolver across concurrent jobs — construct one per re-index job instead (see jobs.py).
6
+ """
7
+
8
+ import copy
9
+ import threading
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from sqlcg.utils.logging import getLogger
14
+
15
+ logger = getLogger(__name__)
16
+
17
+
18
+ class SchemaResolver:
19
+ """Manages table schema information for SQL parsing and column lineage.
20
+
21
+ Attributes:
22
+ dialect: SQL dialect (e.g., "snowflake", "bigquery", None for ANSI)
23
+ _tables: Internal dict of (catalog, db, table) -> [col_names]
24
+ _view_bodies: Mapping of view names to ParsedFile objects
25
+ _lock: threading.Lock protecting mutations and cache
26
+ _cache: Manual cache dict for as_dict() results
27
+ """
28
+
29
+ def __init__(self, dialect: str | None = None):
30
+ """Initialize SchemaResolver.
31
+
32
+ Args:
33
+ dialect: Optional SQL dialect for normalization
34
+ """
35
+ self.dialect = dialect
36
+ self._tables: dict[tuple[str | None, str | None, str], list[str]] = {}
37
+ self._view_bodies: dict[str, Any] = {} # str -> ParsedFile
38
+ self._lock = threading.Lock()
39
+ self._cache: dict | None = None
40
+
41
+ def add_create_table(self, ast: Any) -> None:
42
+ """Parse a CREATE TABLE AST node and register the table schema.
43
+
44
+ Args:
45
+ ast: sqlglot AST node (exp.Create)
46
+ """
47
+ import sqlglot.expressions as exp
48
+
49
+ if not isinstance(ast, exp.Create):
50
+ return
51
+
52
+ # Extract table name (catalog, db, table)
53
+ table_expr = ast.this
54
+ if not table_expr:
55
+ return
56
+
57
+ # If this is a Schema node, extract the table from it
58
+ if isinstance(table_expr, exp.Schema):
59
+ actual_table = table_expr.this
60
+ else:
61
+ actual_table = table_expr
62
+
63
+ # Parse the table reference
64
+ catalog, db, table_name = self._extract_table_parts(actual_table)
65
+ if not table_name:
66
+ return
67
+
68
+ # Extract column names from the CREATE statement
69
+ # Walk the AST to find all ColumnDef nodes
70
+ col_names = []
71
+ for node in ast.walk():
72
+ if isinstance(node, exp.ColumnDef):
73
+ col_names.append(node.name)
74
+
75
+ with self._lock:
76
+ self._tables[(catalog, db, table_name)] = col_names
77
+ self._cache = None # Invalidate cache
78
+
79
+ def add_view_sources(self, sources: dict[str, Any]) -> None:
80
+ """Register view-to-source-table mapping.
81
+
82
+ Args:
83
+ sources: Mapping of view names to ParsedFile objects
84
+ """
85
+ with self._lock:
86
+ self._view_bodies.update(sources)
87
+ self._cache = None # Invalidate cache
88
+
89
+ def add_dbt_manifest(self, manifest_path: str | Path) -> None:
90
+ """Load and register schemas from a dbt manifest.
91
+
92
+ Args:
93
+ manifest_path: Path to dbt manifest.json
94
+ """
95
+ try:
96
+ import json
97
+ from pathlib import Path
98
+
99
+ manifest_path = Path(manifest_path)
100
+ if not manifest_path.exists():
101
+ logger.warning("dbt manifest not found: %s", manifest_path)
102
+ return
103
+
104
+ with open(manifest_path, encoding="utf-8") as f:
105
+ manifest = json.load(f)
106
+
107
+ # Extract table schemas from manifest nodes
108
+ nodes = manifest.get("nodes", {})
109
+ with self._lock:
110
+ for _node_id, node_data in nodes.items():
111
+ if node_data.get("resource_type") not in ("table", "view"):
112
+ continue
113
+
114
+ # Parse dbt node metadata
115
+ table_name = node_data.get("name", "")
116
+ database = node_data.get("database", "")
117
+ schema = node_data.get("schema", "")
118
+
119
+ if not table_name:
120
+ continue
121
+
122
+ # Extract column names from columns metadata
123
+ col_names = list(node_data.get("columns", {}).keys())
124
+
125
+ # Store in internal dict
126
+ key = (database if database else None, schema if schema else None, table_name)
127
+ self._tables[key] = col_names
128
+
129
+ self._cache = None # Invalidate cache
130
+
131
+ except (FileNotFoundError, json.JSONDecodeError, KeyError) as exc:
132
+ logger.warning("Failed to load dbt manifest %s: %s", manifest_path, exc)
133
+
134
+ def add_information_schema(self, csv_path: str | Path) -> None:
135
+ """Load table schemas from an information_schema CSV.
136
+
137
+ Args:
138
+ csv_path: Path to CSV file
139
+
140
+ Raises:
141
+ NotImplementedError: This feature is deferred to v2.
142
+ """
143
+ raise NotImplementedError("--schema-from-info-schema is not yet implemented (v2)")
144
+
145
+ def as_dict(self) -> dict:
146
+ """Return the schema as a nested dict: {catalog: {db: {table: [cols]}}}.
147
+
148
+ Returns:
149
+ A deep copy of the cached schema dict. Mutations by the caller
150
+ do not affect the internal cache.
151
+ """
152
+ with self._lock:
153
+ if self._cache is None:
154
+ self._cache = self._build_dict()
155
+ return copy.deepcopy(self._cache)
156
+
157
+ def _build_dict(self) -> dict:
158
+ """Build the nested schema dictionary (called only under self._lock).
159
+
160
+ Returns:
161
+ Nested dictionary structure
162
+ """
163
+ out: dict = {}
164
+ for (cat, db, name), cols in self._tables.items():
165
+ cur = out
166
+ for k in [cat, db]:
167
+ if k:
168
+ cur = cur.setdefault(k, {})
169
+ cur[name] = cols
170
+ return out
171
+
172
+ @staticmethod
173
+ def _extract_table_parts(table_expr: Any) -> tuple[str | None, str | None, str]:
174
+ """Extract catalog, db, and table name from a table expression.
175
+
176
+ Args:
177
+ table_expr: sqlglot table expression
178
+
179
+ Returns:
180
+ Tuple of (catalog, db, table_name)
181
+ """
182
+ import sqlglot.expressions as exp
183
+
184
+ match table_expr:
185
+ case exp.Table():
186
+ # table.name is the table identifier
187
+ # table.db is the schema (if present)
188
+ return (
189
+ table_expr.catalog,
190
+ table_expr.db,
191
+ table_expr.name,
192
+ )
193
+ case exp.Identifier():
194
+ return (None, None, table_expr.name)
195
+ case _:
196
+ # Try to extract name from expression
197
+ table_name = table_expr.name if hasattr(table_expr, "name") else ""
198
+ return (None, None, table_name)
@@ -0,0 +1,5 @@
1
+ """Metrics collection and reporting for SQL Code Graph."""
2
+
3
+ from sqlcg.metrics.store import MetricsStore
4
+
5
+ __all__ = ["MetricsStore"]
sqlcg/metrics/store.py ADDED
@@ -0,0 +1,273 @@
1
+ """SQLite-based metrics storage for SQL Code Graph.
2
+
3
+ Importable without KùzuDB. All writes are wrapped in try/except with
4
+ WARNING-level logging on failure. Opt-out via SQLCG_METRICS=0.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import sqlite3
10
+ from datetime import UTC, datetime
11
+ from pathlib import Path
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class MetricsStore:
17
+ """SQLite metrics store with append-only design.
18
+
19
+ All write operations are best-effort: failures are logged as WARNING
20
+ and do not raise. This ensures that metrics collection never crashes
21
+ the application.
22
+
23
+ Use as a context manager to ensure proper cleanup.
24
+ """
25
+
26
+ def __init__(self, db_path: Path | str) -> None:
27
+ """Initialize metrics store.
28
+
29
+ Args:
30
+ db_path: Path to SQLite database file.
31
+ """
32
+ self.db_path = Path(db_path)
33
+ self._conn: sqlite3.Connection | None = None
34
+
35
+ # Check if metrics are disabled
36
+ if os.environ.get("SQLCG_METRICS", "").strip().lower() == "0":
37
+ self._enabled = False
38
+ else:
39
+ self._enabled = True
40
+ # Ensure parent directory exists
41
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
42
+ self._conn = sqlite3.connect(
43
+ str(self.db_path),
44
+ check_same_thread=True,
45
+ timeout=5.0,
46
+ )
47
+ # Enable WAL mode for better concurrency
48
+ try:
49
+ self._conn.execute("PRAGMA journal_mode=WAL")
50
+ except sqlite3.Error:
51
+ pass # Silently ignore pragma failures
52
+
53
+ def __enter__(self) -> "MetricsStore":
54
+ """Context manager entry."""
55
+ return self
56
+
57
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
58
+ """Context manager exit with cleanup."""
59
+ self.close()
60
+
61
+ def close(self) -> None:
62
+ """Close the database connection."""
63
+ if self._conn is not None:
64
+ try:
65
+ self._conn.close()
66
+ except sqlite3.Error:
67
+ pass # Silently ignore close errors
68
+ finally:
69
+ self._conn = None
70
+
71
+ def init_schema(self) -> None:
72
+ """Initialize database schema (idempotent).
73
+
74
+ Creates three tables if they don't exist:
75
+ - tool_calls: MCP tool invocations with timing
76
+ - index_runs: Indexer.index_repo() invocations with parse stats
77
+ - feedback: User feedback labels (TP/FP) for queries
78
+ """
79
+ if not self._enabled or self._conn is None:
80
+ return
81
+
82
+ try:
83
+ self._conn.execute(
84
+ """
85
+ CREATE TABLE IF NOT EXISTS tool_calls (
86
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
87
+ timestamp TEXT NOT NULL,
88
+ tool_name TEXT NOT NULL,
89
+ duration_ms REAL NOT NULL,
90
+ success INTEGER NOT NULL
91
+ )
92
+ """
93
+ )
94
+ self._conn.execute(
95
+ """
96
+ CREATE TABLE IF NOT EXISTS index_runs (
97
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
98
+ timestamp TEXT NOT NULL,
99
+ repo_path TEXT NOT NULL,
100
+ files_parsed INTEGER NOT NULL,
101
+ parse_errors INTEGER NOT NULL,
102
+ tables_found INTEGER NOT NULL,
103
+ lineage_edges INTEGER NOT NULL,
104
+ duration_ms REAL NOT NULL
105
+ )
106
+ """
107
+ )
108
+ self._conn.execute(
109
+ """
110
+ CREATE TABLE IF NOT EXISTS feedback (
111
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
112
+ timestamp TEXT NOT NULL,
113
+ tool_name TEXT NOT NULL,
114
+ query TEXT NOT NULL,
115
+ label TEXT NOT NULL,
116
+ note TEXT
117
+ )
118
+ """
119
+ )
120
+ self._conn.commit()
121
+ except sqlite3.Error as e:
122
+ logger.warning(f"Failed to initialize metrics schema: {e}")
123
+
124
+ def record_tool_call(
125
+ self,
126
+ tool_name: str,
127
+ duration_ms: float,
128
+ success: bool = True,
129
+ ) -> None:
130
+ """Record a tool call with timing information.
131
+
132
+ Args:
133
+ tool_name: Name of the MCP tool.
134
+ duration_ms: Execution time in milliseconds.
135
+ success: Whether the call succeeded.
136
+ """
137
+ if not self._enabled or self._conn is None:
138
+ return
139
+
140
+ try:
141
+ timestamp = datetime.now(UTC).isoformat()
142
+ self._conn.execute(
143
+ """
144
+ INSERT INTO tool_calls (timestamp, tool_name, duration_ms, success)
145
+ VALUES (?, ?, ?, ?)
146
+ """,
147
+ (timestamp, tool_name, duration_ms, int(success)),
148
+ )
149
+ self._conn.commit()
150
+ except sqlite3.Error as e:
151
+ logger.warning(f"Failed to record tool call: {e}")
152
+
153
+ def record_index_run(
154
+ self,
155
+ repo_path: str,
156
+ files_parsed: int,
157
+ parse_errors: int,
158
+ tables_found: int,
159
+ lineage_edges: int,
160
+ duration_ms: float,
161
+ ) -> None:
162
+ """Record an index run with parse statistics.
163
+
164
+ Args:
165
+ repo_path: Path to the indexed repository.
166
+ files_parsed: Number of SQL files parsed.
167
+ parse_errors: Number of files with parse errors.
168
+ tables_found: Number of table/view nodes created.
169
+ lineage_edges: Number of lineage edges created.
170
+ duration_ms: Total duration in milliseconds.
171
+ """
172
+ if not self._enabled or self._conn is None:
173
+ return
174
+
175
+ try:
176
+ timestamp = datetime.now(UTC).isoformat()
177
+ self._conn.execute(
178
+ """
179
+ INSERT INTO index_runs
180
+ (timestamp, repo_path, files_parsed, parse_errors, tables_found,
181
+ lineage_edges, duration_ms)
182
+ VALUES (?, ?, ?, ?, ?, ?, ?)
183
+ """,
184
+ (
185
+ timestamp,
186
+ repo_path,
187
+ files_parsed,
188
+ parse_errors,
189
+ tables_found,
190
+ lineage_edges,
191
+ duration_ms,
192
+ ),
193
+ )
194
+ self._conn.commit()
195
+ except sqlite3.Error as e:
196
+ logger.warning(f"Failed to record index run: {e}")
197
+
198
+ def record_feedback(
199
+ self,
200
+ tool_name: str,
201
+ query: str,
202
+ label: str,
203
+ note: str = "",
204
+ ) -> None:
205
+ """Record feedback on a tool result.
206
+
207
+ Args:
208
+ tool_name: Name of the tool being evaluated.
209
+ query: The query or pattern that was evaluated.
210
+ label: Feedback label: "TP" or "FP".
211
+ note: Optional user note (truncated to 500 chars).
212
+ """
213
+ if not self._enabled or self._conn is None:
214
+ return
215
+
216
+ try:
217
+ timestamp = datetime.now(UTC).isoformat()
218
+ # Truncate note to 500 characters
219
+ truncated_note = note[:500] if note else None
220
+ self._conn.execute(
221
+ """
222
+ INSERT INTO feedback (timestamp, tool_name, query, label, note)
223
+ VALUES (?, ?, ?, ?, ?)
224
+ """,
225
+ (timestamp, tool_name, query, label, truncated_note),
226
+ )
227
+ self._conn.commit()
228
+ except sqlite3.Error as e:
229
+ logger.warning(f"Failed to record feedback: {e}")
230
+
231
+ def execute_query(self, query: str, params: tuple | None = None) -> list[tuple]:
232
+ """Execute a read-only query.
233
+
234
+ Args:
235
+ query: SQL SELECT query.
236
+ params: Optional tuple of query parameters for placeholders.
237
+
238
+ Returns:
239
+ List of result tuples, or empty list on error.
240
+ """
241
+ if not self._enabled or self._conn is None:
242
+ return []
243
+
244
+ try:
245
+ if params:
246
+ cursor = self._conn.execute(query, params)
247
+ else:
248
+ cursor = self._conn.execute(query)
249
+ return cursor.fetchall()
250
+ except sqlite3.Error as e:
251
+ logger.warning(f"Failed to execute query: {e}")
252
+ return []
253
+
254
+ def table_exists(self, table_name: str) -> bool:
255
+ """Check if a table exists in the database.
256
+
257
+ Args:
258
+ table_name: Name of the table.
259
+
260
+ Returns:
261
+ True if table exists, False otherwise.
262
+ """
263
+ if not self._enabled or self._conn is None:
264
+ return False
265
+
266
+ try:
267
+ cursor = self._conn.execute(
268
+ "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?",
269
+ (table_name,),
270
+ )
271
+ return cursor.fetchone() is not None
272
+ except sqlite3.Error:
273
+ return False
@@ -0,0 +1,30 @@
1
+ """Parser module for SQL lineage extraction."""
2
+
3
+ # Import all dialect parsers to register them
4
+ from sqlcg.parsers.ansi_parser import AnsiParser # noqa: F401
5
+ from sqlcg.parsers.base import (
6
+ ColumnRef,
7
+ LineageEdge,
8
+ ParsedFile,
9
+ QueryNode,
10
+ SqlParser,
11
+ TableRef,
12
+ )
13
+ from sqlcg.parsers.bigquery_parser import BigQueryParser # noqa: F401
14
+ from sqlcg.parsers.postgres_parser import PostgresParser # noqa: F401
15
+ from sqlcg.parsers.snowflake_parser import SnowflakeParser # noqa: F401
16
+ from sqlcg.parsers.tsql_parser import TsqlParser # noqa: F401
17
+
18
+ __all__ = [
19
+ "TableRef",
20
+ "ColumnRef",
21
+ "LineageEdge",
22
+ "QueryNode",
23
+ "ParsedFile",
24
+ "SqlParser",
25
+ "AnsiParser",
26
+ "BigQueryParser",
27
+ "PostgresParser",
28
+ "SnowflakeParser",
29
+ "TsqlParser",
30
+ ]