sqlprism 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlprism/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """SQLPrism — queryable knowledge graph for SQL codebases."""
sqlprism/cli.py ADDED
@@ -0,0 +1,625 @@
1
+ """CLI entry point for the SQLPrism MCP server.
2
+
3
+ Reads config from a JSON file or command-line arguments,
4
+ initialises the server, and runs it.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ import click
13
+
14
+ from sqlprism.core.mcp_tools import configure, mcp
15
+ from sqlprism.types import parse_repo_config
16
+
17
# Default on-disk locations, both under the current user's ~/.sqlprism directory.
DEFAULT_DB_PATH = Path.home().joinpath(".sqlprism", "graph.duckdb")
DEFAULT_CONFIG_PATH = Path.home().joinpath(".sqlprism", "config.json")
19
+
20
+
21
@click.group()
@click.option(
    "--log-level",
    default="WARNING",
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
    help="Set logging verbosity",
)
@click.pass_context
def cli(ctx, log_level):
    """SQLPrism — SQL knowledge graph for your codebase."""
    # Root command group: prepares ctx.obj and configures root logging once
    # for every subcommand.
    ctx.ensure_object(dict)

    # Upper-case the option value before resolving it to the numeric level
    # constant on the logging module (the choice is case-insensitive).
    numeric_level = getattr(logging, log_level.upper())
    logging.basicConfig(
        datefmt="%Y-%m-%dT%H:%M:%S",
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        level=numeric_level,
    )
37
+
38
+
39
@cli.command()
@click.option(
    "--config",
    "config_path",
    type=click.Path(),
    default=str(DEFAULT_CONFIG_PATH),
    help="Path to config file",
)
@click.option(
    "--db",
    "db_path",
    type=click.Path(),
    default=None,
    help="Path to DuckDB file (overrides config)",
)
@click.option(
    "--transport",
    type=click.Choice(["stdio", "streamable_http"]),
    default="stdio",
    help="MCP transport mode",
)
@click.option("--port", type=int, default=8000, help="Port for HTTP transport")
def serve(config_path: str, db_path: str | None, transport: str, port: int):
    """Start the MCP server."""
    # A server benefits from more visibility: if the root logger is quieter
    # than INFO, reconfigure it to INFO. A level that is already INFO or more
    # verbose (e.g. DEBUG) is left untouched.
    if logging.getLogger().level > logging.INFO:
        logging.basicConfig(
            force=True,
            level=logging.INFO,
            format="%(asctime)s %(levelname)s %(name)s: %(message)s",
            datefmt="%Y-%m-%dT%H:%M:%S",
        )

    settings = _load_config(config_path)

    # Precedence: --db flag, then the config's "db_path", then the default.
    database = db_path or settings.get("db_path", str(DEFAULT_DB_PATH))

    # The database's parent directory must exist before the server starts.
    Path(database).parent.mkdir(parents=True, exist_ok=True)

    configure(
        db_path=database,
        repos=settings.get("repos", {}),
        sql_dialect=settings.get("sql_dialect"),
    )

    if transport != "stdio":
        # NOTE(review): MCP server libraries commonly spell this transport
        # "streamable-http" (hyphen) — confirm `mcp.run` accepts the
        # underscore form passed here. `--port` only applies on this branch.
        mcp.run(transport="streamable_http", port=port)
        return

    mcp.run()
89
+
90
+
91
@cli.command()
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--repo", "repo_name", type=str, default=None, help="Reindex a specific repo only")
def reindex(config_path: str, db_path: str | None, repo_name: str | None):
    """Run a manual reindex from the command line."""
    # Indexes the "repos", "sqlmesh_repos" and "dbt_repos" sections of the
    # config. With --repo, only the entry of that name is indexed, in
    # whichever section(s) it appears.
    #
    # Exits with status 1 when --repo names nothing in any section, or when
    # any SQL repo reported parse errors.
    from sqlprism.core.graph import GraphDB
    from sqlprism.core.indexer import Indexer

    config = _load_config(config_path)
    effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))

    repos = config.get("repos", {})
    sqlmesh_repos = config.get("sqlmesh_repos", {})
    dbt_repos = config.get("dbt_repos", {})

    if repo_name:
        # Look the name up in every section. Checking only "repos" would
        # reject a --repo that refers to a sqlmesh or dbt project.
        sections = (repos, sqlmesh_repos, dbt_repos)
        if not any(repo_name in section for section in sections):
            click.echo(f"Error: repo '{repo_name}' not in config", err=True)
            sys.exit(1)
        repos, sqlmesh_repos, dbt_repos = (
            {repo_name: section[repo_name]} if repo_name in section else {}
            for section in sections
        )

    # The database is only created/opened once the request is known to be valid.
    Path(effective_db_path).parent.mkdir(parents=True, exist_ok=True)
    graph = GraphDB(effective_db_path)

    all_parse_errors: list[str] = []
    try:
        indexer = Indexer(graph)

        # Plain SQL repos
        for name, cfg in repos.items():
            path, dialect, dialect_overrides = parse_repo_config(cfg, config.get("sql_dialect"))
            dialect_suffix = f" [{dialect}]" if dialect else ""
            click.echo(f"Indexing {name} ({path}){dialect_suffix}...")
            stats = indexer.reindex_repo(
                name,
                path,
                dialect=dialect,
                dialect_overrides=dialect_overrides,
            )
            click.echo(
                f"  scanned={stats['files_scanned']}, "
                f"added={stats['files_added']}, "
                f"changed={stats['files_changed']}, "
                f"removed={stats['files_removed']}, "
                f"nodes={stats['nodes_added']}, "
                f"edges={stats['edges_added']}, "
                f"column_usage={stats['column_usage_added']}"
            )
            if stats.get("parse_errors"):
                all_parse_errors.extend(stats["parse_errors"])

        # sqlmesh projects; names starting with "#" are treated as
        # commented-out entries and skipped.
        for name, sm_config in sqlmesh_repos.items():
            if name.startswith("#"):
                continue
            click.echo(f"Indexing sqlmesh project {name} ({sm_config['project_path']})...")
            variables: dict[str, str | int] = sm_config.get("variables", {})
            stats = indexer.reindex_sqlmesh(
                repo_name=name,
                project_path=sm_config["project_path"],
                env_file=sm_config.get("env_file"),
                variables=variables,
                dialect=sm_config.get("dialect", "athena"),
                sqlmesh_command=sm_config.get("sqlmesh_command", "uv run python"),
            )
            click.echo(
                f"  models={stats['models_rendered']}, "
                f"nodes={stats['nodes_added']}, "
                f"edges={stats['edges_added']}, "
                f"column_usage={stats['column_usage_added']}"
            )

        # dbt projects; same "#" skip convention as above.
        for name, dbt_config in dbt_repos.items():
            if name.startswith("#"):
                continue
            click.echo(f"Indexing dbt project {name} ({dbt_config['project_path']})...")
            stats = indexer.reindex_dbt(
                repo_name=name,
                project_path=dbt_config["project_path"],
                profiles_dir=dbt_config.get("profiles_dir"),
                env_file=dbt_config.get("env_file"),
                target=dbt_config.get("target"),
                dbt_command=dbt_config.get("dbt_command", "uv run dbt"),
                dialect=dbt_config.get("dialect"),
            )
            click.echo(
                f"  models={stats['models_compiled']}, "
                f"nodes={stats['nodes_added']}, "
                f"edges={stats['edges_added']}, "
                f"column_usage={stats['column_usage_added']}"
            )
    finally:
        # Close the database even when an indexer call raises.
        graph.close()

    if all_parse_errors:
        click.echo(f"\n{len(all_parse_errors)} parse error(s):", err=True)
        for err in all_parse_errors:
            click.echo(f"  {err}", err=True)
        sys.exit(1)

    click.echo("Done.")
204
+
205
+
206
@cli.command("reindex-sqlmesh")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--name", "repo_name", type=str, required=True, help="Repo name for the index")
@click.option(
    "--project",
    "project_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to sqlmesh project dir (containing config.yaml)",
)
@click.option(
    "--env-file",
    type=click.Path(exists=True),
    default=None,
    help="Path to .env file for sqlmesh config",
)
@click.option("--dialect", type=str, default="athena", help="SQL dialect (default: athena)")
@click.option(
    "--var",
    "variables",
    type=(str, str),
    multiple=True,
    help="SQLMesh variables as key value pairs, e.g. --var GRACE_PERIOD 7",
)
@click.option(
    "--sqlmesh-command",
    type=str,
    default="uv run python",
    help="Command to run python in sqlmesh venv (default: 'uv run python')",
)
def reindex_sqlmesh(
    config_path: str,
    db_path: str | None,
    repo_name: str,
    project_path: str,
    env_file: str | None,
    dialect: str,
    variables: tuple[tuple[str, str], ...],
    sqlmesh_command: str,
):
    """Index a sqlmesh project by rendering all models."""
    from sqlprism.core.graph import GraphDB
    from sqlprism.core.indexer import Indexer

    config = _load_config(config_path)
    effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))
    Path(effective_db_path).parent.mkdir(parents=True, exist_ok=True)

    # Convert --var pairs to a dict. Values that parse as integers are passed
    # as int; everything else stays a string.
    var_dict: dict[str, str | int] = {}
    for key, raw_value in variables:
        try:
            var_dict[key] = int(raw_value)
        except ValueError:
            var_dict[key] = raw_value

    graph = GraphDB(effective_db_path)
    try:
        indexer = Indexer(graph)

        click.echo(f"Rendering sqlmesh models from {project_path}...")
        stats = indexer.reindex_sqlmesh(
            repo_name=repo_name,
            project_path=project_path,
            env_file=env_file,
            variables=var_dict,
            dialect=dialect,
            sqlmesh_command=sqlmesh_command,
        )
        click.echo(
            f"  models={stats['models_rendered']}, "
            f"nodes={stats['nodes_added']}, "
            f"edges={stats['edges_added']}, "
            f"column_usage={stats['column_usage_added']}"
        )
    finally:
        # Close the database even when rendering or indexing raises.
        graph.close()

    click.echo("Done.")
284
+
285
+
286
@cli.command("reindex-dbt")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--name", "repo_name", type=str, required=True, help="Repo name for the index")
@click.option(
    "--project",
    "project_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to dbt project dir (containing dbt_project.yml)",
)
@click.option(
    "--profiles-dir",
    type=click.Path(exists=True),
    default=None,
    help="Path to directory containing profiles.yml (defaults to project dir)",
)
@click.option(
    "--env-file",
    type=click.Path(exists=True),
    default=None,
    help="Path to .env file for dbt connection variables",
)
@click.option("--target", type=str, default=None, help="dbt target name")
@click.option(
    "--dbt-command",
    type=str,
    default="uv run dbt",
    help="Command to invoke dbt (default: 'uv run dbt')",
)
@click.option(
    "--dialect",
    type=str,
    default=None,
    help="SQL dialect for parsing (e.g. starrocks, mysql, postgres)",
)
def reindex_dbt_cmd(
    config_path: str,
    db_path: str | None,
    repo_name: str,
    project_path: str,
    profiles_dir: str | None,
    env_file: str | None,
    target: str | None,
    dbt_command: str,
    dialect: str | None,
):
    """Index a dbt project by compiling all models."""
    from sqlprism.core.graph import GraphDB
    from sqlprism.core.indexer import Indexer

    config = _load_config(config_path)
    effective_db_path = db_path or config.get("db_path", str(DEFAULT_DB_PATH))
    Path(effective_db_path).parent.mkdir(parents=True, exist_ok=True)

    graph = GraphDB(effective_db_path)
    try:
        indexer = Indexer(graph)

        click.echo(f"Compiling dbt models from {project_path}...")
        stats = indexer.reindex_dbt(
            repo_name=repo_name,
            project_path=project_path,
            profiles_dir=profiles_dir,
            env_file=env_file,
            target=target,
            dbt_command=dbt_command,
            dialect=dialect,
        )
        click.echo(
            f"  models={stats['models_compiled']}, "
            f"nodes={stats['nodes_added']}, "
            f"edges={stats['edges_added']}, "
            f"column_usage={stats['column_usage_added']}"
        )
    finally:
        # Close the database even when compilation or indexing raises.
        graph.close()

    click.echo("Done.")
363
+
364
+
365
@cli.group()
def query():
    """Query the knowledge graph."""
    # Pure container group: the `query ...` subcommands attach to it and it
    # performs no work of its own.
369
+
370
+
371
def _open_graph(config_path: str, db_path: str | None):
    """Open the index database for read-style commands.

    The database path is ``db_path`` when given, otherwise the config's
    ``"db_path"`` entry, otherwise ``DEFAULT_DB_PATH``. If no file exists at
    that path, a hint is printed to stderr and the process exits with
    status 1; otherwise a ``GraphDB`` opened on the path is returned.
    """
    from sqlprism.core.graph import GraphDB

    settings = _load_config(config_path)
    if db_path:
        resolved = db_path
    else:
        resolved = settings.get("db_path", str(DEFAULT_DB_PATH))

    if Path(resolved).exists():
        return GraphDB(resolved)

    click.echo("No index found. Run 'sqlprism reindex' first.", err=True)
    sys.exit(1)
383
+
384
+
385
@query.command("search")
@click.argument("pattern")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--kind", type=str, default=None, help="Filter by node kind")
@click.option("--schema", type=str, default=None, help="Filter by schema")
@click.option("--repo", type=str, default=None, help="Filter by repo name")
@click.option("--limit", type=int, default=20, help="Max results (default 20)")
def query_search(
    config_path: str,
    db_path: str | None,
    pattern: str,
    kind: str | None,
    schema: str | None,
    repo: str | None,
    limit: int,
):
    """Search nodes by name pattern."""
    # Prints the query result as indented JSON; values json cannot encode
    # natively are stringified via default=str.
    graph = _open_graph(config_path, db_path)
    try:
        result = graph.query_search(
            pattern=pattern,
            kind=kind,
            schema=schema,
            repo=repo,
            limit=limit,
            include_snippets=False,
        )
    finally:
        # Close the database even when the query raises.
        graph.close()
    click.echo(json.dumps(result, indent=2, default=str))
414
+
415
+
416
@query.command("references")
@click.argument("name")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--kind", type=str, default=None, help="Filter by node kind")
@click.option("--schema", type=str, default=None, help="Filter by schema")
@click.option("--repo", type=str, default=None, help="Filter by repo name")
@click.option(
    "--direction",
    type=click.Choice(["both", "inbound", "outbound"]),
    default="both",
    help="Edge direction (default both)",
)
def query_references(
    config_path: str,
    db_path: str | None,
    name: str,
    kind: str | None,
    schema: str | None,
    repo: str | None,
    direction: str,
):
    """Find all references to/from a named entity."""
    # Prints the query result as indented JSON; values json cannot encode
    # natively are stringified via default=str.
    graph = _open_graph(config_path, db_path)
    try:
        result = graph.query_references(
            name=name,
            kind=kind,
            schema=schema,
            repo=repo,
            direction=direction,
            include_snippets=False,
        )
    finally:
        # Close the database even when the query raises.
        graph.close()
    click.echo(json.dumps(result, indent=2, default=str))
450
+
451
+
452
@query.command("column-usage")
@click.argument("table")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--column", type=str, default=None, help="Filter by column name")
@click.option("--usage-type", type=str, default=None, help="Filter by usage type")
@click.option("--repo", type=str, default=None, help="Filter by repo name")
def query_column_usage(
    config_path: str,
    db_path: str | None,
    table: str,
    column: str | None,
    usage_type: str | None,
    repo: str | None,
):
    """Find column usage for a table."""
    # Prints the query result as indented JSON; values json cannot encode
    # natively are stringified via default=str.
    graph = _open_graph(config_path, db_path)
    try:
        result = graph.query_column_usage(
            table=table,
            column=column,
            usage_type=usage_type,
            repo=repo,
        )
    finally:
        # Close the database even when the query raises.
        graph.close()
    click.echo(json.dumps(result, indent=2, default=str))
477
+
478
+
479
@query.command("trace")
@click.argument("name")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--kind", type=str, default=None, help="Filter by node kind")
@click.option(
    "--direction",
    type=click.Choice(["downstream", "upstream", "both"]),
    default="downstream",
    help="Trace direction (default downstream)",
)
@click.option("--max-depth", type=int, default=3, help="Max traversal depth (default 3)")
@click.option("--repo", type=str, default=None, help="Filter by repo name")
def query_trace(
    config_path: str,
    db_path: str | None,
    name: str,
    kind: str | None,
    direction: str,
    max_depth: int,
    repo: str | None,
):
    """Trace multi-hop dependency chains from a named entity."""
    # Prints the query result as indented JSON; values json cannot encode
    # natively are stringified via default=str.
    graph = _open_graph(config_path, db_path)
    try:
        result = graph.query_trace(
            name=name,
            kind=kind,
            direction=direction,
            max_depth=max_depth,
            repo=repo,
            include_snippets=False,
        )
    finally:
        # Close the database even when the query raises.
        graph.close()
    click.echo(json.dumps(result, indent=2, default=str))
513
+
514
+
515
@query.command("lineage")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
@click.option("--table", type=str, default=None, help="Filter by hop table name")
@click.option("--column", type=str, default=None, help="Filter by column name")
@click.option("--output-node", type=str, default=None, help="Filter by output node name")
@click.option("--repo", type=str, default=None, help="Filter by repo name")
def query_lineage(
    config_path: str,
    db_path: str | None,
    table: str | None,
    column: str | None,
    output_node: str | None,
    repo: str | None,
):
    """Query column lineage chains."""
    # Prints the query result as indented JSON; values json cannot encode
    # natively are stringified via default=str.
    graph = _open_graph(config_path, db_path)
    try:
        result = graph.query_column_lineage(
            table=table,
            column=column,
            output_node=output_node,
            repo=repo,
        )
    finally:
        # Close the database even when the query raises.
        graph.close()
    click.echo(json.dumps(result, indent=2, default=str))
540
+
541
+
542
@cli.command()
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
@click.option("--db", "db_path", type=click.Path(), default=None)
def status(config_path: str, db_path: str | None):
    """Show current index status."""
    # Reuse the shared opener so path resolution and the "No index found"
    # handling (message on stderr, exit status 1) match the query commands.
    graph = _open_graph(config_path, db_path)
    try:
        info = graph.get_index_status()
    finally:
        # Close the database even when the status lookup raises.
        graph.close()

    click.echo(json.dumps(info, indent=2, default=str))
561
+
562
+
563
@cli.command("init")
@click.option("--config", "config_path", type=click.Path(), default=str(DEFAULT_CONFIG_PATH))
def init_config(config_path: str):
    """Create a default config file."""
    # An existing file is never overwritten; the command reports it and returns.
    destination = Path(config_path)
    if destination.exists():
        click.echo(f"Config already exists at {destination}")
        return

    destination.parent.mkdir(parents=True, exist_ok=True)

    # Example plain-SQL repo pointing at the current working directory.
    # NOTE(review): the "# "-prefixed dialect_overrides keys look like
    # commented-out placeholders — confirm how parse_repo_config treats them.
    example_sql_repo = {
        "path": str(Path.cwd()),
        "dialect": None,
        "dialect_overrides": {
            "# athena/": "athena",
            "# starrocks/": "starrocks",
        },
    }

    # The sqlmesh and dbt samples are keyed with a leading "#"; the reindex
    # command skips sqlmesh/dbt entries whose name starts with "#".
    example_sqlmesh_repo = {
        "project_path": "/path/to/sqlmesh/folder",
        "env_file": "/path/to/.env",
        "dialect": "athena",
        "variables": {"GRACE_PERIOD": 7},
    }
    example_dbt_repo = {
        "project_path": "/path/to/dbt/project",
        "env_file": "/path/to/.env",
        "target": "dev",
        "dialect": "starrocks",
        "dbt_command": "uv run dbt",
    }

    template = {
        "db_path": str(DEFAULT_DB_PATH),
        "sql_dialect": None,
        "repos": {"my-project": example_sql_repo},
        "sqlmesh_repos": {"# my-sqlmesh-project": example_sqlmesh_repo},
        "dbt_repos": {"# my-dbt-project": example_dbt_repo},
    }

    serialized = json.dumps(template, indent=2)
    destination.write_text(serialized)

    click.echo(f"Created config at {destination}")
    click.echo("Edit it to add your repos, then run: sqlprism reindex")
609
+
610
+
611
def _load_config(config_path: str) -> dict:
    """Load the JSON config file, falling back to defaults when it is missing.

    Args:
        config_path: Location of the JSON config file.

    Returns:
        The parsed JSON document, or ``{"repos": {}, "db_path": <default>}``
        (after logging a warning) when no file exists at ``config_path``.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = Path(config_path)
    try:
        # Decode as UTF-8 explicitly rather than with the platform's locale
        # encoding, so non-ASCII repo names and paths load the same everywhere.
        raw = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Read-then-handle avoids the race between an exists() check and the read.
        logging.warning("Config file not found: %s — using defaults", path)
        return {"repos": {}, "db_path": str(DEFAULT_DB_PATH)}
    return json.loads(raw)
618
+
619
+
620
def main() -> None:
    """Console entry point: hand control to the click command group."""
    cli()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
File without changes