tabulus 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tabulus-0.0.1.dist-info/METADATA +144 -0
- tabulus-0.0.1.dist-info/RECORD +12 -0
- tabulus-0.0.1.dist-info/WHEEL +4 -0
- tabulus-0.0.1.dist-info/entry_points.txt +2 -0
- tabulus-0.0.1.dist-info/licenses/LICENSE +21 -0
- vigil/__init__.py +3 -0
- vigil/cli.py +69 -0
- vigil/config.py +29 -0
- vigil/db.py +196 -0
- vigil/redactor.py +96 -0
- vigil/safety.py +95 -0
- vigil/server.py +155 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tabulus
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Postgres MCP server — agent-first database workbench
|
|
5
|
+
Project-URL: Repository, https://github.com/WalkingMountain/vigilmcp
|
|
6
|
+
Project-URL: Issues, https://github.com/WalkingMountain/vigilmcp/issues
|
|
7
|
+
Author: WalkingMountain
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agent,claude,cursor,database,mcp,postgres,sql
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Database
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: asyncpg>=0.30
|
|
19
|
+
Requires-Dist: mcp>=1.0.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: ruff>=0.7; extra == 'dev'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# Vigil
|
|
28
|
+
|
|
29
|
+
**A Postgres MCP server built for AI agents.**
|
|
30
|
+
|
|
31
|
+
Vigil is the database workbench for the AI-augmented developer. Connect Claude
|
|
32
|
+
Code, Cursor, or any MCP-compatible client to your Postgres database and let
|
|
33
|
+
the agent introspect the schema, sample data, and write safe queries — without
|
|
34
|
+
copy-pasting schemas into chat windows.
|
|
35
|
+
|
|
36
|
+
## Why
|
|
37
|
+
|
|
38
|
+
Every modern dev workflow now includes an AI agent. Every DB GUI was designed
|
|
39
|
+
before that was true. Vigil flips the model: **the agent is a first-class user,
|
|
40
|
+
not a sidebar feature.**
|
|
41
|
+
|
|
42
|
+
What that means in practice:
|
|
43
|
+
|
|
44
|
+
- Schema introspection optimized for LLM context windows (compact JSON, foreign
|
|
45
|
+
keys flattened, sample rows inline).
|
|
46
|
+
- Read-only by default — `INSERT`/`UPDATE`/`DELETE` and DDL statements are rejected at the
|
|
47
|
+
gateway, so the agent can't drop your tables.
|
|
48
|
+
- `EXPLAIN` exposed as a tool so the agent can reason about query plans before
|
|
49
|
+
proposing optimizations.
|
|
50
|
+
- Statement timeout and row cap are enforced server-side, so no agent can DoS your
|
|
51
|
+
database by accident.
|
|
52
|
+
|
|
53
|
+
## Status
|
|
54
|
+
|
|
55
|
+
**v0.0.1 — alpha.** Postgres only. Stdio MCP transport only. No GUI yet.
|
|
56
|
+
|
|
57
|
+
## Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install tabulus
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Run
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
export DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
67
|
+
vigil
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Then point your MCP client at the `vigil` command.
|
|
71
|
+
|
|
72
|
+
### Claude Code (project-level)
|
|
73
|
+
|
|
74
|
+
Create `.mcp.json` in your project root:
|
|
75
|
+
|
|
76
|
+
```jsonc
|
|
77
|
+
{
|
|
78
|
+
"mcpServers": {
|
|
79
|
+
"vigil": {
|
|
80
|
+
"command": "vigil",
|
|
81
|
+
"args": [],
|
|
82
|
+
"env": {
|
|
83
|
+
"DATABASE_URL": "postgres://user:pass@host:5432/dbname"
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Restart Claude Code in that directory and approve the trust prompt.
|
|
91
|
+
|
|
92
|
+
### Claude Code (user-level via CLI)
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
claude mcp add vigil "$(which vigil)" --env DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Cursor
|
|
99
|
+
|
|
100
|
+
Add to `~/.cursor/mcp_servers.json`:
|
|
101
|
+
|
|
102
|
+
```jsonc
|
|
103
|
+
{
|
|
104
|
+
"mcpServers": {
|
|
105
|
+
"vigil": {
|
|
106
|
+
"command": "vigil",
|
|
107
|
+
"env": { "DATABASE_URL": "postgres://user:pass@host:5432/dbname" }
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Tools
|
|
114
|
+
|
|
115
|
+
| Tool | Description |
|
|
116
|
+
|---|---|
|
|
117
|
+
| `list_tables` | All tables with row count estimates + sizes |
|
|
118
|
+
| `describe_schema` | Columns, PK, FKs, indexes, sample rows for a table |
|
|
119
|
+
| `sample_rows` | Random sample from a table |
|
|
120
|
+
| `safe_select` | Run a read-only SELECT (write keywords rejected) |
|
|
121
|
+
| `explain` | Get the query plan without executing the query (`EXPLAIN (FORMAT JSON)`) |
|
|
122
|
+
|
|
123
|
+
## Configuration
|
|
124
|
+
|
|
125
|
+
| Variable | Default | Purpose |
|
|
126
|
+
|---|---|---|
|
|
127
|
+
| `DATABASE_URL` | — (required) | Postgres connection URL |
|
|
128
|
+
| `VIGIL_MAX_ROWS` | `100` | Hard cap on rows returned by any tool |
|
|
129
|
+
| `VIGIL_SAMPLE_SIZE` | `3` | Sample rows included in `describe_schema` |
|
|
130
|
+
| `VIGIL_STATEMENT_TIMEOUT_MS` | `5000` | Server-side query timeout |
|
|
131
|
+
| `VIGIL_REDACT` | `off` | Set `on` to scrub PII and secrets (emails, API keys, JWTs, credit cards, SSNs, phone numbers, IP addresses) from `sample_rows`, `safe_select`, and `describe_schema` output before the agent sees it. Recommended for production. |
|
|
132
|
+
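| `VIGIL_ALLOW_WRITES` | `false` | Set `true` to stop opening connections with `default_transaction_read_only=on` (NOT recommended). The SQL keyword filter on `safe_select` and `explain` still rejects write statements. |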
|
|
133
|
+
|
|
134
|
+
## Roadmap
|
|
135
|
+
|
|
136
|
+
- v0.1 — Postgres parity, polished install
|
|
137
|
+
- v0.2 — SQLite adapter
|
|
138
|
+
- v0.3 — MySQL / MariaDB adapter
|
|
139
|
+
- v0.x — Tauri desktop GUI shell on top of the same core
|
|
140
|
+
- v1.0 — Stable, cross-platform, multi-DB
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
MIT. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
vigil/__init__.py,sha256=lKNOax-_jZ74hoDy3c8JJEj9R5-3lM5iVqY09Nsb_qk,74
|
|
2
|
+
vigil/cli.py,sha256=kzLr8ox4VYFL53_0pEPCD9JGO0KjBaIhOfyfxw21Tck,2550
|
|
3
|
+
vigil/config.py,sha256=LjJn5S_7giqrNbPG7C4bSwimlBNyS2dwHam3tx619EI,967
|
|
4
|
+
vigil/db.py,sha256=kL0VVHEiNYpWbiORCy12qaofy6NaxEyALmhzQa6ZO3g,6149
|
|
5
|
+
vigil/redactor.py,sha256=bxTdaXmZctDB6OlpppLHOISarNiYbtHRix2hyC-3nTQ,4554
|
|
6
|
+
vigil/safety.py,sha256=C6SfTNBVVffD4mvWF8DQjw7xmLwecOwyza0kYUuFCdk,2598
|
|
7
|
+
vigil/server.py,sha256=tTXLKFgJ5xzRgBlsEgupBhW8v_pCuYB1BKavKO1jHbk,4751
|
|
8
|
+
tabulus-0.0.1.dist-info/METADATA,sha256=QshdPfrCMPgpyd7Al1xtpZ9AHHSy229T8Pf93luGTfg,4182
|
|
9
|
+
tabulus-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
10
|
+
tabulus-0.0.1.dist-info/entry_points.txt,sha256=g3mzzDHgZny8CWWasju_7Yv_ZaMEn6QDVSkLToN-Fo4,41
|
|
11
|
+
tabulus-0.0.1.dist-info/licenses/LICENSE,sha256=MGd2BM-9ImicOx1eDwNQEq1liAkqK12L7Vl1mneinjI,1072
|
|
12
|
+
tabulus-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 WalkingMountain
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
vigil/__init__.py
ADDED
vigil/cli.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""CLI entry point — `vigil` command.
|
|
2
|
+
|
|
3
|
+
Wraps startup errors in friendly messages so the agent / user sees actionable
|
|
4
|
+
hints instead of stack traces.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from vigil import __version__
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main() -> None:
    """Run the `vigil` command.

    `--version` / `-V` prints the package version and returns; `--help` / `-h`
    prints the usage text and returns. Otherwise the configuration is
    validated and the MCP server is started.

    Exit codes: 2 for a RuntimeError (configuration problems), 0 on
    KeyboardInterrupt, 1 for any other exception.
    """
    argv = sys.argv

    if any(flag in argv for flag in ("--version", "-V")):
        print(f"vigil {__version__}")
        return

    if any(flag in argv for flag in ("--help", "-h")):
        usage = (
            "vigil — Postgres MCP server for AI agents\n"
            "\n"
            "Usage:\n"
            " DATABASE_URL=postgres://user:pass@host:5432/dbname vigil\n"
            "\n"
            "Environment variables:\n"
            " DATABASE_URL required — Postgres connection string\n"
            " VIGIL_MAX_ROWS default 100 — cap on rows returned\n"
            " VIGIL_SAMPLE_SIZE default 3 — rows in describe_schema sample\n"
            " VIGIL_STATEMENT_TIMEOUT_MS default 5000 — server-side query timeout\n"
            " VIGIL_REDACT default off — set 'on' to scrub PII from output\n"
            " VIGIL_ALLOW_WRITES default false — keep false (read-only)\n"
            "\n"
            "Repo: https://github.com/WalkingMountain/vigilmcp"
        )
        print(usage)
        return

    try:
        # Imported lazily so the flag handling above never loads asyncio / mcp.
        from vigil.config import load as validate_config
        from vigil.server import main as serve

        # Validate the environment up front: a missing DATABASE_URL should be
        # reported at startup, not on the agent's first tool call.
        validate_config()
        serve()
    except RuntimeError as err:
        # Configuration problems already carry a readable message.
        print(f"vigil: {err}", file=sys.stderr)
        sys.exit(2)
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as err:
        # Anything else: name the exception and list the usual suspects.
        report = (
            f"vigil: unexpected error: {type(err).__name__}: {err}\n"
            f"\n"
            f"Common causes:\n"
            f" - DATABASE_URL points at an unreachable host\n"
            f" - Postgres requires SSL but the URL lacks ?sslmode=require\n"
            f" - User in DATABASE_URL lacks CONNECT or USAGE privileges\n"
            f"\n"
            f"File an issue: https://github.com/WalkingMountain/vigilmcp/issues"
        )
        print(report, file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
vigil/config.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Runtime config from environment variables."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class Config:
    """Immutable runtime settings for the server."""

    database_url: str
    max_rows: int  # cap on rows returned by any tool
    sample_size: int  # rows per describe_schema sample
    statement_timeout_ms: int
    allow_writes: bool  # default False — agent gets read-only


def _env_int(name: str, default: int, minimum: int) -> int:
    """Read integer environment variable ``name``, validating type and lower bound."""
    raw = os.environ.get(name, "").strip()
    if not raw:
        # Unset or blank: fall back to the documented default.
        return default
    try:
        value = int(raw)
    except ValueError:
        raise RuntimeError(f"{name} must be an integer (got {raw!r}).") from None
    if value < minimum:
        raise RuntimeError(f"{name} must be >= {minimum} (got {value}).")
    return value


def load() -> Config:
    """Build a Config from environment variables.

    Reads DATABASE_URL (required), VIGIL_MAX_ROWS (default 100),
    VIGIL_SAMPLE_SIZE (default 3), VIGIL_STATEMENT_TIMEOUT_MS (default 5000)
    and VIGIL_ALLOW_WRITES (only the value "true", case-insensitive, enables
    writes).

    Raises:
        RuntimeError: if DATABASE_URL is missing or empty, or if a numeric
            setting is not an integer or is below its minimum. Every
            configuration problem is reported as RuntimeError so a caller can
            handle them uniformly.
    """
    url = os.environ.get("DATABASE_URL")
    if not url:
        raise RuntimeError(
            "DATABASE_URL is required. Set to a Postgres connection string "
            "(postgres://user:pass@host:5432/dbname)."
        )

    # Parse every numeric setting before constructing the Config so a bad
    # value fails here rather than surfacing later as a SQL or driver error.
    max_rows = _env_int("VIGIL_MAX_ROWS", 100, minimum=1)
    sample_size = _env_int("VIGIL_SAMPLE_SIZE", 3, minimum=0)
    statement_timeout_ms = _env_int("VIGIL_STATEMENT_TIMEOUT_MS", 5000, minimum=1)

    return Config(
        database_url=url,
        max_rows=max_rows,
        sample_size=sample_size,
        statement_timeout_ms=statement_timeout_ms,
        allow_writes=os.environ.get("VIGIL_ALLOW_WRITES", "false").lower() == "true",
    )
|
vigil/db.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Postgres connection pool + schema introspection.
|
|
2
|
+
|
|
3
|
+
LLM-friendly schema output: compact JSON, sample rows inline, foreign keys
|
|
4
|
+
flattened. Goal is to fit a 50-table schema into one prompt without truncation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncpg
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from vigil.config import Config
|
|
11
|
+
from vigil.redactor import maybe_redact
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Process-wide pool, created on first use by get_pool().
_pool: asyncpg.Pool | None = None


async def get_pool(config: Config) -> asyncpg.Pool:
    """Return the shared connection pool, creating it on the first call.

    The pool holds between 1 and 4 connections. Each connection is opened with
    `default_transaction_read_only` set to "on" unless `config.allow_writes`
    is true, and with `statement_timeout` set to the configured number of
    milliseconds. The same timeout, converted to seconds, is passed as the
    client-side `command_timeout`.
    """
    global _pool
    if _pool is not None:
        return _pool

    timeout_ms = config.statement_timeout_ms
    read_only_flag = "off" if config.allow_writes else "on"
    session_settings = {
        "default_transaction_read_only": read_only_flag,
        "statement_timeout": str(timeout_ms),
    }
    _pool = await asyncpg.create_pool(
        config.database_url,
        min_size=1,
        max_size=4,
        command_timeout=timeout_ms / 1000.0,
        server_settings=session_settings,
    )
    return _pool
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def close_pool() -> None:
    """Close the shared pool if one exists and forget it; otherwise do nothing."""
    global _pool
    if _pool is None:
        return
    await _pool.close()
    _pool = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _quote_ident(name: str) -> str:
    """Return ``name`` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


async def list_tables(pool: asyncpg.Pool, schema: str | None = None) -> list[dict[str, Any]]:
    """List tables, partitioned tables, views and materialized views.

    Relations in `pg_catalog` and `information_schema` are excluded. When
    `schema` is given, only relations in that schema are returned.

    Each row has the keys `schema`, `name`, `row_estimate`, `size` and
    `comment`. When the catalog estimate is missing or negative, a live
    `COUNT(*)` is run instead and `row_estimate_exact` is set to True on that
    row. If the count fails, `row_estimate` is None.
    """
    sql = """
        SELECT
            n.nspname AS schema,
            c.relname AS name,
            c.reltuples::bigint AS row_estimate,
            pg_size_pretty(pg_total_relation_size(c.oid)) AS size,
            obj_description(c.oid, 'pg_class') AS comment
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relkind IN ('r', 'p', 'v', 'm')
          AND n.nspname NOT IN ('pg_catalog', 'information_schema')
          AND ($1::text IS NULL OR n.nspname = $1)
        ORDER BY n.nspname, c.relname
    """
    rows = await pool.fetch(sql, schema)
    result = [dict(r) for r in rows]
    for row in result:
        estimate = row["row_estimate"]
        if estimate is not None and estimate >= 0:
            continue
        # Names come straight from the catalog and may contain any character,
        # including a double quote, so they must be escaped before being
        # spliced into the statement.
        target = f"{_quote_ident(row['schema'])}.{_quote_ident(row['name'])}"
        try:
            count = await pool.fetchval(f"SELECT COUNT(*) FROM {target}")
            row["row_estimate"] = int(count)
            row["row_estimate_exact"] = True
        except Exception:
            # Deliberate best effort: a relation that cannot be counted
            # (timeout, permissions, ...) must not fail the whole listing.
            row["row_estimate"] = None
    return result
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def describe_table(
    pool: asyncpg.Pool,
    qualified: str,
    sample_size: int,
) -> dict[str, Any]:
    """Return columns, primary key, foreign keys, indexes, and a small sample.

    Args:
        pool: connection pool used for every query.
        qualified: `schema.table` or a bare table name (validated by
            `_split_qualified`).
        sample_size: number of rows to include under `sample_rows`.

    Returns:
        A dict with the keys `table`, `columns`, `primary_key`,
        `foreign_keys`, `indexes` and `sample_rows`. The sample rows are
        passed through `maybe_redact`.

    Raises:
        ValueError: if `qualified` is not a valid identifier.
    """
    schema, table = _split_qualified(qualified)

    columns = await pool.fetch(
        """
        SELECT
            column_name AS name,
            data_type AS type,
            is_nullable = 'YES' AS nullable,
            column_default AS default,
            character_maximum_length AS max_length
        FROM information_schema.columns
        WHERE table_schema = $1 AND table_name = $2
        ORDER BY ordinal_position
        """,
        schema,
        table,
    )

    # format('%I.%I', ...) quotes each part where needed. A plain
    # `schema || '.' || table` cast to regclass is case-folded, so a
    # mixed-case table that the quoted sample query below can read would not
    # be found here.
    primary_key = await pool.fetch(
        """
        SELECT a.attname AS column
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = format('%I.%I', $1::text, $2::text)::regclass
          AND i.indisprimary
        """,
        schema,
        table,
    )

    foreign_keys = await pool.fetch(
        """
        SELECT
            kcu.column_name AS from_column,
            ccu.table_schema || '.' || ccu.table_name AS to_table,
            ccu.column_name AS to_column
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
        JOIN information_schema.constraint_column_usage ccu
            ON ccu.constraint_name = tc.constraint_name
            AND ccu.table_schema = tc.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY'
            AND tc.table_schema = $1
            AND tc.table_name = $2
        """,
        schema,
        table,
    )

    indexes = await pool.fetch(
        """
        SELECT indexname AS name, indexdef AS definition
        FROM pg_indexes
        WHERE schemaname = $1 AND tablename = $2
        """,
        schema,
        table,
    )

    # schema/table contain only alphanumerics and underscores at this point,
    # so embedding them inside double quotes is safe.
    sample = await pool.fetch(
        f'SELECT * FROM "{schema}"."{table}" LIMIT $1',
        sample_size,
    )

    return {
        "table": f"{schema}.{table}",
        "columns": [dict(c) for c in columns],
        "primary_key": [r["column"] for r in primary_key],
        "foreign_keys": [dict(f) for f in foreign_keys],
        "indexes": [dict(i) for i in indexes],
        "sample_rows": maybe_redact([dict(s) for s in sample]),
    }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def sample_rows(
    pool: asyncpg.Pool,
    qualified: str,
    limit: int,
) -> list[dict[str, Any]]:
    """Return up to `limit` randomly ordered rows from a table.

    `qualified` is validated by `_split_qualified` (which raises ValueError on
    a bad identifier). The rows are converted to dicts and passed through
    `maybe_redact`.
    """
    schema, table = _split_qualified(qualified)
    query = f'SELECT * FROM "{schema}"."{table}" ORDER BY random() LIMIT $1'
    records = await pool.fetch(query, limit)
    as_dicts = [dict(record) for record in records]
    return maybe_redact(as_dicts)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
async def safe_select(
    pool: asyncpg.Pool,
    sql: str,
    max_rows: int,
) -> dict[str, Any]:
    """Run a query and return at most `max_rows` rows.

    This function does not check that `sql` is read-only; callers are expected
    to have validated it first.

    Returns:
        A dict with `row_count` (rows returned), `rows` (dicts, passed through
        `maybe_redact`) and `truncated` (True only when more rows were
        available than were returned).
    """
    cap = int(max_rows)

    # A trailing semicolon is valid for a single statement but would be a
    # syntax error inside the wrapper subquery.
    statement = sql.strip()
    while statement.endswith(";"):
        statement = statement[:-1].rstrip()

    lead = statement.split(None, 1)[0].upper() if statement else ""
    if lead in ("EXPLAIN", "SHOW"):
        # Utility statements cannot appear in a subquery; run them as-is and
        # apply the cap client-side.
        query = statement
    else:
        # The subquery stops a user-supplied LIMIT from bypassing the cap.
        # The newlines keep a trailing `--` comment in the user's SQL from
        # swallowing the closing parenthesis. One extra row is requested so
        # truncation can be told apart from an exact fit.
        query = f"SELECT * FROM (\n{statement}\n) _vigil_q LIMIT {cap + 1}"

    fetched = await pool.fetch(query)
    truncated = len(fetched) > cap
    rows = fetched[:cap]
    return {
        "row_count": len(rows),
        "rows": maybe_redact([dict(r) for r in rows]),
        "truncated": truncated,
    }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
async def explain(pool: asyncpg.Pool, sql: str) -> dict[str, Any]:
    """Return the query plan for `sql` without executing it.

    The statement is run as `EXPLAIN (FORMAT JSON, ANALYZE FALSE)`. This
    function does not check that `sql` is read-only.

    Returns:
        `{"plan": <plan>}`. If the driver hands the plan back as JSON text it
        is decoded into Python objects, so that serialising the result again
        does not produce a JSON string nested inside JSON.
    """
    import json  # local import: this module does not import json at top level

    plan = await pool.fetchval(f"EXPLAIN (FORMAT JSON, ANALYZE FALSE) {sql}")
    if isinstance(plan, str):
        try:
            plan = json.loads(plan)
        except ValueError:
            # Not valid JSON after all; hand back the raw text unchanged.
            pass
    return {"plan": plan}
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _split_qualified(qualified: str) -> tuple[str, str]:
    """Parse `schema.table` or bare `table` (assumes public).

    Returns:
        A `(schema, table)` tuple.

    Raises:
        ValueError: if either part is empty or contains anything other than
            alphanumeric characters and underscores. Callers embed both parts
            inside double-quoted identifiers, so nothing that could end the
            quoting may pass.
    """
    head, dot, tail = qualified.partition(".")
    schema, table = (head, tail) if dot else ("public", qualified)

    # Reject injection vectors. The emptiness check matters: all() over an
    # empty string is True, so "" would otherwise be accepted.
    for part in (schema, table):
        if not part or not all(c.isalnum() or c == "_" for c in part):
            raise ValueError(f"Invalid identifier: {part!r}")
    return schema, table
|
vigil/redactor.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""PII / secret redactor for tool output before LLM sees it.
|
|
2
|
+
|
|
3
|
+
Database tools (sample_rows, safe_select, describe_schema's sample_rows)
|
|
4
|
+
return rows from user tables. Those rows often contain customer emails,
|
|
5
|
+
API keys, JWTs, credit cards, SSNs, phone numbers, IPs. Without redaction
|
|
6
|
+
that data ships to Anthropic on every query — brand-killing leak.
|
|
7
|
+
|
|
8
|
+
Sentinel format: `[REDACTED:type]` — preserves enough structure for the
|
|
9
|
+
LLM to reason ("Stripe call failed with [REDACTED:api_key]") without
|
|
10
|
+
leaking the value.
|
|
11
|
+
|
|
12
|
+
Conservative philosophy: false positives are cheap, false negatives kill.
|
|
13
|
+
|
|
14
|
+
Off by default — set VIGIL_REDACT=on to enable.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# (sentinel kind, regex source) pairs. Order matters: patterns are applied in
# sequence, so more specific ones (e.g. `sk-ant-`) come before broader ones.
_PATTERN_SOURCES: list[tuple[str, str]] = [
    # JWT: three dot-separated segments, the first two starting with "eyJ".
    ("jwt", r"\beyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
    # Vendor API keys identified by their distinctive prefixes.
    ("anthropic_key", r"\bsk-ant-[A-Za-z0-9_-]{20,}"),
    ("openai_key", r"\bsk-(?:proj-)?[A-Za-z0-9]{20,}"),
    ("stripe_key", r"\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{20,}"),
    ("github_token", r"\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}"),
    ("slack_token", r"\bxox[bpars]-[A-Za-z0-9-]{20,}"),
    ("aws_access_key", r"\bAKIA[0-9A-Z]{16}\b"),
    ("google_api_key", r"\bAIza[0-9A-Za-z_-]{35}\b"),
    # "Bearer <token>" as found in Authorization headers (case-insensitive).
    ("bearer_token", r"(?i)bearer\s+[A-Za-z0-9._~+/-]{20,}={0,2}"),
    # 13-19 digits, optionally separated by spaces or hyphens.
    ("credit_card", r"\b(?:\d[ -]*?){13,19}\b"),
    # US social security number, NNN-NN-NNNN.
    ("ssn", r"\b\d{3}-\d{2}-\d{4}\b"),
    # Email address.
    (
        "email",
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?"
        r"(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?)+\b",
    ),
    # Phone number, optional leading "+", not embedded in an alphanumeric run.
    (
        "phone",
        r"(?<![A-Za-z0-9])\+?\d{1,3}[\s.-]?\(?\d{2,4}\)?"
        r"[\s.-]?\d{3,4}[\s.-]?\d{3,4}(?![A-Za-z0-9])",
    ),
    # Dotted-quad IPv4.
    ("ipv4", r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
    # IPv6 including "::" compression; the look-arounds keep it from matching
    # inside longer alphanumeric or colon-delimited runs such as Class::Path.
    (
        "ipv6",
        r"(?<![A-Za-z0-9:])(?:[A-Fa-f0-9]{0,4}:){2,}[A-Fa-f0-9]{0,4}(?![A-Za-z0-9:])",
    ),
]

# Compiled once at import time.
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    (kind, re.compile(source)) for kind, source in _PATTERN_SOURCES
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def is_enabled() -> bool:
    """Report whether VIGIL_REDACT is set to on, true, 1 or yes (any letter case)."""
    setting = os.environ.get("VIGIL_REDACT", "off")
    truthy = ("on", "true", "1", "yes")
    return setting.lower() in truthy
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def redact_string(s: str) -> str:
    """Return `s` with every match of every pattern replaced by `[REDACTED:kind]`.

    Non-string and empty inputs are returned unchanged. Patterns are applied
    in the order they are listed in `_PATTERNS`.
    """
    if not (isinstance(s, str) and s):
        return s
    scrubbed = s
    for label, regex in _PATTERNS:
        sentinel = f"[REDACTED:{label}]"
        scrubbed = regex.sub(sentinel, scrubbed)
    return scrubbed
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def redact_value(v: Any) -> Any:
    """Recursively redact str / list / dict / tuple. Dict KEYS NOT redacted.

    `ipaddress` address and network objects are converted to text and
    redacted as well: they are not strings, so they would otherwise pass
    through untouched and expose the address once the result is serialised.
    All other values are returned unchanged.
    """
    import ipaddress  # local import: not among this module's top-level imports

    if isinstance(v, str):
        return redact_string(v)
    if isinstance(v, dict):
        return {k: redact_value(val) for k, val in v.items()}
    if isinstance(v, list):
        return [redact_value(item) for item in v]
    if isinstance(v, tuple):
        return tuple(redact_value(item) for item in v)
    ip_types = (
        ipaddress.IPv4Address,  # also covers IPv4Interface (a subclass)
        ipaddress.IPv6Address,  # also covers IPv6Interface (a subclass)
        ipaddress.IPv4Network,
        ipaddress.IPv6Network,
    )
    if isinstance(v, ip_types):
        return redact_string(str(v))
    return v
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def maybe_redact(v: Any) -> Any:
    """Return `v` untouched unless redaction is enabled, in which case redact it."""
    if not is_enabled():
        return v
    return redact_value(v)
|
vigil/safety.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""SQL safety — read-only enforcement.
|
|
2
|
+
|
|
3
|
+
Rejects any statement that could mutate data or schema. Default mode for the
|
|
4
|
+
agent: SELECT + EXPLAIN only. Writes only enabled when VIGIL_ALLOW_WRITES=true
|
|
5
|
+
AND the human operator opts in per-statement (future approval flow).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
# Keywords whose presence anywhere in a statement causes it to be rejected.
_FORBIDDEN_KEYWORDS = {
    # Data modification
    "INSERT", "UPDATE", "DELETE", "TRUNCATE", "MERGE", "REPLACE", "COPY",
    # Schema and privilege changes
    "DROP", "ALTER", "CREATE", "RENAME", "COMMENT", "GRANT", "REVOKE",
    # Maintenance
    "REINDEX", "VACUUM", "ANALYZE", "CLUSTER", "REFRESH", "LOCK",
    # Procedural code
    "DO", "CALL",
    # Notifications
    "NOTIFY", "LISTEN", "UNLISTEN",
    # Session state and prepared statements
    "SET", "RESET", "DISCARD", "PREPARE", "EXECUTE", "DEALLOCATE",
    # Transaction control
    "BEGIN", "COMMIT", "ROLLBACK", "SAVEPOINT", "RELEASE", "START",
}

# A statement must begin with one of these words to be accepted in read-only mode.
_ALLOWED_LEADS = {"SELECT", "WITH", "EXPLAIN", "SHOW", "TABLE", "VALUES"}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class UnsafeSQLError(ValueError):
    """Signals that a statement was refused because it could change state in read-only mode."""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Opening delimiter of a dollar-quoted string: `$$` or `$tag$`.
_DOLLAR_QUOTE_OPEN = re.compile(r"\$(?:[A-Za-z_][A-Za-z_0-9]*)?\$")


def _is_ident_char(ch: str) -> bool:
    """Return True if ``ch`` is a character that can continue an identifier."""
    return ch.isalnum() or ch == "_"


def _quoted_end(sql: str, start: int, quote: str, backslash_escapes: bool) -> int:
    """Return the index just past the quoted token opening at ``start``."""
    i, n = start + 1, len(sql)
    while i < n:
        ch = sql[i]
        if backslash_escapes and ch == "\\":
            i += 2  # skip the escaped character
        elif ch == quote:
            if sql[i + 1 : i + 2] == quote:
                i += 2  # doubled quote is an escaped quote, not the end
            else:
                return i + 1
        else:
            i += 1
    return n  # unterminated: the token runs to the end of the input


def _block_comment_end(sql: str, start: int) -> int:
    """Return the index just past the (possibly nested) comment, or -1 if unterminated."""
    depth, i, n = 0, start, len(sql)
    while i < n:
        pair = sql[i : i + 2]
        if pair == "/*":
            depth += 1
            i += 2
        elif pair == "*/":
            depth -= 1
            i += 2
            if depth == 0:
                return i
        else:
            i += 1
    return -1


def normalize(sql: str) -> str:
    """Strip comments and collapse whitespace for keyword inspection.

    The text is scanned once, left to right, so that comment markers are only
    honoured where they really start a comment:

    - `--` and `/* */` inside single-quoted strings, double-quoted identifiers
      or dollar-quoted strings are kept as literal text. Stripping them would
      hide whatever follows on the line from the caller's inspection.
    - A `/*` that appears inside a `--` line comment does not open a block
      comment.
    - Block comments may nest. An unterminated block comment is left in place
      rather than removed.

    Backslash escapes are recognised only in `E'...'` strings.
    NOTE(review): this assumes the server runs with
    standard_conforming_strings=on — confirm for the target deployment.

    Whitespace is collapsed everywhere, including inside literals.
    """
    pieces: list[str] = []
    i, n = 0, len(sql)
    while i < n:
        ch = sql[i]
        pair = sql[i : i + 2]
        prev = sql[i - 1] if i else ""
        if pair == "--":
            # Drop up to (not including) the newline, which remains as whitespace.
            newline = sql.find("\n", i)
            i = n if newline == -1 else newline
            pieces.append(" ")
        elif pair == "/*":
            end = _block_comment_end(sql, i)
            if end == -1:
                # Keep the remainder visible instead of hiding it.
                pieces.append(sql[i:])
                break
            pieces.append(" ")
            i = end
        elif ch == "'" or ch == '"':
            before_prefix = sql[i - 2] if i >= 2 else ""
            escapes = ch == "'" and prev in ("e", "E") and not _is_ident_char(before_prefix)
            end = _quoted_end(sql, i, ch, escapes)
            pieces.append(sql[i:end])
            i = end
        elif ch == "$" and not _is_ident_char(prev):
            opener = _DOLLAR_QUOTE_OPEN.match(sql, i)
            if opener is None:
                pieces.append(ch)
                i += 1
            else:
                tag = opener.group(0)
                close = sql.find(tag, opener.end())
                end = n if close == -1 else close + len(tag)
                pieces.append(sql[i:end])
                i = end
        else:
            pieces.append(ch)
            i += 1
    return " ".join("".join(pieces).split())
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def assert_read_only(sql: str) -> None:
    """Raise UnsafeSQLError unless `sql` is a single read-only statement.

    After comments are stripped, the input is accepted only if:

    1. it contains exactly one non-empty statement (split on `;`),
    2. that statement's first word is one of `_ALLOWED_LEADS`, and
    3. no word from `_FORBIDDEN_KEYWORDS` appears anywhere in it.

    The keyword scan is deliberately crude: it also looks inside string
    literals and at column names, so it can reject harmless queries.

    Raises:
        UnsafeSQLError: if any of the conditions above is not met, including
            input that is empty or consists only of semicolons.
    """
    cleaned = normalize(sql)

    # Split on semicolons (multi-statement). Reject anything that isn't a
    # single read-only statement.
    statements = [s.strip() for s in cleaned.split(";") if s.strip()]
    if not statements:
        # Covers "" as well as input such as ";" that has no statement at all.
        raise UnsafeSQLError("Empty statement")
    if len(statements) > 1:
        raise UnsafeSQLError("Multiple statements not allowed in read-only mode")

    stmt = statements[0]
    first_word = stmt.split(None, 1)[0].upper()
    if first_word not in _ALLOWED_LEADS:
        raise UnsafeSQLError(
            f"Statement must start with one of {sorted(_ALLOWED_LEADS)} "
            f"in read-only mode (got {first_word!r})"
        )

    # Defense-in-depth: scan for any forbidden keyword anywhere. Only runs of
    # letters bounded by non-word characters count, so a name such as
    # `reset_at` does not trigger on RESET.
    upper_tokens = set(re.findall(r"\b[A-Z]+\b", stmt.upper()))
    forbidden_hits = upper_tokens & _FORBIDDEN_KEYWORDS
    if forbidden_hits:
        raise UnsafeSQLError(f"Forbidden keyword(s) in read-only mode: {sorted(forbidden_hits)}")
|
vigil/server.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""MCP server entry point.
|
|
2
|
+
|
|
3
|
+
Exposes 5 tools over stdio:
|
|
4
|
+
- list_tables
|
|
5
|
+
- describe_schema
|
|
6
|
+
- sample_rows
|
|
7
|
+
- safe_select
|
|
8
|
+
- explain
|
|
9
|
+
|
|
10
|
+
Used by Claude Code / Cursor / any MCP client.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import json
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from mcp.server import Server
|
|
18
|
+
from mcp.server.stdio import stdio_server
|
|
19
|
+
from mcp.types import TextContent, Tool
|
|
20
|
+
|
|
21
|
+
from vigil.config import load
|
|
22
|
+
from vigil.db import (
|
|
23
|
+
close_pool,
|
|
24
|
+
describe_table,
|
|
25
|
+
explain,
|
|
26
|
+
get_pool,
|
|
27
|
+
list_tables,
|
|
28
|
+
safe_select,
|
|
29
|
+
sample_rows,
|
|
30
|
+
)
|
|
31
|
+
from vigil.safety import UnsafeSQLError, assert_read_only
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
server = Server("vigil")


@server.list_tools()
async def list_available_tools() -> list[Tool]:
    """Describe the five tools this server offers, with their JSON input schemas."""
    list_tables_tool = Tool(
        name="list_tables",
        description="List all tables in the database with row count estimates and sizes.",
        inputSchema={
            "type": "object",
            "properties": {
                "schema": {
                    "type": "string",
                    "description": "Optional schema name to filter (e.g., 'public').",
                }
            },
        },
    )

    describe_schema_tool = Tool(
        name="describe_schema",
        description=(
            "Describe a table: columns, types, primary key, foreign keys, indexes, "
            "and a small sample of rows. The single most useful tool for an agent "
            "trying to understand the data model."
        ),
        inputSchema={
            "type": "object",
            "properties": {
                "table": {
                    "type": "string",
                    "description": "Table name, optionally schema-qualified (e.g., 'public.users').",
                }
            },
            "required": ["table"],
        },
    )

    sample_rows_tool = Tool(
        name="sample_rows",
        description="Return a random sample of rows from a table.",
        inputSchema={
            "type": "object",
            "properties": {
                "table": {"type": "string"},
                "limit": {"type": "integer", "default": 10},
            },
            "required": ["table"],
        },
    )

    safe_select_tool = Tool(
        name="safe_select",
        description=(
            "Run a read-only SELECT query. INSERT/UPDATE/DELETE/DDL are rejected. "
            "Results are capped at the server's max_rows setting (default 100)."
        ),
        inputSchema={
            "type": "object",
            "properties": {
                "sql": {
                    "type": "string",
                    "description": "A SELECT/WITH/EXPLAIN/SHOW statement.",
                }
            },
            "required": ["sql"],
        },
    )

    explain_tool = Tool(
        name="explain",
        description="Return the query plan for a SELECT statement (EXPLAIN FORMAT JSON).",
        inputSchema={
            "type": "object",
            "properties": {"sql": {"type": "string"}},
            "required": ["sql"],
        },
    )

    return [
        list_tables_tool,
        describe_schema_tool,
        sample_rows_tool,
        safe_select_tool,
        explain_tool,
    ]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@server.call_tool()
|
|
113
|
+
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
|
|
114
|
+
config = load()
|
|
115
|
+
pool = await get_pool(config)
|
|
116
|
+
|
|
117
|
+
try:
|
|
118
|
+
if name == "list_tables":
|
|
119
|
+
result = await list_tables(pool, arguments.get("schema"))
|
|
120
|
+
elif name == "describe_schema":
|
|
121
|
+
result = await describe_table(pool, arguments["table"], config.sample_size)
|
|
122
|
+
elif name == "sample_rows":
|
|
123
|
+
limit = min(int(arguments.get("limit", 10)), config.max_rows)
|
|
124
|
+
result = await sample_rows(pool, arguments["table"], limit)
|
|
125
|
+
elif name == "safe_select":
|
|
126
|
+
sql = arguments["sql"]
|
|
127
|
+
assert_read_only(sql)
|
|
128
|
+
result = await safe_select(pool, sql, config.max_rows)
|
|
129
|
+
elif name == "explain":
|
|
130
|
+
sql = arguments["sql"]
|
|
131
|
+
assert_read_only(sql)
|
|
132
|
+
result = await explain(pool, sql)
|
|
133
|
+
else:
|
|
134
|
+
return [TextContent(type="text", text=f"Unknown tool: {name}")]
|
|
135
|
+
|
|
136
|
+
return [TextContent(type="text", text=json.dumps(result, default=str, indent=2))]
|
|
137
|
+
|
|
138
|
+
except UnsafeSQLError as e:
|
|
139
|
+
return [TextContent(type="text", text=f"Rejected by safety policy: {e}")]
|
|
140
|
+
except Exception as e:
|
|
141
|
+
return [TextContent(type="text", text=f"Error: {type(e).__name__}: {e}")]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
async def run() -> None:
    """Serve MCP requests over stdio until the transport closes.

    The connection pool is closed on the way out whether the server loop
    finishes normally, raises, or is cancelled.
    """
    try:
        async with stdio_server() as (read, write):
            await server.run(read, write, server.create_initialization_options())
    finally:
        # In a finally block so an error in the server loop cannot skip it.
        await close_pool()
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def main() -> None:
    """Start the stdio server and block until it finishes."""
    serving = run()
    asyncio.run(serving)


if __name__ == "__main__":
    main()
|