tabulus 0.0.3__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tabulus-0.0.3 → tabulus-0.0.4}/.gitignore +4 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/PKG-INFO +49 -22
- tabulus-0.0.4/README.md +147 -0
- tabulus-0.0.4/demo/README.md +24 -0
- tabulus-0.0.4/demo/record.sh +48 -0
- tabulus-0.0.4/demo/seed.sql +28 -0
- tabulus-0.0.4/demo/tabulus-redactor.svg +43 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/pyproject.toml +1 -1
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/__init__.py +1 -1
- tabulus-0.0.4/src/tabulus/redactor.py +172 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/tests/test_redactor.py +46 -0
- tabulus-0.0.3/README.md +0 -120
- tabulus-0.0.3/src/tabulus/redactor.py +0 -96
- {tabulus-0.0.3 → tabulus-0.0.4}/.github/workflows/ci.yml +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/.github/workflows/publish.yml +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/.mcp.json +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/LICENSE +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/cli.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/config.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/db.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/safety.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/src/tabulus/server.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/tests/__init__.py +0 -0
- {tabulus-0.0.3 → tabulus-0.0.4}/tests/test_safety.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tabulus
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Postgres MCP server — agent-first database workbench
|
|
5
5
|
Project-URL: Repository, https://github.com/WalkingMountain/tabulus
|
|
6
6
|
Project-URL: Issues, https://github.com/WalkingMountain/tabulus/issues
|
|
@@ -26,35 +26,53 @@ Description-Content-Type: text/markdown
|
|
|
26
26
|
|
|
27
27
|
# Tabulus
|
|
28
28
|
|
|
29
|
-
**
|
|
29
|
+
**Let your AI agent query your real Postgres — without leaking customer data to the LLM.**
|
|
30
30
|
|
|
31
|
-
Tabulus is
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
Tabulus is a Postgres MCP server that sits between your AI agent (Claude Code,
|
|
32
|
+
Cursor, any MCP client) and your database. It scrubs emails, API keys, JWTs,
|
|
33
|
+
credit cards, SSNs, phones, and IPs **out of every result before the agent ever
|
|
34
|
+
sees them** — so you can point Claude at a production-shaped database without
|
|
35
|
+
piping your customers' PII into someone else's model.
|
|
35
36
|
|
|
36
|
-
|
|
37
|
+

|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
before that was true. Tabulus flips the model: **the agent is a first-class
|
|
40
|
-
user, not a sidebar feature.**
|
|
39
|
+
<sub>Same query, redactor off vs on — real output. Record the animated version yourself: [`demo/`](demo/).</sub>
|
|
41
40
|
|
|
42
|
-
|
|
41
|
+
## The problem
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
43
|
+
Connecting an AI agent to your database means every row it samples — every
|
|
44
|
+
customer email, every Stripe key sitting in a config table, every JWT in a
|
|
45
|
+
sessions row — gets shipped into the LLM's context window. To Anthropic. To
|
|
46
|
+
OpenAI. To wherever the model runs. Most DB MCP servers solve "can the agent
|
|
47
|
+
drop my tables." **Tabulus solves what leaves the building.**
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Without redaction: {"email": "jane@acme.com", "api_key": "sk_live_4eC39Hq..."}
|
|
51
|
+
With Tabulus: {"email": "[REDACTED:email]", "api_key": "[REDACTED:secret]"}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Turn it on with `TABULUS_REDACT=on`. The sentinel keeps enough structure for the
|
|
55
|
+
agent to reason (`"Stripe call failed with [REDACTED:stripe_key]"`) without ever
|
|
56
|
+
seeing the secret.
|
|
57
|
+
|
|
58
|
+
## Also in the box
|
|
59
|
+
|
|
60
|
+
- **PII/secret redactor** — emails, API keys (Anthropic/OpenAI/Stripe/GitHub/AWS/
|
|
61
|
+
Slack/Google), OAuth + bearer tokens, JWTs, PEM private keys, credit cards, SSNs,
|
|
62
|
+
international + US phones, IPv4/IPv6 — plus `key=value` secrets and any value in a
|
|
63
|
+
secret-named column (`password`, `api_key`, …). Conservative by design: false
|
|
64
|
+
positives are cheap, false negatives leak.
|
|
65
|
+
- **Read-only, enforced three ways** — keyword gate + Postgres read-only
|
|
66
|
+
transaction + row cap. The agent can't drop your tables.
|
|
67
|
+
- **Schema introspection tuned for context windows** — compact JSON, foreign keys
|
|
68
|
+
flattened, sample rows inline. Fits a 50-table schema in one prompt.
|
|
69
|
+
- **`EXPLAIN` as a tool** — the agent reasons about query plans before proposing
|
|
70
|
+
optimizations.
|
|
71
|
+
- **Statement timeout + row cap** server-side. No accidental DOS.
|
|
54
72
|
|
|
55
73
|
## Status
|
|
56
74
|
|
|
57
|
-
**v0.0.
|
|
75
|
+
**v0.0.4 — alpha.** Postgres only. Stdio MCP transport only. No GUI yet.
|
|
58
76
|
|
|
59
77
|
## Install
|
|
60
78
|
|
|
@@ -133,6 +151,15 @@ Add to `~/.cursor/mcp_servers.json`:
|
|
|
133
151
|
| `TABULUS_REDACT` | `off` | Set `on` to scrub PII and secrets (emails, API keys, tokens, JWTs, private keys, credit cards, SSNs, phones, IPs) from `sample_rows`, `safe_select`, and `describe_schema` output before the agent sees it. Recommended for production. |
|
|
134
152
|
| `TABULUS_ALLOW_WRITES` | `false` | Set `true` to disable the write block (NOT recommended) |
|
|
135
153
|
|
|
154
|
+
## For teams
|
|
155
|
+
|
|
156
|
+
Tabulus core is free and MIT — and stays that way. If your organization is letting
|
|
157
|
+
AI agents touch real, sensitive databases and you need **audit logs, centrally
|
|
158
|
+
enforced redaction policy, column-level masking, or SSO**, we're exploring a
|
|
159
|
+
self-hosted Team tier (your data never routes through us).
|
|
160
|
+
|
|
161
|
+
👉 **[Tell us what you'd need →](https://github.com/WalkingMountain/tabulus/issues/1)**
|
|
162
|
+
|
|
136
163
|
## Roadmap
|
|
137
164
|
|
|
138
165
|
- v0.1 — Postgres parity, polished install
|
tabulus-0.0.4/README.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Tabulus
|
|
2
|
+
|
|
3
|
+
**Let your AI agent query your real Postgres — without leaking customer data to the LLM.**
|
|
4
|
+
|
|
5
|
+
Tabulus is a Postgres MCP server that sits between your AI agent (Claude Code,
|
|
6
|
+
Cursor, any MCP client) and your database. It scrubs emails, API keys, JWTs,
|
|
7
|
+
credit cards, SSNs, phones, and IPs **out of every result before the agent ever
|
|
8
|
+
sees them** — so you can point Claude at a production-shaped database without
|
|
9
|
+
piping your customers' PII into someone else's model.
|
|
10
|
+
|
|
11
|
+

|
|
12
|
+
|
|
13
|
+
<sub>Same query, redactor off vs on — real output. Record the animated version yourself: [`demo/`](demo/).</sub>
|
|
14
|
+
|
|
15
|
+
## The problem
|
|
16
|
+
|
|
17
|
+
Connecting an AI agent to your database means every row it samples — every
|
|
18
|
+
customer email, every Stripe key sitting in a config table, every JWT in a
|
|
19
|
+
sessions row — gets shipped into the LLM's context window. To Anthropic. To
|
|
20
|
+
OpenAI. To wherever the model runs. Most DB MCP servers solve "can the agent
|
|
21
|
+
drop my tables." **Tabulus solves what leaves the building.**
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
Without redaction: {"email": "jane@acme.com", "api_key": "sk_live_4eC39Hq..."}
|
|
25
|
+
With Tabulus: {"email": "[REDACTED:email]", "api_key": "[REDACTED:secret]"}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Turn it on with `TABULUS_REDACT=on`. The sentinel keeps enough structure for the
|
|
29
|
+
agent to reason (`"Stripe call failed with [REDACTED:stripe_key]"`) without ever
|
|
30
|
+
seeing the secret.
|
|
31
|
+
|
|
32
|
+
## Also in the box
|
|
33
|
+
|
|
34
|
+
- **PII/secret redactor** — emails, API keys (Anthropic/OpenAI/Stripe/GitHub/AWS/
|
|
35
|
+
Slack/Google), OAuth + bearer tokens, JWTs, PEM private keys, credit cards, SSNs,
|
|
36
|
+
international + US phones, IPv4/IPv6 — plus `key=value` secrets and any value in a
|
|
37
|
+
secret-named column (`password`, `api_key`, …). Conservative by design: false
|
|
38
|
+
positives are cheap, false negatives leak.
|
|
39
|
+
- **Read-only, enforced three ways** — keyword gate + Postgres read-only
|
|
40
|
+
transaction + row cap. The agent can't drop your tables.
|
|
41
|
+
- **Schema introspection tuned for context windows** — compact JSON, foreign keys
|
|
42
|
+
flattened, sample rows inline. Fits a 50-table schema in one prompt.
|
|
43
|
+
- **`EXPLAIN` as a tool** — the agent reasons about query plans before proposing
|
|
44
|
+
optimizations.
|
|
45
|
+
- **Statement timeout + row cap** server-side. No accidental DOS.
|
|
46
|
+
|
|
47
|
+
## Status
|
|
48
|
+
|
|
49
|
+
**v0.0.4 — alpha.** Postgres only. Stdio MCP transport only. No GUI yet.
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install tabulus
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Run
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
export DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
61
|
+
tabulus
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Then point your MCP client at the `tabulus` command.
|
|
65
|
+
|
|
66
|
+
### Claude Code (project-level)
|
|
67
|
+
|
|
68
|
+
Create `.mcp.json` in your project root:
|
|
69
|
+
|
|
70
|
+
```jsonc
|
|
71
|
+
{
|
|
72
|
+
"mcpServers": {
|
|
73
|
+
"tabulus": {
|
|
74
|
+
"command": "tabulus",
|
|
75
|
+
"args": [],
|
|
76
|
+
"env": {
|
|
77
|
+
"DATABASE_URL": "postgres://user:pass@host:5432/dbname"
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Restart Claude Code in that directory and approve the trust prompt.
|
|
85
|
+
|
|
86
|
+
### Claude Code (user-level via CLI)
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
claude mcp add tabulus "$(which tabulus)" --env DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Cursor
|
|
93
|
+
|
|
94
|
+
Add to `~/.cursor/mcp_servers.json`:
|
|
95
|
+
|
|
96
|
+
```jsonc
|
|
97
|
+
{
|
|
98
|
+
"mcpServers": {
|
|
99
|
+
"tabulus": {
|
|
100
|
+
"command": "tabulus",
|
|
101
|
+
"env": { "DATABASE_URL": "postgres://user:pass@host:5432/dbname" }
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Tools
|
|
108
|
+
|
|
109
|
+
| Tool | Description |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `list_tables` | All tables with row count estimates + sizes |
|
|
112
|
+
| `describe_schema` | Columns, PK, FKs, indexes, sample rows for a table |
|
|
113
|
+
| `sample_rows` | Random sample from a table |
|
|
114
|
+
| `safe_select` | Run a read-only SELECT (write keywords rejected) |
|
|
115
|
+
| `explain` | Get query plan (EXPLAIN FORMAT JSON) |
|
|
116
|
+
|
|
117
|
+
## Configuration
|
|
118
|
+
|
|
119
|
+
| Variable | Default | Purpose |
|
|
120
|
+
|---|---|---|
|
|
121
|
+
| `DATABASE_URL` | — (required) | Postgres connection URL |
|
|
122
|
+
| `TABULUS_MAX_ROWS` | `100` | Hard cap on rows returned by any tool |
|
|
123
|
+
| `TABULUS_SAMPLE_SIZE` | `3` | Sample rows included in `describe_schema` |
|
|
124
|
+
| `TABULUS_STATEMENT_TIMEOUT_MS` | `5000` | Server-side query timeout |
|
|
125
|
+
| `TABULUS_REDACT` | `off` | Set `on` to scrub PII and secrets (emails, API keys, tokens, JWTs, private keys, credit cards, SSNs, phones, IPs) from `sample_rows`, `safe_select`, and `describe_schema` output before the agent sees it. Recommended for production. |
|
|
126
|
+
| `TABULUS_ALLOW_WRITES` | `false` | Set `true` to disable the write block (NOT recommended) |
|
|
127
|
+
|
|
128
|
+
## For teams
|
|
129
|
+
|
|
130
|
+
Tabulus core is free and MIT — and stays that way. If your organization is letting
|
|
131
|
+
AI agents touch real, sensitive databases and you need **audit logs, centrally
|
|
132
|
+
enforced redaction policy, column-level masking, or SSO**, we're exploring a
|
|
133
|
+
self-hosted Team tier (your data never routes through us).
|
|
134
|
+
|
|
135
|
+
👉 **[Tell us what you'd need →](https://github.com/WalkingMountain/tabulus/issues/1)**
|
|
136
|
+
|
|
137
|
+
## Roadmap
|
|
138
|
+
|
|
139
|
+
- v0.1 — Postgres parity, polished install
|
|
140
|
+
- v0.2 — SQLite adapter
|
|
141
|
+
- v0.3 — MySQL / MariaDB adapter
|
|
142
|
+
- v0.x — Tauri desktop GUI shell on top of the same core
|
|
143
|
+
- v1.0 — Stable, cross-platform, multi-DB
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
MIT. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Tabulus redactor demo
|
|
2
|
+
|
|
3
|
+
Records the before/after that is the whole pitch: an agent sampling a real table,
|
|
4
|
+
with and without PII redaction.
|
|
5
|
+
|
|
6
|
+
## Run it
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
# 1. throwaway Postgres (any will do)
|
|
10
|
+
export DATABASE_URL=postgres://postgres:dev@localhost:5433/postgres
|
|
11
|
+
|
|
12
|
+
# 2. install + seed fake-PII table
|
|
13
|
+
pip install -e .
|
|
14
|
+
psql "$DATABASE_URL" -f demo/seed.sql
|
|
15
|
+
|
|
16
|
+
# 3a. just watch it
|
|
17
|
+
bash demo/record.sh
|
|
18
|
+
|
|
19
|
+
# 3b. or record an animated GIF yourself (the README embeds the static demo/tabulus-redactor.svg)
|
|
20
|
+
asciinema rec -c "bash demo/record.sh" demo/tabulus-redactor.cast
|
|
21
|
+
agg demo/tabulus-redactor.cast demo/tabulus-redactor.gif
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
All seed values are fake. Point this at a scratch database only — never prod.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Scare-then-relieve demo. Records cleanly under asciinema:
|
|
3
|
+
# asciinema rec -c "demo/record.sh" tabulus-redactor.cast
|
|
4
|
+
# Then: agg tabulus-redactor.cast tabulus-redactor.gif (or upload the .cast)
|
|
5
|
+
#
|
|
6
|
+
# Requires: a throwaway Postgres + DATABASE_URL set, and `pip install -e .`
|
|
7
|
+
# Seed first: psql "$DATABASE_URL" -f demo/seed.sql
|
|
8
|
+
set -euo pipefail
|
|
9
|
+
|
|
10
|
+
: "${DATABASE_URL:?set DATABASE_URL to a throwaway Postgres, e.g. postgres://postgres:dev@localhost:5433/postgres}"
|
|
11
|
+
|
|
12
|
+
# Sleep for the given number of seconds (default 1.4) to pace the recording.
pause() {
  local seconds="${1:-1.4}"
  sleep "$seconds"
}
|
|
13
|
+
# Print "# <message>" as a bold-cyan caption on its own line, then pause 1.2s.
say() {
  local caption="$1"
  printf '\n\033[1;36m# %s\033[0m\n' "$caption"
  pause 1.2
}
|
|
14
|
+
|
|
15
|
+
# Run sample_rows through Tabulus' own db layer, redactor toggled by env.
|
|
16
|
+
# Fetch a sample from the `customers` table through Tabulus' db layer and print
# it as indented JSON. The Python program is passed on stdin through a quoted
# heredoc ('PY'), so the shell performs no expansion inside it. Callers choose
# the redaction mode by prefixing the call, e.g. `TABULUS_REDACT=on sample`.
sample() {
  python - <<'PY'
import asyncio, json, os
from tabulus.config import load
from tabulus.db import get_pool, sample_rows, close_pool
async def main():
    pool = await get_pool(load())
    rows = await sample_rows(pool, "customers", 2)
    print(json.dumps(rows, indent=2, default=str))
    await close_pool()
asyncio.run(main())
PY
}
|
|
29
|
+
|
|
30
|
+
# ── Demo sequence: unredacted sample, commentary, redacted sample ────────────
# `clear` exits non-zero when TERM is unset or unknown (piped or CI runs). With
# `set -e` active that would abort the demo before anything is shown, so a
# failed clear is tolerated.
clear || true
say "Your AI agent samples a 'customers' table. Watch what reaches the model."
pause 1.6

say "WITHOUT Tabulus redaction (TABULUS_REDACT=off) — this is what most DB MCP servers send to the LLM:"
TABULUS_REDACT=off sample
pause 2.8

say "Emails. Stripe live keys. JWTs. Credit cards. SSNs. All shipped to Anthropic/OpenAI."
pause 2.4

say "WITH Tabulus (TABULUS_REDACT=on) — same query, scrubbed before the agent sees a thing:"
TABULUS_REDACT=on sample
pause 3.0

say "The agent still reasons over the shape. Your customers' data never left the building."
pause 2.0
# Closing call-to-action line in bold green.
printf '\n\033[1;32m pip install tabulus · TABULUS_REDACT=on\033[0m\n\n'
pause 2.0
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
-- Demo seed: a customers table stuffed with the exact kinds of PII/secrets
|
|
2
|
+
-- that leak into an LLM context when an agent samples a real table.
|
|
3
|
+
-- All values are fake. Run against a throwaway database only.
|
|
4
|
+
|
|
5
|
+
DROP TABLE IF EXISTS customers;
|
|
6
|
+
|
|
7
|
+
CREATE TABLE customers (
|
|
8
|
+
id serial PRIMARY KEY,
|
|
9
|
+
name text,
|
|
10
|
+
email text,
|
|
11
|
+
phone text,
|
|
12
|
+
ssn text,
|
|
13
|
+
credit_card text,
|
|
14
|
+
stripe_key text, -- secret accidentally stored in a row (it happens)
|
|
15
|
+
session_jwt text,
|
|
16
|
+
last_ip text
|
|
17
|
+
);
|
|
18
|
+
|
|
19
|
+
-- The stripe_key values are split with || so no secret-shaped literal lands in
|
|
20
|
+
-- git (GitHub push protection rejects contiguous sk_live_ patterns). Postgres
|
|
21
|
+
-- reconstructs the full value at query time, so the redactor still fires on it.
|
|
22
|
+
INSERT INTO customers (name, email, phone, ssn, credit_card, stripe_key, session_jwt, last_ip) VALUES
|
|
23
|
+
('Jane Acme', 'jane@acme.com', '+1 415 555 0132', '123-45-6789', '4242 4242 4242 4242',
|
|
24
|
+
'sk_' || 'live_4eC39HqLyjFAKEdemoT1zdp7dc',
|
|
25
|
+
'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.s5_3Vg7Qktype_demo_signature_xx', '203.0.113.7'),
|
|
26
|
+
('Raj Patel', 'raj.patel@globex.io','+44 20 7946 0991','987-65-4321', '5555 5555 5555 4444',
|
|
27
|
+
'sk_' || 'live_51HxQ2eLkCm0FAKEdemoKEYexampleZZ',
|
|
28
|
+
'eyJhbGciOiJIUzI1NiJ9.eyJ1aWQiOiI0NDcifQ.demo_sig_do_not_use_in_real_life', '198.51.100.23');
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="880" height="430" font-family="ui-monospace,SFMono-Regular,Menlo,Consolas,monospace" font-size="13">
|
|
2
|
+
<rect width="880" height="430" rx="10" fill="#0d1117"/>
|
|
3
|
+
<text x="24" y="34" fill="#e6edf3" font-size="15" font-weight="700">sample_rows("customers") → what your AI agent receives</text>
|
|
4
|
+
|
|
5
|
+
<!-- OFF panel -->
|
|
6
|
+
<rect x="24" y="54" width="404" height="352" rx="8" fill="#161b22" stroke="#f85149" stroke-width="1.5"/>
|
|
7
|
+
<text x="40" y="80" fill="#f85149" font-weight="700">TABULUS_REDACT=off</text>
|
|
8
|
+
<text x="40" y="98" fill="#8b949e" font-size="11">leaks to the LLM ↓</text>
|
|
9
|
+
<g fill="#c9d1d9">
|
|
10
|
+
<text x="40" y="128">{</text>
|
|
11
|
+
<text x="52" y="146">"name": <tspan fill="#a5d6ff">"Jane Acme"</tspan>,</text>
|
|
12
|
+
<text x="52" y="164">"email": <tspan fill="#ff7b72">"jane@acme.com"</tspan>,</text>
|
|
13
|
+
<text x="52" y="182">"phone": <tspan fill="#ff7b72">"+1 415 555 0132"</tspan>,</text>
|
|
14
|
+
<text x="52" y="200">"ssn": <tspan fill="#ff7b72">"123-45-6789"</tspan>,</text>
|
|
15
|
+
<text x="52" y="218">"credit_card": <tspan fill="#ff7b72">"4242 4242…4242"</tspan>,</text>
|
|
16
|
+
<text x="52" y="236">"stripe_key": <tspan fill="#ff7b72">"sk_live_4eC39…"</tspan>,</text>
|
|
17
|
+
<text x="52" y="254">"session_jwt": <tspan fill="#ff7b72">"eyJhbG…3Vg7Qk"</tspan>,</text>
|
|
18
|
+
<text x="52" y="272">"last_ip": <tspan fill="#ff7b72">"203.0.113.7"</tspan></text>
|
|
19
|
+
<text x="40" y="290">}</text>
|
|
20
|
+
</g>
|
|
21
|
+
<text x="40" y="380" fill="#f85149" font-size="11.5">✗ customer PII + a live-shaped secret shipped to the model</text>
|
|
22
|
+
|
|
23
|
+
<!-- arrow -->
|
|
24
|
+
<text x="436" y="240" fill="#8b949e" font-size="22">→</text>
|
|
25
|
+
|
|
26
|
+
<!-- ON panel -->
|
|
27
|
+
<rect x="452" y="54" width="404" height="352" rx="8" fill="#161b22" stroke="#3fb950" stroke-width="1.5"/>
|
|
28
|
+
<text x="468" y="80" fill="#3fb950" font-weight="700">TABULUS_REDACT=on</text>
|
|
29
|
+
<text x="468" y="98" fill="#8b949e" font-size="11">scrubbed before it leaves ↓</text>
|
|
30
|
+
<g fill="#c9d1d9">
|
|
31
|
+
<text x="468" y="128">{</text>
|
|
32
|
+
<text x="480" y="146">"name": <tspan fill="#a5d6ff">"Jane Acme"</tspan>,</text>
|
|
33
|
+
<text x="480" y="164">"email": <tspan fill="#3fb950">"[REDACTED:email]"</tspan>,</text>
|
|
34
|
+
<text x="480" y="182">"phone": <tspan fill="#3fb950">"[REDACTED:phone]"</tspan>,</text>
|
|
35
|
+
<text x="480" y="200">"ssn": <tspan fill="#3fb950">"[REDACTED:ssn]"</tspan>,</text>
|
|
36
|
+
<text x="480" y="218">"credit_card": <tspan fill="#3fb950">"[REDACTED:credit_card]"</tspan>,</text>
|
|
37
|
+
<text x="480" y="236">"stripe_key": <tspan fill="#3fb950">"[REDACTED:stripe_key]"</tspan>,</text>
|
|
38
|
+
<text x="480" y="254">"session_jwt": <tspan fill="#3fb950">"[REDACTED:jwt]"</tspan>,</text>
|
|
39
|
+
<text x="480" y="272">"last_ip": <tspan fill="#3fb950">"[REDACTED:ipv4]"</tspan></text>
|
|
40
|
+
<text x="468" y="290">}</text>
|
|
41
|
+
</g>
|
|
42
|
+
<text x="468" y="380" fill="#3fb950" font-size="11.5">✓ agent still reasons over the shape — data never left the building</text>
|
|
43
|
+
</svg>
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""PII / secret redactor for tool output before LLM sees it.
|
|
2
|
+
|
|
3
|
+
Database tools (sample_rows, safe_select, describe_schema's sample_rows)
|
|
4
|
+
return rows from user tables. Those rows often contain customer emails,
|
|
5
|
+
API keys, JWTs, credit cards, SSNs, phone numbers, IPs. Without redaction
|
|
6
|
+
that data ships to Anthropic on every query — brand-killing leak.
|
|
7
|
+
|
|
8
|
+
Sentinel format: `[REDACTED:type]` — preserves enough structure for the
|
|
9
|
+
LLM to reason ("Stripe call failed with [REDACTED:stripe_key]") without
|
|
10
|
+
leaking the value.
|
|
11
|
+
|
|
12
|
+
Conservative philosophy: false positives are cheap, false negatives kill.
|
|
13
|
+
|
|
14
|
+
Off by default — set TABULUS_REDACT=on to enable.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Each entry is (kind, pattern, replacement). `replacement` may contain `\1`
|
|
25
|
+
# backreferences to preserve a captured prefix (used by secret_kv to keep the
|
|
26
|
+
# key name and mask only the value). Default replacement is the sentinel.
|
|
27
|
+
def _sentinel(kind: str) -> str:
    """Build the placeholder text that stands in for a redacted value.

    Args:
        kind: Label of the detected data type, e.g. ``"email"``.

    Returns:
        The sentinel in the form ``[REDACTED:<kind>]``.
    """
    template = "[REDACTED:{}]"
    return template.format(kind)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
_PATTERNS: list[tuple[str, re.Pattern[str], str]] = [
|
|
32
|
+
# ── PEM private key blocks (highest value — never let one leak) ──────────
|
|
33
|
+
(
|
|
34
|
+
"private_key",
|
|
35
|
+
re.compile(
|
|
36
|
+
r"-----BEGIN[A-Z0-9 ]*PRIVATE KEY-----.*?-----END[A-Z0-9 ]*PRIVATE KEY-----", re.DOTALL
|
|
37
|
+
),
|
|
38
|
+
_sentinel("private_key"),
|
|
39
|
+
),
|
|
40
|
+
# ── JWT (eyJ... three segments) ─────────────────────────────────────────
|
|
41
|
+
(
|
|
42
|
+
"jwt",
|
|
43
|
+
re.compile(r"\beyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
|
|
44
|
+
_sentinel("jwt"),
|
|
45
|
+
),
|
|
46
|
+
# ── Vendor API keys with distinctive prefixes ───────────────────────────
|
|
47
|
+
("anthropic_key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}"), _sentinel("anthropic_key")),
|
|
48
|
+
("openai_key", re.compile(r"\bsk-(?:proj-)?[A-Za-z0-9]{20,}"), _sentinel("openai_key")),
|
|
49
|
+
(
|
|
50
|
+
"stripe_key",
|
|
51
|
+
re.compile(r"\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{20,}"),
|
|
52
|
+
_sentinel("stripe_key"),
|
|
53
|
+
),
|
|
54
|
+
(
|
|
55
|
+
"github_token",
|
|
56
|
+
re.compile(r"\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}"),
|
|
57
|
+
_sentinel("github_token"),
|
|
58
|
+
),
|
|
59
|
+
("slack_token", re.compile(r"\bxox[bpars]-[A-Za-z0-9-]{20,}"), _sentinel("slack_token")),
|
|
60
|
+
("aws_access_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b"), _sentinel("aws_access_key")),
|
|
61
|
+
("google_api_key", re.compile(r"\bAIza[0-9A-Za-z_-]{35}\b"), _sentinel("google_api_key")),
|
|
62
|
+
# ── Google OAuth access token ───────────────────────────────────────────
|
|
63
|
+
("google_oauth", re.compile(r"\bya29\.[A-Za-z0-9._-]{20,}"), _sentinel("google_oauth")),
|
|
64
|
+
# ── Generic secret in key=value / key: value form (keep key, mask value) ─
|
|
65
|
+
# Gated on a secret-y key name so false positives stay low. Per the module's
|
|
66
|
+
# philosophy, masking a benign value here is cheaper than leaking a real one.
|
|
67
|
+
(
|
|
68
|
+
"secret",
|
|
69
|
+
re.compile(
|
|
70
|
+
r"(?i)(\b\w*(?:password|passwd|pwd|secret|api[_-]?key|apikey|access[_-]?key"
|
|
71
|
+
r"|access[_-]?token|client[_-]?secret|private[_-]?key|auth[_-]?token"
|
|
72
|
+
r"|credentials?|token)\w*\s*[:=]\s*[\"']?)([^\s\"',;]{6,})"
|
|
73
|
+
),
|
|
74
|
+
r"\1[REDACTED:secret]",
|
|
75
|
+
),
|
|
76
|
+
# ── Bearer / Token authorization schemes ────────────────────────────────
|
|
77
|
+
(
|
|
78
|
+
"bearer_token",
|
|
79
|
+
re.compile(r"(?i)\b(?:bearer|token)\s+[A-Za-z0-9._~+/-]{20,}={0,2}"),
|
|
80
|
+
_sentinel("bearer_token"),
|
|
81
|
+
),
|
|
82
|
+
# ── Credit card (13-19 digits, common groupings) ────────────────────────
|
|
83
|
+
("credit_card", re.compile(r"\b(?:\d[ -]*?){13,19}\b"), _sentinel("credit_card")),
|
|
84
|
+
# ── SSN (US) ────────────────────────────────────────────────────────────
|
|
85
|
+
("ssn", re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), _sentinel("ssn")),
|
|
86
|
+
# ── Email ───────────────────────────────────────────────────────────────
|
|
87
|
+
(
|
|
88
|
+
"email",
|
|
89
|
+
re.compile(
|
|
90
|
+
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?"
|
|
91
|
+
r"(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?)+\b"
|
|
92
|
+
),
|
|
93
|
+
_sentinel("email"),
|
|
94
|
+
),
|
|
95
|
+
# ── Phone, international: requires a leading + (avoids matching bare number
|
|
96
|
+
# runs like "2020 2021 2022"). Groups need no internal separators. ─────
|
|
97
|
+
(
|
|
98
|
+
"phone",
|
|
99
|
+
re.compile(
|
|
100
|
+
r"(?<![A-Za-z0-9])\+\d{1,3}[\s.-]?\(?\d{2,5}\)?(?:[\s.-]?\d{2,5}){1,3}(?![A-Za-z0-9])"
|
|
101
|
+
),
|
|
102
|
+
_sentinel("phone"),
|
|
103
|
+
),
|
|
104
|
+
# ── Phone, US/local: requires separators between 3-3-4 groups ────────────
|
|
105
|
+
(
|
|
106
|
+
"phone",
|
|
107
|
+
re.compile(r"(?<![A-Za-z0-9])\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}(?![A-Za-z0-9])"),
|
|
108
|
+
_sentinel("phone"),
|
|
109
|
+
),
|
|
110
|
+
# ── IPv4 ────────────────────────────────────────────────────────────────
|
|
111
|
+
("ipv4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"), _sentinel("ipv4")),
|
|
112
|
+
# ── IPv6 (handles :: compression, blocks Class::Path false matches) ─────
|
|
113
|
+
(
|
|
114
|
+
"ipv6",
|
|
115
|
+
re.compile(r"(?<![A-Za-z0-9:])(?:[A-Fa-f0-9]{0,4}:){2,}[A-Fa-f0-9]{0,4}(?![A-Za-z0-9:])"),
|
|
116
|
+
_sentinel("ipv6"),
|
|
117
|
+
),
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Column/field names whose VALUE is a secret regardless of its content. A bare
|
|
122
|
+
# password or token sitting in its own field has no in-text signal (no `key=`
|
|
123
|
+
# prefix), so we key off the column name instead. Matched as a substring, so
|
|
124
|
+
# `db_password`, `user_api_key`, `oauth_token` all trigger.
|
|
125
|
+
_SECRET_KEY_RE = re.compile(
|
|
126
|
+
r"(?i)(?:password|passwd|pwd|secret|api[_-]?key|apikey|access[_-]?key"
|
|
127
|
+
r"|access[_-]?token|client[_-]?secret|private[_-]?key|auth[_-]?token"
|
|
128
|
+
r"|credentials?|token)"
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def is_enabled() -> bool:
    """Report whether redaction is switched on via ``TABULUS_REDACT``.

    The accepted values, compared case-insensitively, are ``on``, ``true``,
    ``1`` and ``yes``. Any other value, or an unset variable, leaves
    redaction off.

    Surrounding whitespace is ignored. Without that, a value such as
    ``"on "`` (easy to produce from a .env file or a quoted config entry)
    would silently leave redaction disabled.

    Returns:
        True if redaction should be applied, False otherwise.
    """
    raw = os.environ.get("TABULUS_REDACT", "off")
    return raw.strip().lower() in ("on", "true", "1", "yes")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def redact_string(s: str) -> str:
    """Scrub sensitive substrings of ``s``, leaving `[REDACTED:type]` sentinels.

    Every entry of ``_PATTERNS`` is applied in list order, each one working on
    the output of the previous substitution. Non-string or empty input is
    returned unchanged.

    Args:
        s: Text to scrub.

    Returns:
        The text with every pattern match replaced.
    """
    if not (isinstance(s, str) and s):
        return s
    scrubbed = s
    for _label, regex, substitute in _PATTERNS:
        scrubbed = regex.sub(substitute, scrubbed)
    return scrubbed
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _mask_secret_value(val: Any) -> Any:
    """Mask every non-empty scalar leaf of a value stored under a secret-named key.

    Container shape (dict / list / tuple) is preserved so the consumer still
    sees the structure; only the leaves are replaced.
    """
    if isinstance(val, (bytes, bytearray)):
        # Raw byte values (e.g. a binary column) are secrets too; empty ones carry nothing.
        return "[REDACTED:secret]" if val else val
    if isinstance(val, (str, int, float)):
        # An empty string has nothing to leak and is kept as-is.
        return "[REDACTED:secret]" if val != "" else val
    if isinstance(val, dict):
        return {k: _mask_secret_value(item) for k, item in val.items()}
    if isinstance(val, list):
        return [_mask_secret_value(item) for item in val]
    if isinstance(val, tuple):
        return tuple(_mask_secret_value(item) for item in val)
    # None and other non-text types are passed through unchanged.
    return val


def redact_value(v: Any) -> Any:
    """Recursively redact str / list / dict / tuple. Dict KEYS are NOT redacted.

    Strings go through ``redact_string``. For dicts, a string key matching
    ``_SECRET_KEY_RE`` (e.g. ``db_password``, ``user_api_key``) marks its
    value as a secret regardless of content. A non-empty str / int / float
    value is replaced by ``[REDACTED:secret]``. Byte strings, lists, tuples
    and dicts under such a key are masked leaf by leaf, so a secret-named
    column holding a collection (for instance an array column surfaced by the
    driver as a list) cannot slip through merely because its elements match
    no pattern.

    Args:
        v: Any value; typically a row dict or a list of row dicts.

    Returns:
        A redacted copy for str / dict / list / tuple input; any other value
        is returned unchanged.
    """
    if isinstance(v, str):
        return redact_string(v)
    if isinstance(v, dict):
        out = {}
        for k, val in v.items():
            if isinstance(k, str) and _SECRET_KEY_RE.search(k):
                # The column name is the only signal for a bare secret.
                out[k] = _mask_secret_value(val)
            else:
                out[k] = redact_value(val)
        return out
    if isinstance(v, list):
        return [redact_value(item) for item in v]
    if isinstance(v, tuple):
        return tuple(redact_value(item) for item in v)
    return v
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def maybe_redact(v: Any) -> Any:
    """Redact ``v`` only when TABULUS_REDACT is on; otherwise return it as-is."""
    if not is_enabled():
        return v
    return redact_value(v)
|
|
@@ -31,12 +31,24 @@ from tabulus.redactor import (
|
|
|
31
31
|
("AKIAIOSFODNN7EXAMPLE", "aws_access_key"),
|
|
32
32
|
("AIzaSyDdI0hCZtE6vySjMm-WEfRq3CPzqKqqsHI", "google_api_key"),
|
|
33
33
|
("Authorization: Bearer abcdef0123456789abcdef0123456789", "bearer_token"),
|
|
34
|
+
("Authorization: Token a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6", "bearer_token"),
|
|
34
35
|
("ssn 123-45-6789", "ssn"),
|
|
35
36
|
("Customer foo@example.com", "email"),
|
|
36
37
|
("call +1 415-555-1234", "phone"),
|
|
37
38
|
("at 192.168.1.42", "ipv4"),
|
|
38
39
|
("addr ::1 down", "ipv6"),
|
|
39
40
|
("card 4111 1111 1111 1111", "credit_card"),
|
|
41
|
+
# ── regression: adversarial false-negatives found in audit ──────────
|
|
42
|
+
(
|
|
43
|
+
"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA\n-----END RSA PRIVATE KEY-----",
|
|
44
|
+
"private_key",
|
|
45
|
+
),
|
|
46
|
+
("ya29.a0AfH6SMBx7kJq_longtokenvaluehere1234567890abcdef", "google_oauth"),
|
|
47
|
+
("aws_secret_access_key=wJalrXUtnFEMIK7MDENGbPxRfiCYEXAMPLEKEY", "secret"),
|
|
48
|
+
("api_key: 9f8e7d6c5b4a39281706f5e4d3c2b1a0", "secret"),
|
|
49
|
+
("client_secret='abcdef123456ghijkl'", "secret"),
|
|
50
|
+
("call +91 98765 43210", "phone"), # international, 5-5 grouping
|
|
51
|
+
("ring (415) 555-0132 now", "phone"), # US parens form
|
|
40
52
|
],
|
|
41
53
|
)
|
|
42
54
|
def test_pattern_redacted(payload, kind):
|
|
@@ -92,6 +104,40 @@ def test_list_of_rows():
|
|
|
92
104
|
assert out[0]["amount"] == 100
|
|
93
105
|
|
|
94
106
|
|
|
107
|
+
def test_secret_column_value_masked():
    """Values under secret-named columns are masked whatever they contain.

    A bare password carries no in-text marker, so the column name is the only
    signal available.
    """
    row = {
        "id": 7,
        "name": "Jane",
        "db_password": "hunter2supersecret",
        "user_api_key": "plainlookingvalue123",
        "oauth_token": "nostructurehere",
        "city": "Berlin",
    }
    masked = redact_value(row)
    for secret_column in ("db_password", "user_api_key", "oauth_token"):
        assert masked[secret_column] == "[REDACTED:secret]"
    # Columns without a secret-like name come back unchanged.
    untouched = {"name": "Jane", "city": "Berlin", "id": 7}
    for column, expected in untouched.items():
        assert masked[column] == expected
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_no_false_positive_number_runs():
    """Plain digit runs and UUIDs pass through without being taken for phone numbers."""
    harmless_inputs = (
        "years 2020 2021 2022 were busy",
        "550e8400-e29b-41d4-a716-446655440000",
    )
    for text in harmless_inputs:
        assert redact_string(text) == text
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_benign_keyvalue_preserved():
    """A key=value pair whose key is not secret-like is left alone (no over-redaction)."""
    for text in ("monkey=banana12345", "status=active"):
        assert redact_string(text) == text
|
|
139
|
+
|
|
140
|
+
|
|
95
141
|
def test_non_string_passthrough():
|
|
96
142
|
assert redact_value(42) == 42
|
|
97
143
|
assert redact_value(3.14) == 3.14
|
tabulus-0.0.3/README.md
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
# Tabulus
|
|
2
|
-
|
|
3
|
-
**A Postgres MCP server built for AI agents.**
|
|
4
|
-
|
|
5
|
-
Tabulus is the database workbench for the AI-augmented developer. Connect Claude
|
|
6
|
-
Code, Cursor, or any MCP-compatible client to your Postgres database and let the
|
|
7
|
-
agent introspect the schema, sample data, and write safe queries — without
|
|
8
|
-
copy-pasting schemas into chat windows.
|
|
9
|
-
|
|
10
|
-
## Why
|
|
11
|
-
|
|
12
|
-
Every modern dev workflow now includes an AI agent. Every DB GUI was designed
|
|
13
|
-
before that was true. Tabulus flips the model: **the agent is a first-class
|
|
14
|
-
user, not a sidebar feature.**
|
|
15
|
-
|
|
16
|
-
What that means in practice:
|
|
17
|
-
|
|
18
|
-
- Schema introspection optimized for LLM context windows (compact JSON, foreign
|
|
19
|
-
keys flattened, sample rows inline).
|
|
20
|
-
- Read-only by default — `INSERT`/`UPDATE`/`DELETE`/`DDL` are rejected at the
|
|
21
|
-
gateway. The agent can't drop your tables.
|
|
22
|
-
- `EXPLAIN` exposed as a tool so the agent can reason about query plans before
|
|
23
|
-
proposing optimizations.
|
|
24
|
-
- Statement timeout + row cap enforced server-side. No agent can DOS your
|
|
25
|
-
database by accident.
|
|
26
|
-
- Opt-in PII redactor (`TABULUS_REDACT=on`) scrubs emails, API keys, JWTs,
|
|
27
|
-
credit cards, phones, and IPs from tool output before the agent sees them.
|
|
28
|
-
|
|
29
|
-
## Status
|
|
30
|
-
|
|
31
|
-
**v0.0.1 — alpha.** Postgres only. Stdio MCP transport only. No GUI yet.
|
|
32
|
-
|
|
33
|
-
## Install
|
|
34
|
-
|
|
35
|
-
```bash
|
|
36
|
-
pip install tabulus
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
## Run
|
|
40
|
-
|
|
41
|
-
```bash
|
|
42
|
-
export DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
43
|
-
tabulus
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
Then point your MCP client at the `tabulus` command.
|
|
47
|
-
|
|
48
|
-
### Claude Code (project-level)
|
|
49
|
-
|
|
50
|
-
Create `.mcp.json` in your project root:
|
|
51
|
-
|
|
52
|
-
```jsonc
|
|
53
|
-
{
|
|
54
|
-
"mcpServers": {
|
|
55
|
-
"tabulus": {
|
|
56
|
-
"command": "tabulus",
|
|
57
|
-
"args": [],
|
|
58
|
-
"env": {
|
|
59
|
-
"DATABASE_URL": "postgres://user:pass@host:5432/dbname"
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
Restart Claude Code in that directory and approve the trust prompt.
|
|
67
|
-
|
|
68
|
-
### Claude Code (user-level via CLI)
|
|
69
|
-
|
|
70
|
-
```bash
|
|
71
|
-
claude mcp add tabulus "$(which tabulus)" --env DATABASE_URL=postgres://user:pass@host:5432/dbname
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### Cursor
|
|
75
|
-
|
|
76
|
-
Add to `~/.cursor/mcp_servers.json`:
|
|
77
|
-
|
|
78
|
-
```jsonc
|
|
79
|
-
{
|
|
80
|
-
"mcpServers": {
|
|
81
|
-
"tabulus": {
|
|
82
|
-
"command": "tabulus",
|
|
83
|
-
"env": { "DATABASE_URL": "postgres://user:pass@host:5432/dbname" }
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
## Tools
|
|
90
|
-
|
|
91
|
-
| Tool | Description |
|
|
92
|
-
|---|---|
|
|
93
|
-
| `list_tables` | All tables with row count estimates + sizes |
|
|
94
|
-
| `describe_schema` | Columns, PK, FKs, indexes, sample rows for a table |
|
|
95
|
-
| `sample_rows` | Random sample from a table |
|
|
96
|
-
| `safe_select` | Run a read-only SELECT (write keywords rejected) |
|
|
97
|
-
| `explain` | Get query plan (EXPLAIN FORMAT JSON) |
|
|
98
|
-
|
|
99
|
-
## Configuration
|
|
100
|
-
|
|
101
|
-
| Variable | Default | Purpose |
|
|
102
|
-
|---|---|---|
|
|
103
|
-
| `DATABASE_URL` | — (required) | Postgres connection URL |
|
|
104
|
-
| `TABULUS_MAX_ROWS` | `100` | Hard cap on rows returned by any tool |
|
|
105
|
-
| `TABULUS_SAMPLE_SIZE` | `3` | Sample rows included in `describe_schema` |
|
|
106
|
-
| `TABULUS_STATEMENT_TIMEOUT_MS` | `5000` | Server-side query timeout |
|
|
107
|
-
| `TABULUS_REDACT` | `off` | Set `on` to scrub PII (emails, API keys, JWTs, credit cards, phones, IPs) from `sample_rows`, `safe_select`, and `describe_schema` output before the agent sees it. Recommended for production. |
|
|
108
|
-
| `TABULUS_ALLOW_WRITES` | `false` | Set `true` to disable the write block (NOT recommended) |
|
|
109
|
-
|
|
110
|
-
## Roadmap
|
|
111
|
-
|
|
112
|
-
- v0.1 — Postgres parity, polished install
|
|
113
|
-
- v0.2 — SQLite adapter
|
|
114
|
-
- v0.3 — MySQL / MariaDB adapter
|
|
115
|
-
- v0.x — Tauri desktop GUI shell on top of the same core
|
|
116
|
-
- v1.0 — Stable, cross-platform, multi-DB
|
|
117
|
-
|
|
118
|
-
## License
|
|
119
|
-
|
|
120
|
-
MIT. See [LICENSE](./LICENSE).
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
"""PII / secret redactor for tool output before LLM sees it.
|
|
2
|
-
|
|
3
|
-
Database tools (sample_rows, safe_select, describe_schema's sample_rows)
|
|
4
|
-
return rows from user tables. Those rows often contain customer emails,
|
|
5
|
-
API keys, JWTs, credit cards, SSNs, phone numbers, IPs. Without redaction
|
|
6
|
-
that data ships to Anthropic on every query — brand-killing leak.
|
|
7
|
-
|
|
8
|
-
Sentinel format: `[REDACTED:type]` — preserves enough structure for the
|
|
9
|
-
LLM to reason ("Stripe call failed with [REDACTED:api_key]") without
|
|
10
|
-
leaking the value.
|
|
11
|
-
|
|
12
|
-
Conservative philosophy: false positives are cheap, false negatives kill.
|
|
13
|
-
|
|
14
|
-
Off by default — set TABULUS_REDACT=on to enable.
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import os
|
|
20
|
-
import re
|
|
21
|
-
from typing import Any
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
|
|
25
|
-
# ── JWT (eyJ... three segments) ─────────────────────────────────────────
|
|
26
|
-
("jwt", re.compile(r"\beyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+")),
|
|
27
|
-
# ── Vendor API keys with distinctive prefixes ───────────────────────────
|
|
28
|
-
("anthropic_key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}")),
|
|
29
|
-
("openai_key", re.compile(r"\bsk-(?:proj-)?[A-Za-z0-9]{20,}")),
|
|
30
|
-
("stripe_key", re.compile(r"\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{20,}")),
|
|
31
|
-
("github_token", re.compile(r"\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}")),
|
|
32
|
-
("slack_token", re.compile(r"\bxox[bpars]-[A-Za-z0-9-]{20,}")),
|
|
33
|
-
("aws_access_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
|
|
34
|
-
("google_api_key", re.compile(r"\bAIza[0-9A-Za-z_-]{35}\b")),
|
|
35
|
-
# ── Bearer / Authorization headers ──────────────────────────────────────
|
|
36
|
-
("bearer_token", re.compile(r"(?i)bearer\s+[A-Za-z0-9._~+/-]{20,}={0,2}")),
|
|
37
|
-
# ── Credit card (13-19 digits, common groupings) ────────────────────────
|
|
38
|
-
("credit_card", re.compile(r"\b(?:\d[ -]*?){13,19}\b")),
|
|
39
|
-
# ── SSN (US) ────────────────────────────────────────────────────────────
|
|
40
|
-
("ssn", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
|
|
41
|
-
# ── Email ───────────────────────────────────────────────────────────────
|
|
42
|
-
(
|
|
43
|
-
"email",
|
|
44
|
-
re.compile(
|
|
45
|
-
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?"
|
|
46
|
-
r"(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?)+\b"
|
|
47
|
-
),
|
|
48
|
-
),
|
|
49
|
-
# ── Phone (international + US, conservative) ────────────────────────────
|
|
50
|
-
(
|
|
51
|
-
"phone",
|
|
52
|
-
re.compile(
|
|
53
|
-
r"(?<![A-Za-z0-9])\+?\d{1,3}[\s.-]?\(?\d{2,4}\)?"
|
|
54
|
-
r"[\s.-]?\d{3,4}[\s.-]?\d{3,4}(?![A-Za-z0-9])"
|
|
55
|
-
),
|
|
56
|
-
),
|
|
57
|
-
# ── IPv4 ────────────────────────────────────────────────────────────────
|
|
58
|
-
("ipv4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
|
|
59
|
-
# ── IPv6 (handles :: compression, blocks Class::Path false matches) ─────
|
|
60
|
-
(
|
|
61
|
-
"ipv6",
|
|
62
|
-
re.compile(r"(?<![A-Za-z0-9:])(?:[A-Fa-f0-9]{0,4}:){2,}[A-Fa-f0-9]{0,4}(?![A-Za-z0-9:])"),
|
|
63
|
-
),
|
|
64
|
-
]
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def is_enabled() -> bool:
|
|
68
|
-
return os.environ.get("TABULUS_REDACT", "off").lower() in ("on", "true", "1", "yes")
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def redact_string(s: str) -> str:
|
|
72
|
-
"""Replace sensitive substrings with `[REDACTED:type]` sentinels. Idempotent."""
|
|
73
|
-
if not isinstance(s, str) or not s:
|
|
74
|
-
return s
|
|
75
|
-
out = s
|
|
76
|
-
for kind, pattern in _PATTERNS:
|
|
77
|
-
out = pattern.sub(f"[REDACTED:{kind}]", out)
|
|
78
|
-
return out
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def redact_value(v: Any) -> Any:
|
|
82
|
-
"""Recursively redact str / list / dict / tuple. Dict KEYS NOT redacted."""
|
|
83
|
-
if isinstance(v, str):
|
|
84
|
-
return redact_string(v)
|
|
85
|
-
if isinstance(v, dict):
|
|
86
|
-
return {k: redact_value(val) for k, val in v.items()}
|
|
87
|
-
if isinstance(v, list):
|
|
88
|
-
return [redact_value(item) for item in v]
|
|
89
|
-
if isinstance(v, tuple):
|
|
90
|
-
return tuple(redact_value(item) for item in v)
|
|
91
|
-
return v
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def maybe_redact(v: Any) -> Any:
|
|
95
|
-
"""No-op when TABULUS_REDACT is off, redact otherwise."""
|
|
96
|
-
return redact_value(v) if is_enabled() else v
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|