tablebridge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tablebridge/__init__.py +13 -0
- tablebridge/config.py +43 -0
- tablebridge/db.py +161 -0
- tablebridge/server.py +91 -0
- tablebridge-0.1.0.dist-info/METADATA +140 -0
- tablebridge-0.1.0.dist-info/RECORD +9 -0
- tablebridge-0.1.0.dist-info/WHEEL +4 -0
- tablebridge-0.1.0.dist-info/entry_points.txt +2 -0
- tablebridge-0.1.0.dist-info/licenses/LICENSE +21 -0
tablebridge/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""tablebridge — query your scattered CSV / Parquet / JSON files with SQL, via MCP.
|
|
2
|
+
|
|
3
|
+
Points a DuckDB engine at a directory of tabular files, loads each as an in-memory SQL
|
|
4
|
+
table, and lets an agent run read-only SQL (including JOINs across files) — so a
|
|
5
|
+
pile of exports becomes one queryable source of truth. Sandboxed to a single
|
|
6
|
+
data directory and read-only by default.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .config import Config
|
|
10
|
+
from .db import TableBridge, TableBridgeError
|
|
11
|
+
|
|
12
|
+
__all__ = ["Config", "TableBridge", "TableBridgeError", "__version__"]
|
|
13
|
+
__version__ = "0.1.0"
|
tablebridge/config.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Environment-driven configuration for the tablebridge server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
DEFAULT_MAX_ROWS = 1000

# File extensions loaded as SQL tables, mapped to the DuckDB reader function
# used to load them.
READERS = {
    ".csv": "read_csv_auto",
    ".tsv": "read_csv_auto",
    ".parquet": "read_parquet",
    ".json": "read_json_auto",
    ".ndjson": "read_json_auto",
}

# TABLEBRIDGE_RECURSIVE values (after trimming and lower-casing) that switch
# recursive scanning off; any other value leaves it on.
_FALSE_VALUES = frozenset({"0", "false", "no", "off"})


def _parse_max_rows(raw: str | None) -> int:
    """Turn the TABLEBRIDGE_MAX_ROWS setting into a positive row cap.

    Args:
        raw: The raw environment value, or ``None`` when the variable is unset.

    Returns:
        ``DEFAULT_MAX_ROWS`` when the value is unset or blank, otherwise the
        parsed integer.

    Raises:
        ValueError: if the value is not an integer or is smaller than 1.
    """
    if raw is None or not raw.strip():
        return DEFAULT_MAX_ROWS
    try:
        value = int(raw)
    except ValueError:
        raise ValueError(
            f"TABLEBRIDGE_MAX_ROWS must be an integer, got {raw!r}"
        ) from None
    # A cap below 1 would make every query return no rows (or hand a negative
    # size to the fetch), so reject it at configuration time.
    if value < 1:
        raise ValueError(f"TABLEBRIDGE_MAX_ROWS must be >= 1, got {value}")
    return value


@dataclass(frozen=True)
class Config:
    """Effective server configuration, sourced from the environment."""

    # Directory whose files are exposed as tables.
    data_dir: Path = Path(".")
    # Maximum number of rows returned per query/preview.
    max_rows: int = DEFAULT_MAX_ROWS
    # Whether subdirectories of data_dir are scanned too.
    recursive: bool = True

    @classmethod
    def from_env(cls, env: dict[str, str] | None = None) -> Config:
        """Build a configuration from environment variables.

        Reads ``TABLEBRIDGE_DATA_DIR`` (default ``"."``, expanded and
        resolved), ``TABLEBRIDGE_MAX_ROWS`` (default ``DEFAULT_MAX_ROWS``) and
        ``TABLEBRIDGE_RECURSIVE`` (on unless set to ``0``/``false``/``no``/
        ``off``, case-insensitively).

        Args:
            env: Mapping to read instead of ``os.environ``.

        Raises:
            ValueError: if ``TABLEBRIDGE_MAX_ROWS`` is not an integer >= 1.
        """
        src = os.environ if env is None else env
        recursive_flag = src.get("TABLEBRIDGE_RECURSIVE", "1").strip().lower()
        return cls(
            data_dir=Path(src.get("TABLEBRIDGE_DATA_DIR", ".")).expanduser().resolve(),
            max_rows=_parse_max_rows(src.get("TABLEBRIDGE_MAX_ROWS")),
            recursive=recursive_flag not in _FALSE_VALUES,
        )

    def as_dict(self) -> dict[str, object]:
        """Return the configuration as a JSON-friendly dict.

        Includes the sorted list of supported file extensions.
        """
        return {
            "data_dir": str(self.data_dir),
            "max_rows": self.max_rows,
            "recursive": self.recursive,
            "supported_extensions": sorted(READERS),
        }
|
tablebridge/db.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""DuckDB engine: load a directory of tabular files as in-memory tables and run
|
|
2
|
+
read-only SQL over them.
|
|
3
|
+
|
|
4
|
+
Security posture:
|
|
5
|
+
- Files are **materialized** into in-memory tables at scan time, so queries never
|
|
6
|
+
touch the filesystem afterward.
|
|
7
|
+
- Query SQL is validated to be a single read-only statement, and raw file-reader
|
|
8
|
+
functions (read_csv, read_parquet, glob, copy, attach, …) are rejected — an
|
|
9
|
+
agent cannot read a path outside the configured data directory.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import contextlib
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from .config import READERS, Config
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TableBridgeError(RuntimeError):
|
|
23
|
+
"""A user-facing error (bad SQL, unknown table, load failure)."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
_ALLOWED_START = {
|
|
27
|
+
"SELECT", "WITH", "FROM", "DESCRIBE", "SUMMARIZE", "SHOW", "EXPLAIN", "VALUES", "TABLE",
|
|
28
|
+
}
|
|
29
|
+
_FORBIDDEN = re.compile(
|
|
30
|
+
r"\b(read_csv|read_csv_auto|read_parquet|read_json|read_json_auto|read_ndjson|"
|
|
31
|
+
r"read_text|read_blob|parquet_scan|glob|copy|attach|detach|install|load|export|import)\b",
|
|
32
|
+
re.IGNORECASE,
|
|
33
|
+
)
|
|
34
|
+
_IDENT = re.compile(r"\W+")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _table_name(path: Path, taken: set[str]) -> str:
|
|
38
|
+
base = _IDENT.sub("_", path.stem).strip("_").lower() or "table"
|
|
39
|
+
name, i = base, 2
|
|
40
|
+
while name in taken:
|
|
41
|
+
name, i = f"{base}_{i}", i + 1
|
|
42
|
+
return name
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def validate_sql(sql: str) -> str:
|
|
46
|
+
"""Return the SQL if it is a single safe read-only statement, else raise."""
|
|
47
|
+
stmts = [s for s in (part.strip() for part in sql.split(";")) if s]
|
|
48
|
+
if len(stmts) != 1:
|
|
49
|
+
raise TableBridgeError("Provide exactly one SQL statement.")
|
|
50
|
+
stmt = stmts[0]
|
|
51
|
+
first = stmt.split(None, 1)[0].upper() if stmt.split() else ""
|
|
52
|
+
if first not in _ALLOWED_START:
|
|
53
|
+
raise TableBridgeError(
|
|
54
|
+
f"Only read-only queries are allowed (got '{first or '?'}'). "
|
|
55
|
+
"Use SELECT / WITH / DESCRIBE / SUMMARIZE / SHOW."
|
|
56
|
+
)
|
|
57
|
+
if _FORBIDDEN.search(stmt):
|
|
58
|
+
raise TableBridgeError(
|
|
59
|
+
"Raw file access functions are not allowed. Query the registered tables "
|
|
60
|
+
"by name (see list_sources)."
|
|
61
|
+
)
|
|
62
|
+
return stmt
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TableBridge:
|
|
66
|
+
"""Loads a data directory into DuckDB and answers read-only queries."""
|
|
67
|
+
|
|
68
|
+
def __init__(self, config: Config, con: Any = None) -> None:
|
|
69
|
+
self._config = config
|
|
70
|
+
self._registry: dict[str, dict[str, str]] = {}
|
|
71
|
+
self._own_con = con is None
|
|
72
|
+
self._con = con if con is not None else self._new_con()
|
|
73
|
+
self.scan()
|
|
74
|
+
|
|
75
|
+
def _new_con(self) -> Any:
|
|
76
|
+
import duckdb # noqa: PLC0415
|
|
77
|
+
|
|
78
|
+
return duckdb.connect(":memory:")
|
|
79
|
+
|
|
80
|
+
@property
|
|
81
|
+
def config(self) -> Config:
|
|
82
|
+
return self._config
|
|
83
|
+
|
|
84
|
+
# -- loading -------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
def scan(self) -> int:
|
|
87
|
+
"""(Re)load supported files under the data dir as in-memory tables.
|
|
88
|
+
|
|
89
|
+
Reconnects first (when we own the connection) so a prior scan's
|
|
90
|
+
``enable_external_access=false`` lock is reset and files can be read again.
|
|
91
|
+
"""
|
|
92
|
+
if self._own_con:
|
|
93
|
+
self._con = self._new_con()
|
|
94
|
+
self._registry.clear()
|
|
95
|
+
pattern = "**/*" if self._config.recursive else "*"
|
|
96
|
+
taken: set[str] = set()
|
|
97
|
+
for path in sorted(self._config.data_dir.glob(pattern)):
|
|
98
|
+
reader = READERS.get(path.suffix.lower())
|
|
99
|
+
if not path.is_file() or reader is None:
|
|
100
|
+
continue
|
|
101
|
+
name = _table_name(path, taken)
|
|
102
|
+
taken.add(name)
|
|
103
|
+
try:
|
|
104
|
+
self._con.execute(
|
|
105
|
+
f'CREATE OR REPLACE TABLE "{name}" AS SELECT * FROM {reader}(?)',
|
|
106
|
+
[str(path)],
|
|
107
|
+
)
|
|
108
|
+
except Exception as exc: # noqa: BLE001 - surface load errors per file
|
|
109
|
+
raise TableBridgeError(f"Failed to load {path.name}: {exc}") from exc
|
|
110
|
+
rel = str(path.relative_to(self._config.data_dir))
|
|
111
|
+
self._registry[name] = {"file": rel, "kind": path.suffix.lower().lstrip(".")}
|
|
112
|
+
# Defense in depth: once data is materialized, forbid further file access.
|
|
113
|
+
with contextlib.suppress(Exception):
|
|
114
|
+
self._con.execute("SET enable_external_access=false")
|
|
115
|
+
return len(self._registry)
|
|
116
|
+
|
|
117
|
+
# -- introspection -------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
def list_sources(self) -> list[dict[str, Any]]:
|
|
120
|
+
out = []
|
|
121
|
+
for name, meta in self._registry.items():
|
|
122
|
+
cols = self._con.execute(f'SELECT * FROM "{name}" LIMIT 0').description
|
|
123
|
+
out.append({"table": name, "file": meta["file"], "kind": meta["kind"], "columns": len(cols)})
|
|
124
|
+
return out
|
|
125
|
+
|
|
126
|
+
def describe(self, table: str) -> list[dict[str, str]]:
|
|
127
|
+
self._require(table)
|
|
128
|
+
rows = self._con.execute(f'DESCRIBE "{table}"').fetchall()
|
|
129
|
+
return [{"column": r[0], "type": r[1]} for r in rows]
|
|
130
|
+
|
|
131
|
+
def preview(self, table: str, n: int = 20) -> dict[str, Any]:
|
|
132
|
+
self._require(table)
|
|
133
|
+
n = max(1, min(n, self._config.max_rows))
|
|
134
|
+
return self._fetch(f'SELECT * FROM "{table}" LIMIT {n}')
|
|
135
|
+
|
|
136
|
+
def query(self, sql: str) -> dict[str, Any]:
|
|
137
|
+
return self._fetch(validate_sql(sql))
|
|
138
|
+
|
|
139
|
+
# -- helpers -------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
def _require(self, table: str) -> None:
|
|
142
|
+
if table not in self._registry:
|
|
143
|
+
known = ", ".join(self._registry) or "(none)"
|
|
144
|
+
raise TableBridgeError(f"Unknown table '{table}'. Available: {known}")
|
|
145
|
+
|
|
146
|
+
def _fetch(self, sql: str) -> dict[str, Any]:
|
|
147
|
+
try:
|
|
148
|
+
cur = self._con.execute(sql)
|
|
149
|
+
except Exception as exc: # noqa: BLE001 - return query errors to the agent
|
|
150
|
+
raise TableBridgeError(f"Query failed: {exc}") from exc
|
|
151
|
+
columns = [d[0] for d in cur.description] if cur.description else []
|
|
152
|
+
cap = self._config.max_rows
|
|
153
|
+
rows = cur.fetchmany(cap + 1)
|
|
154
|
+
truncated = len(rows) > cap
|
|
155
|
+
rows = rows[:cap]
|
|
156
|
+
return {
|
|
157
|
+
"columns": columns,
|
|
158
|
+
"rows": [dict(zip(columns, r, strict=False)) for r in rows],
|
|
159
|
+
"row_count": len(rows),
|
|
160
|
+
"truncated": truncated,
|
|
161
|
+
}
|
tablebridge/server.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""The tablebridge MCP server.
|
|
2
|
+
|
|
3
|
+
Tools return JSON so the agent gets structured results. Everything is read-only
|
|
4
|
+
and sandboxed to the configured data directory.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from mcp.server.fastmcp import FastMCP
|
|
13
|
+
|
|
14
|
+
from .config import Config
|
|
15
|
+
from .db import TableBridge
|
|
16
|
+
|
|
17
|
+
mcp = FastMCP("tablebridge")

# Bridge shared by all tools; stays None until get_bridge() is first called or
# set_bridge() installs one.
_bridge: TableBridge | None = None


def get_bridge() -> TableBridge:
    """Return the shared bridge, building it from the environment on first use."""
    global _bridge
    if _bridge is not None:
        return _bridge
    env_config = Config.from_env()
    _bridge = TableBridge(env_config)
    return _bridge
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def set_bridge(bridge: TableBridge) -> None:
    """Replace the module-level bridge (used by tests).

    After this call ``get_bridge()`` returns ``bridge`` instead of building
    one from the environment.
    """
    global _bridge
    _bridge = bridge
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _json(data: Any) -> str:
|
|
36
|
+
return json.dumps(data, indent=2, default=str)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@mcp.tool()
def list_sources() -> str:
    """List the tables available to query (one per data file) with column counts.

    Start here: each CSV/Parquet/JSON file under the data directory is exposed as
    a table you can SELECT from and JOIN across.
    """
    # Fetch the table listing from the shared bridge, then encode it.
    sources = get_bridge().list_sources()
    return _json(sources)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@mcp.tool()
def describe(table: str) -> str:
    """Show a table's columns and types."""
    # Look the schema up on the shared bridge, then encode it.
    schema = get_bridge().describe(table)
    return _json(schema)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@mcp.tool()
def preview(table: str, n: int = 20) -> str:
    """Return the first ``n`` rows of a table (capped by TABLEBRIDGE_MAX_ROWS)."""
    # The bridge clamps n; this wrapper only forwards and encodes.
    sample = get_bridge().preview(table, n)
    return _json(sample)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@mcp.tool()
def query(sql: str) -> str:
    """Run a read-only SQL query (DuckDB dialect) across the loaded tables.

    Supports SELECT / WITH / DESCRIBE / SUMMARIZE and JOINs across files. Writes
    and raw file-access functions are rejected. Results are capped at
    TABLEBRIDGE_MAX_ROWS; a ``truncated`` flag indicates when more rows exist.
    """
    # Validation and execution both happen inside the bridge.
    result = get_bridge().query(sql)
    return _json(result)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@mcp.tool()
def refresh() -> str:
    """Re-scan the data directory (pick up added/changed files) and report the count."""
    # scan() returns the number of tables registered after the reload.
    payload = {"reloaded_tables": get_bridge().scan()}
    return _json(payload)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@mcp.tool()
def server_info() -> str:
    """Report the effective configuration (data dir, row cap, supported formats)."""
    # Read the configuration off the shared bridge and encode its dict form.
    active_config = get_bridge().config
    return _json(active_config.as_dict())
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main() -> None:
    """Console-script entry point: run the server over stdio."""
    # Calls run() on the module-level FastMCP instance that owns the tools.
    mcp.run()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tablebridge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An MCP server that turns a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Michael-WhiteCapData/tablebridge-mcp
|
|
6
|
+
Project-URL: Repository, https://github.com/Michael-WhiteCapData/tablebridge-mcp
|
|
7
|
+
Project-URL: Issues, https://github.com/Michael-WhiteCapData/tablebridge-mcp/issues
|
|
8
|
+
Author: Michael Tierney
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: claude,csv,data-integration,duckdb,mcp,model-context-protocol,parquet,sql
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: duckdb>=1.0
|
|
21
|
+
Requires-Dist: mcp>=1.2
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest-cov>=5; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
<!-- mcp-name: io.github.Michael-WhiteCapData/tablebridge-mcp -->
|
|
29
|
+
|
|
30
|
+
# tablebridge
|
|
31
|
+
|
|
32
|
+
**Turn a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent.**
|
|
33
|
+
|
|
34
|
+
[CI](https://github.com/Michael-WhiteCapData/tablebridge-mcp/actions/workflows/ci.yml)
|
|
35
|
+
[PyPI](https://pypi.org/project/tablebridge/)
|
|
36
|
+
[Python](https://www.python.org/)
|
|
37
|
+
[MCP](https://modelcontextprotocol.io/)
|
|
38
|
+
[License: MIT](LICENSE)
|
|
39
|
+
|
|
40
|
+
Small businesses don't have a data warehouse — they have a folder full of exports: `customers.csv`, last month's `orders.csv`, a `regions.json` someone emailed over. `tablebridge` is an [MCP](https://modelcontextprotocol.io/) server that points [DuckDB](https://duckdb.org/) at that folder, exposes **each file as a SQL table**, and lets your agent run **read-only SQL — including JOINs across files** — to answer questions over all of them at once. Scattered exports become one queryable source of truth.
|
|
41
|
+
|
|
42
|
+
It's **read-only and sandboxed**: files are loaded into an in-memory database, the data directory is the only thing it can see, and queries are validated so an agent can't write, escape to other paths, or call raw file functions.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Why you'd want this
|
|
47
|
+
|
|
48
|
+
- 🔗 **One source over many files.** JOIN `orders.csv` to `customers.csv` to `regions.json` in a single query — no ETL, no database to stand up.
|
|
49
|
+
- 🦆 **DuckDB-powered.** Fast analytical SQL over CSV, TSV, Parquet, JSON/NDJSON.
|
|
50
|
+
- 🔒 **Safe by design.** Files are materialized into memory; queries are validated read-only; raw file-access functions and out-of-sandbox paths are rejected.
|
|
51
|
+
- 🤖 **Agent-friendly.** `list_sources` → `describe` → `query` is a natural flow the agent can follow on its own.
|
|
52
|
+
- 🪶 **Two dependencies** (`mcp`, `duckdb`), fully typed and tested.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uvx tablebridge # run directly
|
|
58
|
+
# or
|
|
59
|
+
pip install tablebridge # then run: tablebridge
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Claude Code
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
TABLEBRIDGE_DATA_DIR=/path/to/your/data claude mcp add tablebridge -- uvx tablebridge
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Claude Desktop / Cursor
|
|
69
|
+
|
|
70
|
+
```jsonc
|
|
71
|
+
{
|
|
72
|
+
"mcpServers": {
|
|
73
|
+
"tablebridge": {
|
|
74
|
+
"command": "uvx",
|
|
75
|
+
"args": ["tablebridge"],
|
|
76
|
+
"env": { "TABLEBRIDGE_DATA_DIR": "/path/to/your/data" }
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Tools
|
|
83
|
+
|
|
84
|
+
| Tool | Description |
|
|
85
|
+
| --- | --- |
|
|
86
|
+
| `list_sources` | List the tables (one per data file) with column counts — start here |
|
|
87
|
+
| `describe` | A table's columns and types |
|
|
88
|
+
| `preview` | First N rows of a table |
|
|
89
|
+
| `query` | Run read-only SQL (DuckDB dialect) across the tables, JOINs included |
|
|
90
|
+
| `refresh` | Re-scan the data directory for added/changed files |
|
|
91
|
+
| `server_info` | Effective config (data dir, row cap, supported formats) |
|
|
92
|
+
|
|
93
|
+
## Example
|
|
94
|
+
|
|
95
|
+
With a folder containing `customers.csv`, `orders.csv`, and `regions.json`:
|
|
96
|
+
|
|
97
|
+
> **You:** Who are my top 3 customers by total spend, and what region are they in?
|
|
98
|
+
>
|
|
99
|
+
> **Agent:** *(calls `list_sources`, then `query`)*
|
|
100
|
+
> ```sql
|
|
101
|
+
> SELECT c.name, r.region, SUM(o.total) AS spend
|
|
102
|
+
> FROM customers c
|
|
103
|
+
> JOIN orders o ON o.customer_id = c.id
|
|
104
|
+
> JOIN regions r ON r.customer_id = c.id
|
|
105
|
+
> GROUP BY c.name, r.region
|
|
106
|
+
> ORDER BY spend DESC
|
|
107
|
+
> LIMIT 3;
|
|
108
|
+
> ```
|
|
109
|
+
|
|
110
|
+
## Configuration
|
|
111
|
+
|
|
112
|
+
| Variable | Default | Description |
|
|
113
|
+
| --- | --- | --- |
|
|
114
|
+
| `TABLEBRIDGE_DATA_DIR` | `.` | Directory of files to expose (the sandbox boundary) |
|
|
115
|
+
| `TABLEBRIDGE_MAX_ROWS` | `1000` | Max rows returned per query/preview |
|
|
116
|
+
| `TABLEBRIDGE_RECURSIVE` | `1` | Scan subdirectories too |
|
|
117
|
+
|
|
118
|
+
Supported formats: `.csv`, `.tsv`, `.parquet`, `.json`, `.ndjson`.
|
|
119
|
+
|
|
120
|
+
## Security model
|
|
121
|
+
|
|
122
|
+
1. **Sandboxed** to `TABLEBRIDGE_DATA_DIR` — only files under it are loaded.
|
|
123
|
+
2. **Materialized** into an in-memory DuckDB, then external filesystem access is disabled — queries can't reach other paths.
|
|
124
|
+
3. **Validated SQL** — a single read-only statement only; writes and raw file-reader functions are rejected.
|
|
125
|
+
|
|
126
|
+
## Development
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
git clone https://github.com/Michael-WhiteCapData/tablebridge-mcp
|
|
130
|
+
cd tablebridge-mcp
|
|
131
|
+
uv pip install -e ".[dev]"
|
|
132
|
+
ruff check .
|
|
133
|
+
pytest # uses real DuckDB over temp files
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
[MIT](LICENSE) © Michael Tierney
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
tablebridge/__init__.py,sha256=FeRW4GxElndLCVFghZAdt93o3iaxhaS7FJD0t0pF9C0,537
|
|
2
|
+
tablebridge/config.py,sha256=l-MdDOkqO1yXBCkRnwCmSfyptRgQHRanIzXqxTBjQI4,1335
|
|
3
|
+
tablebridge/db.py,sha256=akNVP54hJGvTL3RH5VW9JdRAgtHgtQbu71yxmPv_Dhs,6155
|
|
4
|
+
tablebridge/server.py,sha256=sOaRV2SyTGTSfohhl8SMyucL7fd2YWNqBPgDJdv_WcE,2325
|
|
5
|
+
tablebridge-0.1.0.dist-info/METADATA,sha256=lhFfD0qFgSTud64_hGF0CzfYd-fUEn--ZAST3WlaK_E,5658
|
|
6
|
+
tablebridge-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
tablebridge-0.1.0.dist-info/entry_points.txt,sha256=snZv5v1d7GzimKXzbeSp0JqKMpzT4YC5qpeVw1ZlbX4,56
|
|
8
|
+
tablebridge-0.1.0.dist-info/licenses/LICENSE,sha256=CY7xjvDIH4rbWyhYFOZZaAfXsrsdo5apgxDnsY-xq8g,1072
|
|
9
|
+
tablebridge-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Tierney
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|