PyPI - tablebridge - Versions diffs - 0.1.0__tar.gz - Mend

tablebridge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

tablebridge-0.1.0/.github/workflows/ci.yml +31 -0
tablebridge-0.1.0/.github/workflows/publish-mcp.yml +25 -0
tablebridge-0.1.0/.github/workflows/release.yml +36 -0
tablebridge-0.1.0/.gitignore +21 -0
tablebridge-0.1.0/CHANGELOG.md +21 -0
tablebridge-0.1.0/CONTRIBUTING.md +33 -0
tablebridge-0.1.0/LICENSE +21 -0
tablebridge-0.1.0/PKG-INFO +140 -0
tablebridge-0.1.0/README.md +113 -0
tablebridge-0.1.0/pyproject.toml +51 -0
tablebridge-0.1.0/server.json +38 -0
tablebridge-0.1.0/src/tablebridge/__init__.py +13 -0
tablebridge-0.1.0/src/tablebridge/config.py +43 -0
tablebridge-0.1.0/src/tablebridge/db.py +161 -0
tablebridge-0.1.0/src/tablebridge/server.py +91 -0
tablebridge-0.1.0/tests/conftest.py +26 -0
tablebridge-0.1.0/tests/test_config_and_server.py +49 -0
tablebridge-0.1.0/tests/test_db.py +52 -0
tablebridge-0.1.0/tests/test_validate_sql.py +48 -0

tablebridge-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,31 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Create venv (Python ${{ matrix.python-version }})
+        run: uv venv -p ${{ matrix.python-version }}
+      - name: Install project (with dev deps)
+        run: uv pip install -e ".[dev]"
+      - name: Lint
+        run: uv run ruff check .
+      - name: Test
+        run: uv run pytest --cov=tablebridge --cov-report=term-missing

tablebridge-0.1.0/.github/workflows/publish-mcp.yml ADDED Viewed

@@ -0,0 +1,25 @@
+name: Publish to MCP Registry
+on:
+  workflow_dispatch:
+  push:
+    tags: ["v*"]
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install mcp-publisher
+        run: |
+          curl -L "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_linux_amd64.tar.gz" | tar xz mcp-publisher
+      - name: Login to the registry (GitHub OIDC)
+        run: ./mcp-publisher login github-oidc
+      - name: Publish server.json
+        run: ./mcp-publisher publish

tablebridge-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,36 @@
+name: Release
+on:
+  push:
+    tags: ["v*"]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Build sdist and wheel
+        run: uv build
+      - name: Upload dist artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  publish:
+    needs: build
+    runs-on: ubuntu-latest
+    # Trusted Publishing (OIDC) — no API token stored in the repo.
+    environment: pypi
+    permissions:
+      id-token: write
+    steps:
+      - name: Download dist artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

tablebridge-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+.eggs/
+build/
+dist/
+.venv/
+venv/
+# Tooling
+.pytest_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+.mypy_cache/
+# Editors / OS
+.idea/
+.vscode/
+.DS_Store

tablebridge-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,21 @@
+# Changelog
+All notable changes to this project are documented here. The format is based on
+[Keep a Changelog](https://keepachangelog.com/), and this project adheres to
+[Semantic Versioning](https://semver.org/).
+## [0.1.0] - 2026-06-21
+### Added
+- Initial release.
+- Exposes a directory of CSV/TSV/Parquet/JSON/NDJSON files as DuckDB tables.
+- Tools: `list_sources`, `describe`, `preview`, `query`, `refresh`, `server_info`.
+- Read-only, sandboxed security model: materialized in-memory tables, external
+  filesystem access disabled post-load, single read-only statement validation
+  rejecting writes and raw file-access functions.
+- Configurable via `TABLEBRIDGE_DATA_DIR`, `TABLEBRIDGE_MAX_ROWS`,
+  `TABLEBRIDGE_RECURSIVE`.
+- Test suite using real DuckDB over temp files; CI on Python 3.11 and 3.12.
+- MCP registry manifest, PyPI publish + registry-publish workflows.
+[0.1.0]: https://github.com/Michael-WhiteCapData/tablebridge-mcp/releases/tag/v0.1.0

tablebridge-0.1.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,33 @@
+# Contributing to tablebridge
+Thanks for your interest! This server stays small, focused, and read-only-safe — contributions that keep it that way merge easiest.
+## Getting set up
+```bash
+git clone https://github.com/Michael-WhiteCapData/tablebridge-mcp
+cd tablebridge-mcp
+uv pip install -e ".[dev]"
+```
+## Before opening a PR
+- `ruff check .` passes (`ruff check --fix .` to autofix).
+- `pytest` passes. Tests use real DuckDB over temp files — no external services.
+- New behavior comes with a test.
+- **Security:** any change to `query`/SQL handling must keep the read-only guarantees — single statement, no writes, no raw file-access functions, no escaping the data directory. Add a test proving the new path stays sandboxed.
+## Architecture
+- `config.py` — env-driven config (data dir, row cap) + supported formats.
+- `db.py` — DuckDB engine: materializes files as tables, validates SQL, runs queries.
+- `server.py` — the MCP tool layer (thin; delegates to `TableBridge`).
+## Ideas welcome
+- More input formats (e.g. Excel via the DuckDB `excel` extension).
+- A `schema_summary` tool that profiles columns.
+## Code of conduct
+Be decent, assume good faith, keep it constructive.

tablebridge-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Michael Tierney
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

tablebridge-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,140 @@
+Metadata-Version: 2.4
+Name: tablebridge
+Version: 0.1.0
+Summary: An MCP server that turns a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent.
+Project-URL: Homepage, https://github.com/Michael-WhiteCapData/tablebridge-mcp
+Project-URL: Repository, https://github.com/Michael-WhiteCapData/tablebridge-mcp
+Project-URL: Issues, https://github.com/Michael-WhiteCapData/tablebridge-mcp/issues
+Author: Michael Tierney
+License: MIT
+License-File: LICENSE
+Keywords: claude,csv,data-integration,duckdb,mcp,model-context-protocol,parquet,sql
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Database
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Requires-Python: >=3.11
+Requires-Dist: duckdb>=1.0
+Requires-Dist: mcp>=1.2
+Provides-Extra: dev
+Requires-Dist: pytest-cov>=5; extra == 'dev'
+Requires-Dist: pytest>=8; extra == 'dev'
+Requires-Dist: ruff>=0.6; extra == 'dev'
+Description-Content-Type: text/markdown
+<!-- mcp-name: io.github.Michael-WhiteCapData/tablebridge-mcp -->
+# tablebridge
+**Turn a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent.**
+[![CI](https://github.com/Michael-WhiteCapData/tablebridge-mcp/actions/workflows/ci.yml/badge.svg)](https://github.com/Michael-WhiteCapData/tablebridge-mcp/actions/workflows/ci.yml)
+[![PyPI](https://img.shields.io/pypi/v/tablebridge?color=3775A9&logo=pypi&logoColor=white)](https://pypi.org/project/tablebridge/)
+[![Python](https://img.shields.io/badge/python-3.11%2B-3776AB?logo=python&logoColor=white)](https://www.python.org/)
+[![MCP](https://img.shields.io/badge/MCP-server-D97757)](https://modelcontextprotocol.io/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
+Small businesses don't have a data warehouse — they have a folder full of exports: `customers.csv`, last month's `orders.csv`, a `regions.json` someone emailed over. `tablebridge` is an [MCP](https://modelcontextprotocol.io/) server that points [DuckDB](https://duckdb.org/) at that folder, exposes **each file as a SQL table**, and lets your agent run **read-only SQL — including JOINs across files** — to answer questions over all of them at once. Scattered spreadsheets become one queryable source of truth.
+It's **read-only and sandboxed**: files are loaded into an in-memory database, the data directory is the only thing it can see, and queries are validated so an agent can't write, escape to other paths, or call raw file functions.
+---
+## Why you'd want this
+- 🔗 **One source over many files.** JOIN `orders.csv` to `customers.csv` to `regions.json` in a single query — no ETL, no database to stand up.
+- 🦆 **DuckDB-powered.** Fast analytical SQL over CSV, TSV, Parquet, JSON/NDJSON.
+- 🔒 **Safe by design.** Files are materialized into memory; queries are validated read-only; raw file-access functions and out-of-sandbox paths are rejected.
+- 🤖 **Agent-friendly.** `list_sources` → `describe` → `query` is a natural flow the agent can follow on its own.
+- 🪶 **Two dependencies** (`mcp`, `duckdb`), fully typed and tested.
+## Install
+```bash
+uvx tablebridge          # run directly
+# or
+pip install tablebridge  # then run: tablebridge
+```
+### Claude Code
+```bash
+TABLEBRIDGE_DATA_DIR=/path/to/your/data claude mcp add tablebridge -- uvx tablebridge
+```
+### Claude Desktop / Cursor
+```jsonc
+{
+  "mcpServers": {
+    "tablebridge": {
+      "command": "uvx",
+      "args": ["tablebridge"],
+      "env": { "TABLEBRIDGE_DATA_DIR": "/path/to/your/data" }
+    }
+  }
+}
+```
+## Tools
+| Tool | Description |
+| --- | --- |
+| `list_sources` | List the tables (one per data file) with column counts — start here |
+| `describe` | A table's columns and types |
+| `preview` | First N rows of a table |
+| `query` | Run read-only SQL (DuckDB dialect) across the tables, JOINs included |
+| `refresh` | Re-scan the data directory for added/changed files |
+| `server_info` | Effective config (data dir, row cap, supported formats) |
+## Example
+With a folder containing `customers.csv`, `orders.csv`, and `regions.json`:
+> **You:** Who are my top 3 customers by total spend, and what region are they in?
+>
+> **Agent:** *(calls `list_sources`, then `query`)*
+> ```sql
+> SELECT c.name, r.region, SUM(o.total) AS spend
+> FROM customers c
+> JOIN orders o   ON o.customer_id = c.id
+> JOIN regions r  ON r.customer_id = c.id
+> GROUP BY c.name, r.region
+> ORDER BY spend DESC
+> LIMIT 3;
+> ```
+## Configuration
+| Variable | Default | Description |
+| --- | --- | --- |
+| `TABLEBRIDGE_DATA_DIR` | `.` | Directory of files to expose (the sandbox boundary) |
+| `TABLEBRIDGE_MAX_ROWS` | `1000` | Max rows returned per query/preview |
+| `TABLEBRIDGE_RECURSIVE` | `1` | Scan subdirectories too |
+Supported formats: `.csv`, `.tsv`, `.parquet`, `.json`, `.ndjson`.
+## Security model
+1. **Sandboxed** to `TABLEBRIDGE_DATA_DIR` — only files under it are loaded.
+2. **Materialized** into an in-memory DuckDB, then external filesystem access is disabled — queries can't reach other paths.
+3. **Validated SQL** — a single read-only statement only; writes and raw file-reader functions are rejected.
+## Development
+```bash
+git clone https://github.com/Michael-WhiteCapData/tablebridge-mcp
+cd tablebridge-mcp
+uv pip install -e ".[dev]"
+ruff check .
+pytest          # uses real DuckDB over temp files
+```
+See [CONTRIBUTING.md](CONTRIBUTING.md).
+## License
+[MIT](LICENSE) © Michael Tierney

tablebridge-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,113 @@
+<!-- mcp-name: io.github.Michael-WhiteCapData/tablebridge-mcp -->
+# tablebridge
+**Turn a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent.**
+[![CI](https://github.com/Michael-WhiteCapData/tablebridge-mcp/actions/workflows/ci.yml/badge.svg)](https://github.com/Michael-WhiteCapData/tablebridge-mcp/actions/workflows/ci.yml)
+[![PyPI](https://img.shields.io/pypi/v/tablebridge?color=3775A9&logo=pypi&logoColor=white)](https://pypi.org/project/tablebridge/)
+[![Python](https://img.shields.io/badge/python-3.11%2B-3776AB?logo=python&logoColor=white)](https://www.python.org/)
+[![MCP](https://img.shields.io/badge/MCP-server-D97757)](https://modelcontextprotocol.io/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
+Small businesses don't have a data warehouse — they have a folder full of exports: `customers.csv`, last month's `orders.csv`, a `regions.json` someone emailed over. `tablebridge` is an [MCP](https://modelcontextprotocol.io/) server that points [DuckDB](https://duckdb.org/) at that folder, exposes **each file as a SQL table**, and lets your agent run **read-only SQL — including JOINs across files** — to answer questions over all of them at once. Scattered spreadsheets become one queryable source of truth.
+It's **read-only and sandboxed**: files are loaded into an in-memory database, the data directory is the only thing it can see, and queries are validated so an agent can't write, escape to other paths, or call raw file functions.
+---
+## Why you'd want this
+- 🔗 **One source over many files.** JOIN `orders.csv` to `customers.csv` to `regions.json` in a single query — no ETL, no database to stand up.
+- 🦆 **DuckDB-powered.** Fast analytical SQL over CSV, TSV, Parquet, JSON/NDJSON.
+- 🔒 **Safe by design.** Files are materialized into memory; queries are validated read-only; raw file-access functions and out-of-sandbox paths are rejected.
+- 🤖 **Agent-friendly.** `list_sources` → `describe` → `query` is a natural flow the agent can follow on its own.
+- 🪶 **Two dependencies** (`mcp`, `duckdb`), fully typed and tested.
+## Install
+```bash
+uvx tablebridge          # run directly
+# or
+pip install tablebridge  # then run: tablebridge
+```
+### Claude Code
+```bash
+TABLEBRIDGE_DATA_DIR=/path/to/your/data claude mcp add tablebridge -- uvx tablebridge
+```
+### Claude Desktop / Cursor
+```jsonc
+{
+  "mcpServers": {
+    "tablebridge": {
+      "command": "uvx",
+      "args": ["tablebridge"],
+      "env": { "TABLEBRIDGE_DATA_DIR": "/path/to/your/data" }
+    }
+  }
+}
+```
+## Tools
+| Tool | Description |
+| --- | --- |
+| `list_sources` | List the tables (one per data file) with column counts — start here |
+| `describe` | A table's columns and types |
+| `preview` | First N rows of a table |
+| `query` | Run read-only SQL (DuckDB dialect) across the tables, JOINs included |
+| `refresh` | Re-scan the data directory for added/changed files |
+| `server_info` | Effective config (data dir, row cap, supported formats) |
+## Example
+With a folder containing `customers.csv`, `orders.csv`, and `regions.json`:
+> **You:** Who are my top 3 customers by total spend, and what region are they in?
+>
+> **Agent:** *(calls `list_sources`, then `query`)*
+> ```sql
+> SELECT c.name, r.region, SUM(o.total) AS spend
+> FROM customers c
+> JOIN orders o   ON o.customer_id = c.id
+> JOIN regions r  ON r.customer_id = c.id
+> GROUP BY c.name, r.region
+> ORDER BY spend DESC
+> LIMIT 3;
+> ```
+## Configuration
+| Variable | Default | Description |
+| --- | --- | --- |
+| `TABLEBRIDGE_DATA_DIR` | `.` | Directory of files to expose (the sandbox boundary) |
+| `TABLEBRIDGE_MAX_ROWS` | `1000` | Max rows returned per query/preview |
+| `TABLEBRIDGE_RECURSIVE` | `1` | Scan subdirectories too |
+Supported formats: `.csv`, `.tsv`, `.parquet`, `.json`, `.ndjson`.
+## Security model
+1. **Sandboxed** to `TABLEBRIDGE_DATA_DIR` — only files under it are loaded.
+2. **Materialized** into an in-memory DuckDB, then external filesystem access is disabled — queries can't reach other paths.
+3. **Validated SQL** — a single read-only statement only; writes and raw file-reader functions are rejected.
+## Development
+```bash
+git clone https://github.com/Michael-WhiteCapData/tablebridge-mcp
+cd tablebridge-mcp
+uv pip install -e ".[dev]"
+ruff check .
+pytest          # uses real DuckDB over temp files
+```
+See [CONTRIBUTING.md](CONTRIBUTING.md).
+## License
+[MIT](LICENSE) © Michael Tierney

tablebridge-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,51 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "tablebridge"
+version = "0.1.0"
+description = "An MCP server that turns a folder of CSV / Parquet / JSON files into one SQL-queryable source for your AI agent."
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.11"
+authors = [{ name = "Michael Tierney" }]
+keywords = ["mcp", "model-context-protocol", "duckdb", "sql", "csv", "parquet", "data-integration", "claude"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Database",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+]
+dependencies = [
+    "mcp>=1.2",
+    "duckdb>=1.0",
+]
+[project.urls]
+Homepage = "https://github.com/Michael-WhiteCapData/tablebridge-mcp"
+Repository = "https://github.com/Michael-WhiteCapData/tablebridge-mcp"
+Issues = "https://github.com/Michael-WhiteCapData/tablebridge-mcp/issues"
+[project.scripts]
+tablebridge = "tablebridge.server:main"
+[project.optional-dependencies]
+dev = ["pytest>=8", "pytest-cov>=5", "ruff>=0.6"]
+[tool.hatch.build.targets.wheel]
+packages = ["src/tablebridge"]
+[tool.pytest.ini_options]
+addopts = "-q"
+testpaths = ["tests"]
+[tool.ruff]
+line-length = 110
+target-version = "py311"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM", "PLC"]

tablebridge-0.1.0/server.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-09-29/server.schema.json",
+  "name": "io.github.Michael-WhiteCapData/tablebridge-mcp",
+  "description": "Query a folder of CSV / Parquet / JSON files with SQL — one queryable source, read-only.",
+  "repository": {
+    "url": "https://github.com/Michael-WhiteCapData/tablebridge-mcp",
+    "source": "github"
+  },
+  "version": "0.1.0",
+  "packages": [
+    {
+      "registryType": "pypi",
+      "identifier": "tablebridge",
+      "version": "0.1.0",
+      "transport": { "type": "stdio" },
+      "environmentVariables": [
+        {
+          "name": "TABLEBRIDGE_DATA_DIR",
+          "description": "Directory of data files to expose as SQL tables (sandboxed).",
+          "default": ".",
+          "isRequired": false
+        },
+        {
+          "name": "TABLEBRIDGE_MAX_ROWS",
+          "description": "Maximum rows returned per query/preview.",
+          "default": "1000",
+          "isRequired": false
+        },
+        {
+          "name": "TABLEBRIDGE_RECURSIVE",
+          "description": "Scan subdirectories too (1/0).",
+          "default": "1",
+          "isRequired": false
+        }
+      ]
+    }
+  ]
+}

tablebridge-0.1.0/src/tablebridge/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""tablebridge — query your scattered CSV / Parquet / JSON files with SQL, via MCP.
+Points a DuckDB engine at a directory of tabular files, loads each as an in-memory
+SQL table, and lets an agent run read-only SQL (including JOINs across files) — so
+a pile of exports becomes one queryable source of truth. Sandboxed to a single
+data directory and always read-only.
+"""
+from .config import Config
+from .db import TableBridge, TableBridgeError
+__all__ = ["Config", "TableBridge", "TableBridgeError", "__version__"]
+__version__ = "0.1.0"

tablebridge-0.1.0/src/tablebridge/config.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Environment-driven configuration for the tablebridge server."""
+from __future__ import annotations
+import os
+from dataclasses import dataclass
+from pathlib import Path
+DEFAULT_MAX_ROWS = 1000
+# File extensions we expose as SQL views, mapped to the DuckDB reader function.
+READERS = {
+    ".csv": "read_csv_auto",
+    ".tsv": "read_csv_auto",
+    ".parquet": "read_parquet",
+    ".json": "read_json_auto",
+    ".ndjson": "read_json_auto",
+}
+@dataclass(frozen=True)
+class Config:
+    """Effective server configuration, sourced from the environment."""
+    data_dir: Path = Path(".")
+    max_rows: int = DEFAULT_MAX_ROWS
+    recursive: bool = True
+    @classmethod
+    def from_env(cls, env: dict[str, str] | None = None) -> Config:
+        src = os.environ if env is None else env
+        return cls(
+            data_dir=Path(src.get("TABLEBRIDGE_DATA_DIR", ".")).expanduser().resolve(),
+            max_rows=int(src.get("TABLEBRIDGE_MAX_ROWS", str(DEFAULT_MAX_ROWS))),
+            recursive=src.get("TABLEBRIDGE_RECURSIVE", "1").lower() not in ("0", "false", "no"),
+        )
+    def as_dict(self) -> dict[str, object]:
+        return {
+            "data_dir": str(self.data_dir),
+            "max_rows": self.max_rows,
+            "recursive": self.recursive,
+            "supported_extensions": sorted(READERS),
+        }

tablebridge-0.1.0/src/tablebridge/db.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""DuckDB engine: load a directory of tabular files as in-memory tables and run
+read-only SQL over them.
+Security posture:
+- Files are **materialized** into in-memory tables at scan time, so queries never
+  touch the filesystem afterward.
+- Query SQL is validated to be a single read-only statement, and raw file-reader
+  functions (read_csv, read_parquet, glob, copy, attach, …) are rejected — an
+  agent cannot read a path outside the configured data directory.
+"""
+from __future__ import annotations
+import contextlib
+import re
+from pathlib import Path
+from typing import Any
+from .config import READERS, Config
+class TableBridgeError(RuntimeError):
+    """A user-facing error (bad SQL, unknown table, load failure)."""
+_ALLOWED_START = {
+    "SELECT", "WITH", "FROM", "DESCRIBE", "SUMMARIZE", "SHOW", "EXPLAIN", "VALUES", "TABLE",
+}
+_FORBIDDEN = re.compile(
+    r"\b(read_csv|read_csv_auto|read_parquet|read_json|read_json_auto|read_ndjson|"
+    r"read_text|read_blob|parquet_scan|glob|copy|attach|detach|install|load|export|import)\b",
+    re.IGNORECASE,
+)
+_IDENT = re.compile(r"\W+")
+def _table_name(path: Path, taken: set[str]) -> str:
+    base = _IDENT.sub("_", path.stem).strip("_").lower() or "table"
+    name, i = base, 2
+    while name in taken:
+        name, i = f"{base}_{i}", i + 1
+    return name
+def validate_sql(sql: str) -> str:
+    """Return the SQL if it is a single safe read-only statement, else raise."""
+    stmts = [s for s in (part.strip() for part in sql.split(";")) if s]
+    if len(stmts) != 1:
+        raise TableBridgeError("Provide exactly one SQL statement.")
+    stmt = stmts[0]
+    first = stmt.split(None, 1)[0].upper() if stmt.split() else ""
+    if first not in _ALLOWED_START:
+        raise TableBridgeError(
+            f"Only read-only queries are allowed (got '{first or '?'}'). "
+            "Use SELECT / WITH / DESCRIBE / SUMMARIZE / SHOW."
+        )
+    if _FORBIDDEN.search(stmt):
+        raise TableBridgeError(
+            "Raw file access functions are not allowed. Query the registered tables "
+            "by name (see list_sources)."
+        )
+    return stmt
+class TableBridge:
+    """Loads a data directory into DuckDB and answers read-only queries."""
+    def __init__(self, config: Config, con: Any = None) -> None:
+        self._config = config
+        self._registry: dict[str, dict[str, str]] = {}
+        self._own_con = con is None
+        self._con = con if con is not None else self._new_con()
+        self.scan()
+    def _new_con(self) -> Any:
+        import duckdb  # noqa: PLC0415
+        return duckdb.connect(":memory:")
+    @property
+    def config(self) -> Config:
+        return self._config
+    # -- loading -------------------------------------------------------------
+    def scan(self) -> int:
+        """(Re)load supported files under the data dir as in-memory tables.
+        Reconnects first (when we own the connection) so a prior scan's
+        ``enable_external_access=false`` lock is reset and files can be read again.
+        """
+        if self._own_con:
+            self._con = self._new_con()
+        self._registry.clear()
+        pattern = "**/*" if self._config.recursive else "*"
+        taken: set[str] = set()
+        for path in sorted(self._config.data_dir.glob(pattern)):
+            reader = READERS.get(path.suffix.lower())
+            if not path.is_file() or reader is None:
+                continue
+            name = _table_name(path, taken)
+            taken.add(name)
+            try:
+                self._con.execute(
+                    f'CREATE OR REPLACE TABLE "{name}" AS SELECT * FROM {reader}(?)',
+                    [str(path)],
+                )
+            except Exception as exc:  # noqa: BLE001 - surface load errors per file
+                raise TableBridgeError(f"Failed to load {path.name}: {exc}") from exc
+            rel = str(path.relative_to(self._config.data_dir))
+            self._registry[name] = {"file": rel, "kind": path.suffix.lower().lstrip(".")}
+        # Defense in depth: once data is materialized, forbid further file access.
+        with contextlib.suppress(Exception):
+            self._con.execute("SET enable_external_access=false")
+        return len(self._registry)
+    # -- introspection -------------------------------------------------------
+    def list_sources(self) -> list[dict[str, Any]]:
+        out = []
+        for name, meta in self._registry.items():
+            cols = self._con.execute(f'SELECT * FROM "{name}" LIMIT 0').description
+            out.append({"table": name, "file": meta["file"], "kind": meta["kind"], "columns": len(cols)})
+        return out
+    def describe(self, table: str) -> list[dict[str, str]]:
+        self._require(table)
+        rows = self._con.execute(f'DESCRIBE "{table}"').fetchall()
+        return [{"column": r[0], "type": r[1]} for r in rows]
+    def preview(self, table: str, n: int = 20) -> dict[str, Any]:
+        self._require(table)
+        n = max(1, min(n, self._config.max_rows))
+        return self._fetch(f'SELECT * FROM "{table}" LIMIT {n}')
+    def query(self, sql: str) -> dict[str, Any]:
+        return self._fetch(validate_sql(sql))
+    # -- helpers -------------------------------------------------------------
+    def _require(self, table: str) -> None:
+        if table not in self._registry:
+            known = ", ".join(self._registry) or "(none)"
+            raise TableBridgeError(f"Unknown table '{table}'. Available: {known}")
+    def _fetch(self, sql: str) -> dict[str, Any]:
+        try:
+            cur = self._con.execute(sql)
+        except Exception as exc:  # noqa: BLE001 - return query errors to the agent
+            raise TableBridgeError(f"Query failed: {exc}") from exc
+        columns = [d[0] for d in cur.description] if cur.description else []
+        cap = self._config.max_rows
+        rows = cur.fetchmany(cap + 1)
+        truncated = len(rows) > cap
+        rows = rows[:cap]
+        return {
+            "columns": columns,
+            "rows": [dict(zip(columns, r, strict=False)) for r in rows],
+            "row_count": len(rows),
+            "truncated": truncated,
+        }

tablebridge-0.1.0/src/tablebridge/server.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""The tablebridge MCP server.
+Tools return JSON so the agent gets structured results. Everything is read-only
+and sandboxed to the configured data directory.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from mcp.server.fastmcp import FastMCP
+from .config import Config
+from .db import TableBridge
+mcp = FastMCP("tablebridge")
+_bridge: TableBridge | None = None
+def get_bridge() -> TableBridge:
+    global _bridge
+    if _bridge is None:
+        _bridge = TableBridge(Config.from_env())
+    return _bridge
+def set_bridge(bridge: TableBridge) -> None:
+    """Replace the module-level bridge (used by tests)."""
+    global _bridge
+    _bridge = bridge
+def _json(data: Any) -> str:
+    return json.dumps(data, indent=2, default=str)
+@mcp.tool()
+def list_sources() -> str:
+    """List the tables available to query (one per data file) with column counts.
+    Start here: each CSV/Parquet/JSON file under the data directory is exposed as
+    a table you can SELECT from and JOIN across.
+    """
+    return _json(get_bridge().list_sources())
+@mcp.tool()
+def describe(table: str) -> str:
+    """Show a table's columns and types."""
+    return _json(get_bridge().describe(table))
+@mcp.tool()
+def preview(table: str, n: int = 20) -> str:
+    """Return the first ``n`` rows of a table (capped by TABLEBRIDGE_MAX_ROWS)."""
+    return _json(get_bridge().preview(table, n))
+@mcp.tool()
+def query(sql: str) -> str:
+    """Run a read-only SQL query (DuckDB dialect) across the loaded tables.
+    Supports SELECT / WITH / DESCRIBE / SUMMARIZE and JOINs across files. Writes
+    and raw file-access functions are rejected. Results are capped at
+    TABLEBRIDGE_MAX_ROWS; a ``truncated`` flag indicates when more rows exist.
+    """
+    return _json(get_bridge().query(sql))
+@mcp.tool()
+def refresh() -> str:
+    """Re-scan the data directory (pick up added/changed files) and report the count."""
+    count = get_bridge().scan()
+    return _json({"reloaded_tables": count})
+@mcp.tool()
+def server_info() -> str:
+    """Report the effective configuration (data dir, row cap, supported formats)."""
+    return _json(get_bridge().config.as_dict())
+def main() -> None:
+    """Console-script entry point: run the server over stdio."""
+    mcp.run()
+if __name__ == "__main__":
+    main()

tablebridge-0.1.0/tests/conftest.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Fixtures: a temp data directory with real CSV/JSON files + a live bridge."""
+from __future__ import annotations
+import pytest
+from tablebridge.config import Config
+from tablebridge.db import TableBridge
+@pytest.fixture
+def data_dir(tmp_path):
+    (tmp_path / "customers.csv").write_text("id,name\n1,Alice\n2,Bob\n")
+    (tmp_path / "orders.csv").write_text(
+        "id,customer_id,total\n10,1,99.5\n11,1,5.0\n12,2,20.0\n"
+    )
+    (tmp_path / "regions.json").write_text(
+        '[{"customer_id":1,"region":"NY"},{"customer_id":2,"region":"CA"}]'
+    )
+    return tmp_path
+@pytest.fixture
+def bridge(data_dir):
+    # max_rows=2 so truncation behavior is exercised.
+    return TableBridge(Config(data_dir=data_dir, max_rows=2))

tablebridge-0.1.0/tests/test_config_and_server.py ADDED Viewed

@@ -0,0 +1,49 @@
+import json
+from tablebridge import server
+from tablebridge.config import Config
+def test_config_from_env(tmp_path):
+    cfg = Config.from_env(env={"TABLEBRIDGE_DATA_DIR": str(tmp_path), "TABLEBRIDGE_MAX_ROWS": "50"})
+    assert cfg.data_dir == tmp_path.resolve()
+    assert cfg.max_rows == 50
+    assert cfg.recursive is True
+def test_config_recursive_off():
+    assert Config.from_env(env={"TABLEBRIDGE_RECURSIVE": "0"}).recursive is False
+class StubBridge:
+    def __init__(self):
+        self.config = Config()
+        self.calls = []
+    def list_sources(self):
+        return [{"table": "customers", "columns": 2}]
+    def query(self, sql):
+        self.calls.append(sql)
+        return {"columns": ["x"], "rows": [{"x": 1}], "row_count": 1, "truncated": False}
+    def scan(self):
+        return 3
+def test_list_sources_tool_returns_json():
+    server.set_bridge(StubBridge())
+    assert json.loads(server.list_sources())[0]["table"] == "customers"
+def test_query_tool_delegates():
+    stub = StubBridge()
+    server.set_bridge(stub)
+    out = json.loads(server.query("SELECT 1"))
+    assert out["row_count"] == 1
+    assert stub.calls == ["SELECT 1"]
+def test_refresh_tool_reports_count():
+    server.set_bridge(StubBridge())
+    assert json.loads(server.refresh())["reloaded_tables"] == 3

tablebridge-0.1.0/tests/test_db.py ADDED Viewed

@@ -0,0 +1,52 @@
+import pytest
+from tablebridge.db import TableBridgeError
+def test_scan_registers_tables(bridge):
+    tables = {s["table"] for s in bridge.list_sources()}
+    assert {"customers", "orders", "regions"} <= tables
+def test_describe_columns(bridge):
+    cols = {c["column"] for c in bridge.describe("customers")}
+    assert cols == {"id", "name"}
+def test_preview_respects_max_rows(bridge):
+    out = bridge.preview("orders", n=20)  # max_rows=2
+    assert out["row_count"] == 2
+def test_query_join_across_files(bridge):
+    out = bridge.query(
+        "SELECT c.name, SUM(o.total) AS spend FROM customers c "
+        "JOIN orders o ON o.customer_id = c.id GROUP BY c.name ORDER BY c.name"
+    )
+    assert out["columns"] == ["name", "spend"]
+    # max_rows=2 caps the two groups; both fit
+    names = {r["name"] for r in out["rows"]}
+    assert "Alice" in names
+def test_query_truncation_flag(bridge):
+    out = bridge.query("SELECT * FROM orders")  # 3 rows, cap 2
+    assert out["row_count"] == 2
+    assert out["truncated"] is True
+def test_query_blocks_file_escape(bridge):
+    with pytest.raises(TableBridgeError):
+        bridge.query("SELECT * FROM read_csv_auto('/etc/passwd')")
+def test_unknown_table_errors(bridge):
+    with pytest.raises(TableBridgeError, match="Unknown table"):
+        bridge.describe("nope")
+def test_refresh_picks_up_new_file(bridge, data_dir):
+    (data_dir / "extra.csv").write_text("a\n1\n")
+    count = bridge.scan()
+    assert "extra" in {s["table"] for s in bridge.list_sources()}
+    assert count >= 4

tablebridge-0.1.0/tests/test_validate_sql.py ADDED Viewed

@@ -0,0 +1,48 @@
+import pytest
+from tablebridge.db import TableBridgeError, validate_sql
+def test_allows_select_and_with():
+    assert validate_sql("SELECT 1") == "SELECT 1"
+    assert validate_sql("WITH t AS (SELECT 1) SELECT * FROM t").startswith("WITH")
+def test_strips_trailing_semicolon():
+    assert validate_sql("SELECT 1;") == "SELECT 1"
+# One case per statement kind that must never reach the database.
+@pytest.mark.parametrize(
+    "sql",
+    [
+        # Data modification.
+        "INSERT INTO t VALUES (1)",
+        "UPDATE t SET x=1",
+        "DELETE FROM t",
+        # Schema changes.
+        "DROP TABLE t",
+        "CREATE TABLE t (x int)",
+        # File export and attaching another database.
+        "COPY t TO 'out.csv'",
+        "ATTACH 'x.db'",
+        # Changing a setting.
+        "SET enable_external_access=true",
+    ],
+)
+def test_rejects_writes_and_dangerous(sql):
+    """Each listed statement makes ``validate_sql`` raise ``TableBridgeError``."""
+    with pytest.raises(TableBridgeError):
+        validate_sql(sql)
+# SELECT statements that name a file-reading or path-globbing function directly.
+@pytest.mark.parametrize(
+    "sql",
+    [
+        "SELECT * FROM read_csv_auto('/etc/passwd')",
+        "SELECT * FROM read_parquet('x')",
+        "SELECT * FROM glob('/**')",
+    ],
+)
+def test_rejects_raw_file_readers(sql):
+    """Each listed query is rejected with a message matching "Raw file access"."""
+    with pytest.raises(TableBridgeError, match="Raw file access"):
+        validate_sql(sql)
+def test_rejects_multiple_statements():
+    with pytest.raises(TableBridgeError, match="exactly one"):
+        validate_sql("SELECT 1; SELECT 2")