PyPI - tesserakit-sql - Versions diffs - 0.4.0__tar.gz - Mend

tesserakit-sql 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

tesserakit_sql-0.4.0/.gitignore +8 -0
tesserakit_sql-0.4.0/PKG-INFO +72 -0
tesserakit_sql-0.4.0/README.md +51 -0
tesserakit_sql-0.4.0/pyproject.toml +40 -0
tesserakit_sql-0.4.0/src/tessera_sql/__init__.py +3 -0
tesserakit_sql-0.4.0/src/tessera_sql/cli.py +45 -0
tesserakit_sql-0.4.0/src/tessera_sql/compiler.py +115 -0
tesserakit_sql-0.4.0/src/tessera_sql/loader.py +62 -0
tesserakit_sql-0.4.0/src/tessera_sql/pack.py +36 -0
tesserakit_sql-0.4.0/src/tessera_sql/parse.py +156 -0
tesserakit_sql-0.4.0/src/tessera_sql/schema.py +26 -0
tesserakit_sql-0.4.0/src/tessera_sql/validator.py +66 -0
tesserakit_sql-0.4.0/tests/test_sql_pack.py +148 -0

tesserakit_sql-0.4.0/.gitignore ADDED Viewed

@@ -0,0 +1,8 @@
+.venv/
+__pycache__/
+*.pyc
+dist/
+build/
+*.egg-info/
+out/
+.DS_Store

tesserakit_sql-0.4.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,72 @@
+Metadata-Version: 2.4
+Name: tesserakit-sql
+Version: 0.4.0
+Summary: SQL job pack for Tessera: lint SQL files/migrations into a statement and table catalog.
+Project-URL: Homepage, https://github.com/ShaileshRawat1403/tessera
+Project-URL: Repository, https://github.com/ShaileshRawat1403/tessera
+Project-URL: Issues, https://github.com/ShaileshRawat1403/tessera/issues
+Author: Shailesh Rawat
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.10
+Requires-Dist: pydantic>=2.7
+Requires-Dist: rich>=13.7
+Requires-Dist: tesserakit-core>=0.1.0
+Requires-Dist: typer>=0.12
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# tesserakit-sql
+Lint SQL files and migrations into a statement and table catalog.
+`tessera-sql` parses `.sql` files with lightweight heuristics (no database connection, no execution), builds a catalog of statements and declared tables, and flags high-signal migration-safety issues.
+## Lint SQL
+```bash
+tessera sql lint --input migrations/ --output ./out/sql_pack
+tessera sql lint --input schema.sql --output ./out/sql_pack
+```
+Artifacts written:
+```text
+statements.jsonl         one SqlStatement per parsed statement (kind, target, flags)
+tables.jsonl             one SqlTable per CREATE TABLE (columns, primary-key flag)
+index.md                 statement catalog
+validation_report.md     safety findings
+coverage_report.md       statement-kind distribution
+tables.md                table catalog with columns and PK status
+```
+## Lint rules
+Query safety:
+- `delete_without_where` (error) — `DELETE` with no `WHERE` removes every row
+- `update_without_where` (warning) — `UPDATE` with no `WHERE` writes every row
+- `select_star` (info) — `SELECT *` couples the query to column shape
+Migration safety (the costly, easy-to-miss class):
+- `add_not_null_without_default` (error) — `ALTER TABLE ... ADD COLUMN ... NOT NULL` with no `DEFAULT` rewrites the table and fails on existing rows
+- `truncate_table` (warning) — `TRUNCATE` wipes all rows and is often non-transactional / irreversible
+- `drop_column` (warning) — dropping a column is destructive and irreversible
+- `rename_breaks_compatibility` (warning) — `RENAME` breaks code referencing the old name; prefer add-new + backfill + drop-old
+- `drop_without_if_exists` (warning) — `DROP` without `IF EXISTS` fails if the object is absent
+- `create_table_without_if_not_exists` (info) — non-idempotent if the migration re-runs
+Schema:
+- `table_without_primary_key` (warning) — a `CREATE TABLE` declares no `PRIMARY KEY`
+- `no_statements` (info) — nothing parsed
+## Limitations (v0.1)
+Parsing is heuristic: comments are stripped, statements are split on top-level
+semicolons (quote-aware), and classification is keyword/regex based. It is tuned
+for migration and schema files, not for validating arbitrary vendor SQL dialects.

tesserakit_sql-0.4.0/README.md ADDED Viewed

@@ -0,0 +1,51 @@
+# tesserakit-sql
+Lint SQL files and migrations into a statement and table catalog.
+`tessera-sql` parses `.sql` files with lightweight heuristics (no database connection, no execution), builds a catalog of statements and declared tables, and flags high-signal migration-safety issues.
+## Lint SQL
+```bash
+tessera sql lint --input migrations/ --output ./out/sql_pack
+tessera sql lint --input schema.sql --output ./out/sql_pack
+```
+Artifacts written:
+```text
+statements.jsonl         one SqlStatement per parsed statement (kind, target, flags)
+tables.jsonl             one SqlTable per CREATE TABLE (columns, primary-key flag)
+index.md                 statement catalog
+validation_report.md     safety findings
+coverage_report.md       statement-kind distribution
+tables.md                table catalog with columns and PK status
+```
+## Lint rules
+Query safety:
+- `delete_without_where` (error) — `DELETE` with no `WHERE` removes every row
+- `update_without_where` (warning) — `UPDATE` with no `WHERE` writes every row
+- `select_star` (info) — `SELECT *` couples the query to column shape
+Migration safety (the costly, easy-to-miss class):
+- `add_not_null_without_default` (error) — `ALTER TABLE ... ADD COLUMN ... NOT NULL` with no `DEFAULT` rewrites the table and fails on existing rows
+- `truncate_table` (warning) — `TRUNCATE` wipes all rows and is often non-transactional / irreversible
+- `drop_column` (warning) — dropping a column is destructive and irreversible
+- `rename_breaks_compatibility` (warning) — `RENAME` breaks code referencing the old name; prefer add-new + backfill + drop-old
+- `drop_without_if_exists` (warning) — `DROP` without `IF EXISTS` fails if the object is absent
+- `create_table_without_if_not_exists` (info) — non-idempotent if the migration re-runs
+Schema:
+- `table_without_primary_key` (warning) — a `CREATE TABLE` declares no `PRIMARY KEY`
+- `no_statements` (info) — nothing parsed
+## Limitations (v0.1)
+Parsing is heuristic: comments are stripped, statements are split on top-level
+semicolons (quote-aware), and classification is keyword/regex based. It is tuned
+for migration and schema files, not for validating arbitrary vendor SQL dialects.

tesserakit_sql-0.4.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling>=1.25"]
+build-backend = "hatchling.build"
+[project]
+name = "tesserakit-sql"
+version = "0.4.0"
+description = "SQL job pack for Tessera: lint SQL files/migrations into a statement and table catalog."
+readme = "README.md"
+requires-python = ">=3.10"
+authors = [{ name = "Shailesh Rawat" }]
+dependencies = [
+  "tesserakit-core>=0.1.0",
+  "typer>=0.12",
+  "rich>=13.7",
+  "pydantic>=2.7",
+]
+classifiers = [
+  "Development Status :: 3 - Alpha",
+  "Environment :: Console",
+  "Intended Audience :: Developers",
+  "Programming Language :: Python :: 3",
+]
+[project.urls]
+Homepage = "https://github.com/ShaileshRawat1403/tessera"
+Repository = "https://github.com/ShaileshRawat1403/tessera"
+Issues = "https://github.com/ShaileshRawat1403/tessera/issues"
+[project.optional-dependencies]
+dev = ["pytest>=8.0"]
+[project.entry-points."tessera.commands"]
+sql = "tessera_sql.cli:register"
+[project.entry-points."tessera.jobpacks"]
+sql = "tessera_sql.pack:create_pack"
+[tool.hatch.build.targets.wheel]
+packages = ["src/tessera_sql"]

tesserakit_sql-0.4.0/src/tessera_sql/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Tessera sql pack."""
+__version__ = "0.4.0"  # matches [project].version in this release's pyproject.toml

tesserakit_sql-0.4.0/src/tessera_sql/cli.py ADDED Viewed

@@ -0,0 +1,45 @@
+from __future__ import annotations
+from pathlib import Path
+import typer
+from rich.console import Console
+from rich.table import Table
+from tessera_core.models import RunContext
+from tessera_sql.pack import SqlPack
+console = Console()
+sql_app = typer.Typer(help="Lint SQL files/migrations into a statement and table catalog.")
+@sql_app.command("lint")
+def lint_cmd(
+    input: Path = typer.Option(..., "--input", "-i", exists=True, readable=True, help="A .sql file or a directory of them."),
+    output: Path = typer.Option(Path("sql_pack"), "--output", "-o", help="Output directory."),
+) -> None:
+    """Parse and lint SQL; emit statement/table catalogs and findings."""
+    ctx = RunContext(job_name="sql", output_dir=output)
+    artifacts = SqlPack().run(input_path=input, ctx=ctx, options={})
+    # First table: one row per artifact returned by the pack.
+    artifact_table = Table(title="SQL Pack Created")
+    for heading in ("Artifact", "Path", "Kind"):
+        artifact_table.add_column(heading)
+    for artifact in artifacts:
+        artifact_table.add_row(artifact.name, str(artifact.path), artifact.kind)
+    console.print(artifact_table)
+    # Second table: run id plus the counters read from ctx.metadata (0 when a key is absent).
+    metrics = (
+        ("run_id", ctx.run_id),
+        ("statements", str(ctx.metadata.get("record_count", 0))),
+        ("findings", str(ctx.metadata.get("finding_count", 0))),
+    )
+    summary = Table(title="Run Summary")
+    for heading in ("Metric", "Value"):
+        summary.add_column(heading)
+    for metric, value in metrics:
+        summary.add_row(metric, value)
+    console.print(summary)
+def register(root_app: typer.Typer) -> None:
+    """Mount the ``sql_app`` command group on ``root_app`` under the name ``sql``."""
+    root_app.add_typer(sql_app, name="sql")

tesserakit_sql-0.4.0/src/tessera_sql/compiler.py ADDED Viewed

@@ -0,0 +1,115 @@
+from __future__ import annotations
+from collections import Counter
+from pathlib import Path
+from typing import Any
+from tessera_core.artifacts import write_jsonl, write_markdown
+from tessera_core.models import Artifact, RunContext, ValidationFinding
+from tessera_sql.loader import load_sql_records
+from tessera_sql.schema import SqlStatement, SqlTable
+from tessera_sql.validator import validate_sql_records
+def load_records(input_path: Path, options: dict[str, Any]) -> list[SqlStatement]:
+    """Return the statements parsed from ``input_path``; delegates to ``load_sql_records``."""
+    return load_sql_records(input_path, options)
+def validate_records(statements: list[SqlStatement], options: dict[str, Any]) -> list[ValidationFinding]:
+    """Return the lint findings for ``statements``; delegates to ``validate_sql_records``."""
+    return validate_sql_records(statements, options)
+def write_artifacts(statements: list[SqlStatement], ctx: RunContext, options: dict[str, Any]) -> list[Artifact]:
+    """Write the six catalog/report files into ``ctx.output_dir`` and describe them.
+    Tables come from ``options["_tables"]`` (empty list when absent). Findings are
+    taken from ``ctx.metadata["findings"]`` when that value is truthy; otherwise the
+    statements are validated again here.
+    Returns one ``Artifact`` per file, in the order the files are written.
+    """
+    out_dir = ctx.output_dir
+    out_dir.mkdir(parents=True, exist_ok=True)
+    tables: list[SqlTable] = options.get("_tables", [])
+    findings: list[ValidationFinding] = ctx.metadata.get("findings") or validate_records(statements, options)
+    # (file name, artifact kind, payload builder); payloads are built lazily so each
+    # file is rendered immediately before it is written.
+    plan = (
+        ("statements.jsonl", "jsonl", lambda: [s.model_dump() for s in statements]),
+        ("tables.jsonl", "jsonl", lambda: [t.model_dump() for t in tables]),
+        ("index.md", "markdown", lambda: _render_index(statements, tables, options)),
+        ("validation_report.md", "markdown", lambda: _render_validation(statements, findings)),
+        ("coverage_report.md", "markdown", lambda: _render_coverage(statements)),
+        ("tables.md", "markdown", lambda: _render_tables(tables)),
+    )
+    targets = [(name, out_dir / name, kind, build) for name, kind, build in plan]
+    for _, path, kind, build in targets:
+        if kind == "jsonl":
+            write_jsonl(path, build())
+        else:
+            write_markdown(path, build())
+    return [Artifact(name=name, path=path, kind=kind) for name, path, kind, _ in targets]
+def _render_index(statements: list[SqlStatement], tables: list[SqlTable], options: dict[str, Any]) -> str:
+    """Render ``index.md``: summary counts followed by one table row per statement.
+    The file count is read from ``options["_file_count"]`` (0 when absent).
+    """
+    header = [
+        "# SQL Catalog",
+        "",
+        f"- Files: {options.get('_file_count', 0)}",
+        f"- Statements: {len(statements)}",
+        f"- Tables created: {len(tables)}",
+        "",
+    ]
+    if not statements:
+        return "\n".join([*header, "_No statements found._"]) + "\n"
+    rows = [f"| {s.kind} | {s.target or '-'} | `{s.file}:{s.lineno}` |" for s in statements]
+    return "\n".join([*header, "| Kind | Target | File:Line |", "|---|---|---|", *rows]) + "\n"
+def _render_validation(statements: list[SqlStatement], findings: list[ValidationFinding]) -> str:
+    """Render ``validation_report.md``: counts, per-severity breakdown, then the findings.
+    At most 200 findings are listed; when more exist, a closing line says how many
+    were left out so the truncation is visible. The result always ends with exactly
+    one newline, like the other report renderers.
+    """
+    max_listed = 200
+    lines = ["# Validation Report", ""]
+    lines.append(f"- Statements: {len(statements)}")
+    lines.append(f"- Findings: {len(findings)}")
+    lines.append("")
+    by_sev = Counter(f.severity for f in findings)
+    lines.append("## Severity Breakdown")
+    lines.append("")
+    for sev in ("error", "warning", "info"):
+        lines.append(f"- {sev}: {by_sev.get(sev, 0)}")
+    lines.append("")
+    if findings:
+        lines.append("## Findings")
+        lines.append("")
+        listed = findings[:max_listed]
+        lines.extend(f"- **{f.severity.upper()}** `{f.code}`: {f.message}" for f in listed)
+        omitted = len(findings) - len(listed)
+        if omitted > 0:
+            lines.append("")
+            lines.append(f"_{omitted} more finding(s) not shown._")
+    # Normalise the tail: the no-findings path ends on an empty line, the findings path does not.
+    return "\n".join(lines).rstrip("\n") + "\n"
+def _render_coverage(statements: list[SqlStatement]) -> str:
+    """Render ``coverage_report.md``: the statement total and a per-kind tally, most common first."""
+    summary = f"# Coverage Report\n\n- Statements: {len(statements)}\n"
+    if not statements:
+        return summary
+    tally = Counter(s.kind for s in statements)
+    bullets = "".join(f"- `{kind}`: {count}\n" for kind, count in tally.most_common())
+    return f"{summary}\n## Statement kinds\n\n{bullets}"
+def _render_tables(tables: list[SqlTable]) -> str:
+    """Render ``tables.md``: a count line, then one section per table with source, columns and PK status."""
+    head = f"# Tables\n\n- Count: {len(tables)}\n\n"
+    if not tables:
+        return head + "_No CREATE TABLE statements found._\n"
+    sections = []
+    for table in tables:
+        pk_label = "yes" if table.has_primary_key else "NO"
+        column_list = ", ".join(f"`{col}`" for col in table.columns) or "(none parsed)"
+        sections.append(
+            f"## `{table.name}` (PK: {pk_label})\n\n"
+            f"- Source: `{table.file}:{table.lineno}`\n"
+            f"- Columns ({len(table.columns)}): {column_list}\n"
+        )
+    # Each section already ends with a newline; joining on "\n" leaves one blank line between sections.
+    return head + "\n".join(sections)

tesserakit_sql-0.4.0/src/tessera_sql/loader.py ADDED Viewed

@@ -0,0 +1,62 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from tessera_sql.parse import (
+    classify,
+    parse_create_table,
+    split_statements,
+    statement_flags,
+)
+from tessera_sql.schema import SqlStatement, SqlTable
+# Directory names whose contents are never scanned for SQL files.
+_IGNORE = {
+    ".git", ".venv", "venv", "node_modules", "__pycache__", ".pytest_cache",
+    "dist", "build", ".tox", "target",
+}
+def discover_sql_files(root: Path) -> list[Path]:
+    """Return the SQL files to lint for ``root``.
+    A file path is returned as-is in a one-element list. For a directory, every
+    ``*.sql`` match below it is returned in sorted order, except matches whose
+    path relative to ``root`` contains a name listed in ``_IGNORE``.
+    """
+    if root.is_file():
+        return [root]
+    return [
+        candidate
+        for candidate in sorted(root.rglob("*.sql"))
+        if _IGNORE.isdisjoint(candidate.relative_to(root).parts)
+    ]
+def load_sql_records(input_path: Path, options: dict[str, Any]) -> list[SqlStatement]:
+    """Parse SQL files into statements; stash discovered tables in options.
+    Side effects on ``options``: sets ``_tables`` (parsed CREATE TABLEs),
+    ``_file_count`` (number of discovered files, including any that could not be
+    read) and ``_root`` (the directory that record paths are relative to).
+    """
+    if input_path.is_dir():
+        root = input_path
+    else:
+        root = input_path.parent
+    sql_files = discover_sql_files(input_path if input_path.is_file() else root)
+    records: list[SqlStatement] = []
+    created: list[SqlTable] = []
+    for sql_file in sql_files:
+        try:
+            source = sql_file.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError):
+            # Unreadable or non-UTF-8 files are skipped without producing a finding.
+            continue
+        if sql_file.is_relative_to(root):
+            shown_path = sql_file.relative_to(root).as_posix()
+        else:
+            shown_path = sql_file.name
+        for body, first_line in split_statements(source):
+            kind, target = classify(body)
+            flags = statement_flags(kind, body)
+            records.append(
+                SqlStatement(
+                    kind=kind,
+                    target=target,
+                    file=shown_path,
+                    lineno=first_line,
+                    preview=" ".join(body.split())[:100],  # whitespace-collapsed, first 100 chars
+                    flags=flags,
+                )
+            )
+            if kind != "create_table":
+                continue
+            table = parse_create_table(body, target)
+            if table is None:
+                continue
+            table.file = shown_path
+            table.lineno = first_line
+            created.append(table)
+    options.update(_tables=created, _file_count=len(sql_files), _root=str(root))
+    return records

tesserakit_sql-0.4.0/src/tessera_sql/pack.py ADDED Viewed

@@ -0,0 +1,36 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from tessera_core.jobpack import JobPack
+from tessera_core.models import Artifact, RunContext, ValidationFinding
+from tessera_sql.compiler import load_records, validate_records, write_artifacts
+class SqlPack(JobPack):
+    """``JobPack`` implementation whose three stages delegate to ``tessera_sql.compiler``."""
+    name = "sql"
+    # NOTE(review): this release's pyproject.toml declares version 0.4.0 — confirm
+    # whether the pack version is meant to track the package version.
+    version = "0.3.1"
+    def normalize(self, input_path: Path, options: dict[str, Any]) -> list[Any]:
+        """Load records from ``input_path``; delegates to ``load_records``."""
+        return load_records(input_path, options)
+    def validate(
+        self,
+        records: list[Any],
+        options: dict[str, Any],
+    ) -> list[ValidationFinding]:
+        """Return lint findings for ``records``; delegates to ``validate_records``."""
+        return validate_records(records, options)
+    def generate(
+        self,
+        records: list[Any],
+        ctx: RunContext,
+        options: dict[str, Any],
+    ) -> list[Artifact]:
+        """Write the output files for ``records``; delegates to ``write_artifacts``."""
+        return write_artifacts(records, ctx, options)
+def create_pack() -> SqlPack:
+    """Return a new ``SqlPack``; referenced by the ``tessera.jobpacks`` entry point in pyproject.toml."""
+    return SqlPack()

tesserakit_sql-0.4.0/src/tessera_sql/parse.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Lightweight SQL parsing: strip comments, split statements, classify, extract.
+Not a full SQL grammar. It strips comments, splits on top-level semicolons,
+and uses keyword/regex heuristics to classify statements and pull out the
+high-signal facts a migration reviewer cares about.
+"""
+from __future__ import annotations
+import re
+from tessera_sql.schema import SqlStatement, SqlTable
+_LINE_COMMENT = re.compile(r"--[^\n]*")
+_BLOCK_COMMENT = re.compile(r"/\*.*?\*/", re.DOTALL)
+_IDENT = r'[`"\[]?([A-Za-z_][A-Za-z0-9_.$]*)[`"\]]?'
+# Quoted literals come first in the alternation so that comment markers inside
+# them are consumed as part of the literal and never treated as comments.
+_QUOTED_OR_COMMENT = re.compile(r"""'[^']*'|"[^"]*"|--[^\n]*|/\*.*?\*/""", re.DOTALL)
+def strip_comments(sql: str) -> str:
+    """Remove ``--`` line comments and ``/* ... */`` block comments from ``sql``.
+    Text inside single- or double-quoted literals is left untouched, so
+    ``'a--b'`` and ``'/* x */'`` survive. A block comment is replaced by as many
+    newlines as it contained (or a single space when it had none), so tokens on
+    either side stay separated and line numbers computed on the result still
+    match the original file. Unterminated comments and quotes are left as-is.
+    """
+    def _blank(match: re.Match[str]) -> str:
+        token = match.group(0)
+        if token[0] in "'\"":
+            return token  # quoted literal: keep verbatim
+        if token.startswith("--"):
+            return ""  # the newline ending the line is not part of the match
+        return "\n" * token.count("\n") or " "
+    return _QUOTED_OR_COMMENT.sub(_blank, sql)
+def split_statements(sql: str) -> list[tuple[str, int]]:
+    """Split into (statement_text, line_number) on top-level semicolons.
+    Comments are stripped first. Semicolons inside single/double quotes are
+    ignored. ``line_number`` is the 1-based line, counted in the comment-stripped
+    text, of the statement's first non-whitespace character. Statements that are
+    empty after trimming are dropped; trailing text without a semicolon is
+    returned as a final statement.
+    """
+    cleaned = strip_comments(sql)
+    statements: list[tuple[str, int]] = []
+    buf: list[str] = []
+    line = 1
+    start_line = 1
+    quote: str | None = None
+    for ch in cleaned:
+        if ch == "\n":
+            line += 1
+        if quote:
+            buf.append(ch)
+            if ch == quote:
+                quote = None
+            continue
+        if ch == ";":
+            text = "".join(buf).strip()
+            if text:
+                statements.append((text, start_line))
+            buf = []
+            continue
+        if not buf:
+            # Leading whitespace is never buffered, so any number of blank lines
+            # can precede a statement without freezing start_line too early.
+            if ch.isspace():
+                continue
+            start_line = line
+        if ch in ("'", '"'):
+            quote = ch
+        buf.append(ch)
+    tail = "".join(buf).strip()
+    if tail:
+        statements.append((tail, start_line))
+    return statements
+def classify(stmt: str) -> tuple[str, str]:
+    """Return (kind, target_name).
+    ``kind`` is decided from the statement's leading keyword; ``target_name`` is the
+    first identifier captured by that kind's pattern, or ``""`` when nothing matches.
+    SELECT/WITH statements and unrecognised statements carry no target.
+    """
+    body = stmt.lstrip()
+    lowered = body.lower()
+    def target(pattern: str) -> str:
+        found = re.search(pattern, body, re.IGNORECASE)
+        return found.group(1) if found else ""
+    if lowered.startswith("create"):
+        if re.search(r"create\s+(temp\w*\s+)?table", lowered):
+            return "create_table", target(rf"create\s+(?:temp\w*\s+)?table\s+(?:if\s+not\s+exists\s+)?{_IDENT}")
+        # Only the text before the first "(" is inspected for the INDEX keyword.
+        if "index" in lowered.split("(")[0]:
+            return "create_index", target(rf"index\s+(?:if\s+not\s+exists\s+)?{_IDENT}")
+    # (leading keyword, kind) -> pattern whose first group is the target name.
+    by_prefix = (
+        ("alter", "alter", rf"alter\s+table\s+{_IDENT}"),
+        ("truncate", "truncate", rf"truncate\s+(?:table\s+)?{_IDENT}"),
+        ("drop", "drop", rf"drop\s+\w+\s+(?:if\s+exists\s+)?{_IDENT}"),
+        ("insert", "insert", rf"insert\s+into\s+{_IDENT}"),
+        ("update", "update", rf"update\s+{_IDENT}"),
+        ("delete", "delete", rf"delete\s+from\s+{_IDENT}"),
+    )
+    for prefix, kind, pattern in by_prefix:
+        if lowered.startswith(prefix):
+            return kind, target(pattern)
+    if lowered.startswith(("select", "with")):
+        return "select", ""
+    return "other", ""
+# ALTER sub-clauses that begin with ADD / DROP but do not add or remove a column.
+_NON_COLUMN_ADD = r"constraint|primary|foreign|unique|check|index|key|fulltext|spatial|partition"
+_NON_COLUMN_DROP = r"constraint|default|not\s+null|primary|foreign|index|key|check|identity|expression|partition"
+# (?=\S) pins the end of the whitespace run so backtracking cannot slide the
+# negative lookahead onto a space and let an excluded keyword through.
+_ADDS_COLUMN = re.compile(rf"\badd\s+(?=\S)(?:column\s+(?=\S))?(?!(?:{_NON_COLUMN_ADD})\b)")
+_DROPS_COLUMN = re.compile(rf"\bdrop\s+(?=\S)(?:column\b|(?!(?:{_NON_COLUMN_DROP})\b))")
+def statement_flags(kind: str, stmt: str) -> dict:
+    """Return the parser observations for one statement, keyed by flag name.
+    Which keys are present depends on ``kind``:
+    - update/delete: ``has_where``
+    - drop: ``if_exists``, ``drops_column``
+    - select: ``select_star`` (``SELECT *``, optionally after DISTINCT/ALL; not ``count(*)``)
+    - create_table: ``if_not_exists``
+    - alter: ``adds_column``, ``drops_column``, ``renames`` and, only when true,
+      ``add_not_null_without_default``
+    Matching is keyword-based on the lower-cased text; any whitespace run is
+    accepted between the words of ``IF EXISTS`` / ``IF NOT EXISTS``.
+    """
+    low = stmt.lower()
+    flags: dict = {}
+    if kind in ("update", "delete"):
+        flags["has_where"] = bool(re.search(r"\bwhere\b", low))
+    if kind == "drop":
+        flags["if_exists"] = bool(re.search(r"\bif\s+exists\b", low))
+        flags["drops_column"] = bool(re.search(r"\bdrop\s+column\b", low))  # only via ALTER, but guard anyway
+    if kind == "select":
+        # SELECT * (not count(*))
+        flags["select_star"] = bool(re.search(r"\bselect\s+(?:(?:distinct|all)\s+)?\*", low))
+    if kind == "create_table":
+        flags["if_not_exists"] = bool(re.search(r"\bif\s+not\s+exists\b", low))
+    if kind == "alter":
+        # ADD CONSTRAINT / DROP CONSTRAINT / DROP NOT NULL etc. are not column
+        # additions or removals and must not raise the column findings.
+        adds_col = bool(_ADDS_COLUMN.search(low))
+        flags["adds_column"] = adds_col
+        flags["drops_column"] = bool(_DROPS_COLUMN.search(low))
+        flags["renames"] = bool(re.search(r"\brename\b", low))
+        # locking risk: ADD COLUMN ... NOT NULL without a DEFAULT rewrites the table
+        if adds_col and re.search(r"\bnot\s+null\b", low) and not re.search(r"\bdefault\b", low):
+            flags["add_not_null_without_default"] = True
+    return flags
+# Keywords that open a table-level constraint/index clause rather than a column
+# definition. The trailing \b keeps columns such as ``checksum`` or ``unique_id``.
+_TABLE_CLAUSE = re.compile(r"(?:primary\s+key|foreign\s+key|unique|constraint|check|index|key)\b", re.IGNORECASE)
+def parse_create_table(stmt: str, target: str) -> SqlTable | None:
+    """Build a ``SqlTable`` named ``target`` from a CREATE TABLE statement.
+    The text between the first ``(`` and the last ``)`` is split on top-level
+    commas. Parts that open with a table-level clause keyword are skipped; for
+    every other part the leading identifier is recorded as a column name.
+    ``has_primary_key`` is true when ``PRIMARY KEY`` appears anywhere in that
+    body. A statement with no parenthesised body yields a table with no columns.
+    ``file`` and ``lineno`` are left at their defaults for the caller to fill in.
+    """
+    m = re.search(r"\((.*)\)", stmt, re.DOTALL)
+    if not m:
+        return SqlTable(name=target, columns=[], has_primary_key=False)
+    body = m.group(1)
+    # Covers both inline (``id INT PRIMARY KEY``) and table-level declarations.
+    has_pk = bool(re.search(r"primary\s+key", body, re.IGNORECASE))
+    columns: list[str] = []
+    for part in _split_top_level(body):
+        p = part.strip()
+        if not p or _TABLE_CLAUSE.match(p):
+            continue
+        m2 = re.match(_IDENT, p)
+        if m2:
+            columns.append(m2.group(1))
+    return SqlTable(name=target, columns=columns, has_primary_key=has_pk)
+def _split_top_level(body: str) -> list[str]:
+    """Split ``body`` on commas that are outside parentheses and outside quotes.
+    Commas nested in ``(...)`` (e.g. ``NUMERIC(10,2)``) or inside a single-,
+    double- or backtick-quoted token (e.g. ``DEFAULT 'a,b'``) do not split.
+    Parts are returned untrimmed; a trailing empty part is omitted, and an empty
+    ``body`` yields an empty list.
+    """
+    parts: list[str] = []
+    buf: list[str] = []
+    depth = 0
+    quote: str | None = None
+    for ch in body:
+        if quote is not None:
+            # Inside a quoted token nothing is structural except the closing quote.
+            if ch == quote:
+                quote = None
+        elif ch in ("'", '"', "`"):
+            quote = ch
+        elif ch == "(":
+            depth += 1
+        elif ch == ")":
+            depth -= 1
+        elif ch == "," and depth == 0:
+            parts.append("".join(buf))
+            buf = []
+            continue
+        buf.append(ch)
+    if buf:
+        parts.append("".join(buf))
+    return parts

tesserakit_sql-0.4.0/src/tessera_sql/schema.py ADDED Viewed

@@ -0,0 +1,26 @@
+from __future__ import annotations
+from typing import Any
+from pydantic import BaseModel, Field
+class SqlStatement(BaseModel):
+    """One SQL statement. Serialized to ``statements.jsonl``."""
+    kind: str          # create_table / create_index / alter / truncate / drop / insert / update / delete / select / other
+    target: str = ""   # table/index name when determinable
+    file: str = ""     # path of the source file as recorded by the loader
+    lineno: int = 0    # line on which the statement starts, as reported by the splitter
+    preview: str = ""  # first ~100 chars, comments stripped
+    flags: dict[str, Any] = Field(default_factory=dict)  # parser observations (has_where, if_exists, select_star, ...)
+class SqlTable(BaseModel):
+    """A table declared by a CREATE TABLE. Serialized to ``tables.jsonl``."""
+    name: str  # table name taken from the CREATE TABLE statement
+    columns: list[str] = Field(default_factory=list)  # column names in declaration order
+    has_primary_key: bool = False  # whether the table body declares a PRIMARY KEY
+    file: str = ""  # source file; assigned by the loader after parsing
+    lineno: int = 0  # starting line of the statement; assigned by the loader after parsing

tesserakit_sql-0.4.0/src/tessera_sql/validator.py ADDED Viewed

@@ -0,0 +1,66 @@
+from __future__ import annotations
+from typing import Any
+from tessera_core.models import ValidationFinding
+from tessera_sql.schema import SqlStatement, SqlTable
+def validate_sql_records(statements: list[SqlStatement], options: dict[str, Any]) -> list[ValidationFinding]:
+    """Return lint findings for ``statements`` and for the tables in ``options["_tables"]``.
+    With no statements, a single ``no_statements`` info finding is returned and the
+    tables are not inspected. Otherwise statement findings come first, in statement
+    order, followed by one ``table_without_primary_key`` warning per table lacking a
+    primary key.
+    """
+    if not statements:
+        return [
+            ValidationFinding(severity="info", code="no_statements",
+                              message="no SQL statements found", field=None)
+        ]
+    # (statement kind, test on its flags, severity, code, message text).
+    # Rules are evaluated top to bottom for every statement, which fixes the order
+    # of findings when several ALTER rules fire on the same statement.
+    rules = (
+        ("delete", lambda fl: fl.get("has_where") is False, "error", "delete_without_where",
+         "DELETE without WHERE removes every row"),
+        ("update", lambda fl: fl.get("has_where") is False, "warning", "update_without_where",
+         "UPDATE without WHERE writes every row"),
+        ("drop", lambda fl: not fl.get("if_exists"), "warning", "drop_without_if_exists",
+         "DROP without IF EXISTS fails if the object is absent"),
+        ("select", lambda fl: fl.get("select_star"), "info", "select_star",
+         "SELECT * couples the query to column order/shape"),
+        # --- migration-safety rules (the costly, easy-to-miss ones) ---
+        ("truncate", lambda fl: True, "warning", "truncate_table",
+         "TRUNCATE removes all rows and is often non-transactional / non-reversible"),
+        ("alter", lambda fl: fl.get("add_not_null_without_default"), "error", "add_not_null_without_default",
+         "ADD COLUMN NOT NULL without DEFAULT rewrites the table and fails on existing rows"),
+        ("alter", lambda fl: fl.get("drops_column"), "warning", "drop_column",
+         "dropping a column is destructive and irreversible; ensure no code still reads it"),
+        ("alter", lambda fl: fl.get("renames"), "warning", "rename_breaks_compatibility",
+         "RENAME breaks any code/queries referencing the old name; prefer add-new + backfill + drop-old"),
+        ("create_table", lambda fl: not fl.get("if_not_exists"), "info", "create_table_without_if_not_exists",
+         "CREATE TABLE without IF NOT EXISTS is not idempotent if the migration re-runs"),
+    )
+    findings: list[ValidationFinding] = []
+    for stmt in statements:
+        for kind, applies, severity, code, text in rules:
+            if stmt.kind != kind or not applies(stmt.flags):
+                continue
+            findings.append(
+                ValidationFinding(
+                    severity=severity, code=code, message=f"{stmt.file}:{stmt.lineno}: {text}",
+                    field="sql", metadata={"file": stmt.file, "lineno": stmt.lineno, "kind": stmt.kind},
+                )
+            )
+    tables: list[SqlTable] = options.get("_tables", [])
+    findings.extend(
+        ValidationFinding(
+            severity="warning", code="table_without_primary_key",
+            message=f"{t.file}:{t.lineno}: table `{t.name}` has no PRIMARY KEY",
+            field="sql", metadata={"table": t.name, "file": t.file, "lineno": t.lineno},
+        )
+        for t in tables
+        if not t.has_primary_key
+    )
+    return findings

tesserakit_sql-0.4.0/tests/test_sql_pack.py ADDED Viewed

@@ -0,0 +1,148 @@
+from __future__ import annotations
+import json
+from pathlib import Path
+from tessera_core.models import RunContext
+from tessera_sql.pack import SqlPack
+from tessera_sql.parse import classify, parse_create_table, split_statements, statement_flags
+from tessera_sql.schema import SqlStatement
+REPO_ROOT = Path(__file__).resolve().parents[3]
+SAMPLE = REPO_ROOT / "examples" / "sql" / "schema.sql"
+# ---------- parsing ----------
+def test_split_ignores_semicolons_in_strings():
+    sql = "INSERT INTO t VALUES ('a;b'); SELECT 1;"
+    stmts = [s for s, _ in split_statements(sql)]
+    assert len(stmts) == 2
+    assert "a;b" in stmts[0]
+def test_split_strips_comments():
+    sql = "-- a comment\nSELECT 1; /* block\ncomment */ SELECT 2;"
+    stmts = [s for s, _ in split_statements(sql)]
+    assert len(stmts) == 2
+    assert "comment" not in " ".join(stmts)
+def test_classify_kinds():
+    assert classify("CREATE TABLE users (id int)")[0] == "create_table"
+    assert classify("create index idx on t(a)")[0] == "create_index"
+    assert classify("ALTER TABLE users ADD COLUMN x int")[0] == "alter"
+    assert classify("DROP TABLE t")[0] == "drop"
+    assert classify("DELETE FROM t")[0] == "delete"
+    assert classify("UPDATE t SET a=1")[0] == "update"
+    assert classify("SELECT * FROM t")[0] == "select"
+def test_classify_targets():
+    assert classify("CREATE TABLE users (id int)")[1] == "users"
+    assert classify("DELETE FROM sessions WHERE x=1")[1] == "sessions"
+def test_flags():
+    assert statement_flags("delete", "DELETE FROM t")["has_where"] is False
+    assert statement_flags("delete", "DELETE FROM t WHERE a=1")["has_where"] is True
+    assert statement_flags("drop", "DROP TABLE t")["if_exists"] is False
+    assert statement_flags("drop", "DROP TABLE IF EXISTS t")["if_exists"] is True
+    assert statement_flags("select", "SELECT * FROM t")["select_star"] is True
+def test_parse_create_table_columns_and_pk():
+    t = parse_create_table("CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT)", "users")
+    assert t.name == "users"
+    assert "id" in t.columns and "email" in t.columns
+    assert t.has_primary_key is True
+    t2 = parse_create_table("CREATE TABLE logs (message TEXT, created_at TIMESTAMP)", "logs")
+    assert t2.has_primary_key is False
+# ---------- end-to-end ----------
+def _run(tmp_path: Path):
+    out = tmp_path / "sql_pack"
+    ctx = RunContext(job_name="sql", output_dir=out)
+    SqlPack().run(input_path=SAMPLE, ctx=ctx, options={})
+    return out, ctx
+def test_findings(tmp_path: Path):
+    _, ctx = _run(tmp_path)
+    codes = {f.code for f in ctx.metadata["findings"]}
+    assert "delete_without_where" in codes        # DELETE FROM sessions;
+    assert "update_without_where" in codes         # UPDATE users SET active=false;
+    assert "drop_without_if_exists" in codes        # DROP TABLE temp_data;
+    assert "table_without_primary_key" in codes     # logs
+    assert "select_star" in codes                   # SELECT * FROM users
+def test_safe_statements_not_flagged(tmp_path: Path):
+    _, ctx = _run(tmp_path)
+    # guarded delete + guarded drop should not raise their dangerous-variant codes for those lines
+    delete_findings = [f for f in ctx.metadata["findings"] if f.code == "delete_without_where"]
+    # only the unguarded DELETE should be flagged, not the WHERE one
+    assert len(delete_findings) == 1
+def test_migration_safety_findings(tmp_path: Path):
+    _, ctx = _run(tmp_path)
+    codes = {f.code for f in ctx.metadata["findings"]}
+    assert "add_not_null_without_default" in codes   # ALTER ... ADD COLUMN phone TEXT NOT NULL
+    assert "drop_column" in codes                     # ALTER ... DROP COLUMN active
+    assert "rename_breaks_compatibility" in codes      # ALTER ... RENAME TO audit_logs
+    assert "truncate_table" in codes                   # TRUNCATE TABLE audit_logs
+    assert "create_table_without_if_not_exists" in codes  # users / logs
+def test_add_column_with_default_not_flagged(tmp_path: Path):
+    """ADD COLUMN ... DEFAULT '' is safe and must not raise add_not_null_without_default."""
+    _, ctx = _run(tmp_path)
+    offenders = [
+        f for f in ctx.metadata["findings"]
+        if f.code == "add_not_null_without_default"
+    ]
+    # only the phone column (no default) should be flagged, not nickname (has default)
+    assert len(offenders) == 1
+def test_if_not_exists_create_not_flagged(tmp_path: Path):
+    """CREATE TABLE IF NOT EXISTS settings must not trigger the idempotency info."""
+    from tessera_sql.parse import classify, statement_flags
+    kind, _ = classify("CREATE TABLE IF NOT EXISTS settings (id INTEGER PRIMARY KEY)")
+    flags = statement_flags(kind, "CREATE TABLE IF NOT EXISTS settings (id INTEGER PRIMARY KEY)")
+    assert kind == "create_table"
+    assert flags["if_not_exists"] is True
+def test_truncate_classified():
+    from tessera_sql.parse import classify
+    kind, target = classify("TRUNCATE TABLE audit_logs")
+    assert kind == "truncate"
+    assert target == "audit_logs"
+def test_artifacts_and_tables(tmp_path: Path):
+    out, _ = _run(tmp_path)
+    names = {p.name for p in out.iterdir()}
+    assert {
+        "statements.jsonl", "tables.jsonl", "index.md",
+        "validation_report.md", "coverage_report.md", "tables.md",
+    } <= names
+    tables = [json.loads(l) for l in (out / "tables.jsonl").read_text().splitlines()]
+    by_name = {t["name"]: t for t in tables}
+    assert by_name["users"]["has_primary_key"] is True
+    assert by_name["logs"]["has_primary_key"] is False
+def test_statement_round_trip(tmp_path: Path):
+    out, _ = _run(tmp_path)
+    for line in (out / "statements.jsonl").read_text().splitlines():
+        s = SqlStatement.model_validate_json(line)
+        assert s.kind