PyPI - sqlproof - Versions diffs - 0.1.0a1__py3-none-any.whl - Mend

sqlproof 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

sqlproof/__init__.py +32 -0
sqlproof/_version.py +1 -0
sqlproof/cli.py +151 -0
sqlproof/client.py +159 -0
sqlproof/config.py +42 -0
sqlproof/contrib/__init__.py +3 -0
sqlproof/contrib/supabase.py +136 -0
sqlproof/core.py +344 -0
sqlproof/coverage/__init__.py +6 -0
sqlproof/coverage/diversity.py +11 -0
sqlproof/coverage/plpgsql.py +5 -0
sqlproof/coverage/schema_shape.py +7 -0
sqlproof/exceptions.py +47 -0
sqlproof/generators/__init__.py +21 -0
sqlproof/generators/columns.py +93 -0
sqlproof/generators/constraints.py +181 -0
sqlproof/generators/functions.py +9 -0
sqlproof/generators/graph.py +51 -0
sqlproof/generators/rows.py +153 -0
sqlproof/generators/sampling.py +15 -0
sqlproof/generators/well_known.py +59 -0
sqlproof/pytest_plugin.py +24 -0
sqlproof/reporter/__init__.py +5 -0
sqlproof/reporter/console.py +20 -0
sqlproof/reporter/json_io.py +26 -0
sqlproof/runners/__init__.py +14 -0
sqlproof/runners/db.py +48 -0
sqlproof/runners/migration.py +51 -0
sqlproof/runners/overload.py +41 -0
sqlproof/runners/property.py +119 -0
sqlproof/runners/rls.py +40 -0
sqlproof/runners/stateful.py +36 -0
sqlproof/schema/__init__.py +27 -0
sqlproof/schema/dependency_graph.py +38 -0
sqlproof/schema/fingerprint.py +34 -0
sqlproof/schema/introspect.py +229 -0
sqlproof/schema/model.py +98 -0
sqlproof/schema/parse_sql.py +206 -0
sqlproof/testing.py +101 -0
sqlproof/types.py +34 -0
sqlproof-0.1.0a1.dist-info/METADATA +248 -0
sqlproof-0.1.0a1.dist-info/RECORD +44 -0
sqlproof-0.1.0a1.dist-info/WHEEL +4 -0
sqlproof-0.1.0a1.dist-info/entry_points.txt +5 -0

sqlproof/core.py ADDED Viewed

@@ -0,0 +1,344 @@
+from __future__ import annotations
+from collections.abc import Callable, Generator, Mapping
+from contextlib import contextmanager
+from pathlib import Path
+from types import TracebackType
+from typing import Any, Self, cast
+import psycopg
+from hypothesis import strategies as st
+from hypothesis.strategies import SearchStrategy
+from psycopg.rows import dict_row
+from psycopg.types.json import Json, Jsonb
+from sqlproof.client import InMemorySqlProofClient, PsycopgSqlProofClient, SqlProofClient
+from sqlproof.config import ExternalSeed, ExternalTableSpec, SqlProofConfig
+from sqlproof.exceptions import SqlProofPropertyFailure, SqlProofUsageError
+from sqlproof.generators.graph import ColumnOverrides, Dataset, SizeSpec, dataset_strategy
+from sqlproof.generators.sampling import draw_example
+from sqlproof.schema.dependency_graph import insertion_order
+from sqlproof.schema.fingerprint import compute
+from sqlproof.schema.introspect import introspect_schema
+from sqlproof.schema.model import Column, SchemaInfo, Table
+from sqlproof.schema.parse_sql import parse_schema_sql
+class SqlProof:
+    """Facade that loads a schema from a ``SqlProofConfig`` and exposes dataset
+    generation plus property, invariant and state-machine runners.
+
+    When the config has a ``connection_string`` a ``DBManager`` is created and
+    ``client_for_dataset()`` inserts generated rows into that database;
+    otherwise ``client_for_dataset()`` yields an in-memory client. Instances
+    can be used as context managers: leaving the ``with`` block calls
+    ``disconnect()``.
+    """
+    def __init__(self, config: SqlProofConfig) -> None:
+        """Load the schema described by ``config`` and prepare runtime state."""
+        # Imported inside the method rather than at module top
+        # (presumably to avoid an import cycle -- TODO confirm).
+        from sqlproof.runners.db import DBManager
+        self.config = config
+        self.schema_info = self._load_schema(config)
+        self.schema_fingerprint = compute(self.schema_info)
+        # No connection string means no DB manager; see client_for_dataset().
+        self._db_manager = DBManager(config) if config.connection_string is not None else None
+        # Per-instance cache handed to the module-level _external_parent_rows().
+        self._external_sample_cache: dict[str, list[object]] = {}
+    @classmethod
+    def from_schema_file(cls, path: str | Path, **kwargs: Any) -> Self:
+        """Build an instance from a schema file; ``kwargs`` go to ``SqlProofConfig``."""
+        return cls(SqlProofConfig(schema_file=path, **kwargs))
+    @classmethod
+    def from_connection_string(cls, dsn: str, **kwargs: Any) -> Self:
+        """Build an instance from a DSN; ``kwargs`` go to ``SqlProofConfig``."""
+        return cls(SqlProofConfig(connection_string=dsn, **kwargs))
+    @classmethod
+    def from_config(cls, config: SqlProofConfig) -> Self:
+        """Build an instance from an existing config (same as ``cls(config)``)."""
+        return cls(config)
+    def customize(self, table: str, **overrides: object) -> Self:
+        """Currently a no-op: both arguments are discarded and ``self`` is returned."""
+        del table, overrides
+        return self
+    def dataset_strategy(
+        self,
+        *,
+        sizes: Mapping[str, SizeSpec],
+        columns: ColumnOverrides | None = None,
+    ) -> SearchStrategy[Dataset]:
+        """Return a Hypothesis strategy producing datasets for the loaded schema.
+
+        If the config declares ``external_tables`` the strategy is built by
+        ``_dataset_strategy_with_external_tables``; otherwise ``sizes`` and
+        ``columns`` are passed straight to the generator.
+        """
+        if self.config.external_tables:
+            return self._dataset_strategy_with_external_tables(sizes=sizes, columns=columns)
+        # Inside the method body this name resolves to the module-level
+        # import from sqlproof.generators.graph, not to this method.
+        return dataset_strategy(
+            self.schema_info,
+            sizes=sizes,
+            columns=columns,
+        )
+    def _dataset_strategy_with_external_tables(
+        self,
+        *,
+        sizes: Mapping[str, SizeSpec],
+        columns: ColumnOverrides | None,
+    ) -> SearchStrategy[Dataset]:
+        """Return a composite strategy that resolves external parent rows first."""
+        @st.composite
+        def dataset(draw: st.DrawFn) -> Dataset:
+            # Runs once per generated example: self._external_parent_rows()
+            # opens (and closes) a database connection each time it is called.
+            external_parent_rows = self._external_parent_rows(draw=draw)
+            return draw(
+                dataset_strategy(
+                    self.schema_info,
+                    sizes=sizes,
+                    external_parent_rows=external_parent_rows,
+                    columns=columns,
+                )
+            )
+        return dataset()
+    def run_state_machine(
+        self,
+        machine_class: type,
+        *,
+        settings: Any = None,
+    ) -> None:
+        """Run a `SqlProofStateMachine` subclass against this proof.
+        Binds `self` as the proof for the machine, then dispatches to
+        `hypothesis.stateful.run_state_machine_as_test`. Each example gets
+        an isolated dataset client; writes from one example are rolled back
+        before the next begins.
+        Raises `SqlProofUsageError` when `machine_class` is not a subclass of
+        `SqlProofStateMachine`.
+        """
+        from hypothesis.stateful import run_state_machine_as_test
+        from sqlproof.testing import SqlProofStateMachine
+        if not isinstance(machine_class, type) or not issubclass(
+            machine_class, SqlProofStateMachine
+        ):
+            msg = "machine_class must be a subclass of SqlProofStateMachine."
+            raise SqlProofUsageError(msg)
+        # A throwaway subclass carries the proof as a class attribute, so the
+        # caller's machine_class itself is not mutated.
+        bound_class = type(
+            machine_class.__name__,
+            (machine_class,),
+            {"_sqlproof_proof": self},
+        )
+        run_state_machine_as_test(bound_class, settings=settings)
+    @contextmanager
+    def client_for_dataset(
+        self, dataset: dict[str, list[dict[str, Any]]]
+    ) -> Generator[SqlProofClient]:
+        """Yield a client that sees ``dataset``.
+
+        Without a DB manager an ``InMemorySqlProofClient`` wrapping the dataset
+        is yielded. With one, a client is acquired, a savepoint is opened, the
+        dataset is inserted, and on exit the savepoint is rolled back and
+        released.
+        """
+        if self._db_manager is None:
+            yield InMemorySqlProofClient(dataset)
+            return
+        with self._db_manager.acquire() as client:
+            # NOTE(review): SAVEPOINT needs an open transaction; presumably
+            # acquire() provides one -- confirm in DBManager.
+            client.execute("SAVEPOINT sqlproof_run")
+            try:
+                _insert_dataset(client, self.schema_info, dataset)
+                yield client
+            finally:
+                # Runs on success and on failure (including a failed insert).
+                client.execute("ROLLBACK TO SAVEPOINT sqlproof_run")
+                client.execute("RELEASE SAVEPOINT sqlproof_run")
+    def check(
+        self,
+        name: str,
+        *,
+        sizes: Mapping[str, SizeSpec],
+        property: Callable[..., None],
+        setup: object | None = None,
+        runs: int = 100,
+        seed: int | None = None,
+        timeout_ms: int = 5000,
+        commit: bool = False,
+    ) -> None:
+        """Run ``property`` through ``run_property`` with the given sizes and runs.
+
+        ``name``, ``setup``, ``seed``, ``timeout_ms`` and ``commit`` are
+        accepted but currently discarded. Failures are written under
+        ``.sqlproof/failures`` (path passed as ``failure_dir``).
+
+        Raises:
+            TypeError: if ``property`` is not callable.
+        """
+        from sqlproof.runners.property import run_property
+        del name, setup, seed, timeout_ms, commit
+        if not callable(property):
+            msg = "property must be callable"
+            raise TypeError(msg)
+        run_property(self, property, sizes=sizes, runs=runs, failure_dir=Path(".sqlproof/failures"))
+    def invariant(
+        self,
+        name: str,
+        *,
+        sizes: Mapping[str, SizeSpec],
+        query: str,
+        expect_empty: bool = True,
+        runs: int = 100,
+        seed: int | None = None,
+        timeout_ms: int = 5000,
+    ) -> None:
+        """Evaluate ``query`` against ``runs`` generated datasets.
+
+        A run fails when the query returns rows and ``expect_empty`` is true,
+        or returns no rows and ``expect_empty`` is false. ``seed`` and
+        ``timeout_ms`` are accepted but currently discarded.
+
+        Raises:
+            SqlProofPropertyFailure: on the first failing run, carrying the
+                dataset and schema fingerprint as ``counterexample``.
+        """
+        del seed, timeout_ms
+        strategy = self.dataset_strategy(sizes=sizes)
+        for run_index in range(runs):
+            # NOTE(review): always uses the in-memory client, even when a DB
+            # manager exists; client_for_dataset() is not used here.
+            client = InMemorySqlProofClient(draw_example(strategy))
+            rows = client.query(query)
+            failed = bool(rows) if expect_empty else not rows
+            if failed:
+                payload = {
+                    "property_name": name,
+                    "runs": run_index + 1,
+                    "row_context": {},
+                    "dataset": client.get_generated_data(),
+                    "schema_fingerprint": self.schema_fingerprint,
+                }
+                raise SqlProofPropertyFailure(
+                    f"Invariant {name!r} failed: query returned {len(rows)} rows.",
+                    counterexample=payload,
+                )
+    def disconnect(self) -> None:
+        """Stop the DB manager if one was created; otherwise do nothing."""
+        if self._db_manager is not None:
+            self._db_manager.stop()
+        return None
+    def __enter__(self) -> Self:
+        """Return ``self`` so the instance can be used in a ``with`` statement."""
+        return self
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """Call ``disconnect()``; returns ``None`` so exceptions propagate."""
+        self.disconnect()
+    @staticmethod
+    def _load_schema(config: SqlProofConfig) -> SchemaInfo:
+        """Load the schema for ``config``.
+
+        ``schema_file`` wins over ``connection_string`` when both are set;
+        with neither, an empty ``SchemaInfo`` is returned.
+        """
+        if config.schema_file is not None:
+            path = Path(config.schema_file)
+            return parse_schema_sql(path.read_text(encoding="utf-8"), schema=config.schema)
+        if config.connection_string is not None:
+            connection = psycopg.connect(
+                conninfo=config.connection_string,
+                autocommit=True,
+                row_factory=cast(Any, dict_row),
+            )
+            try:
+                return introspect_schema(connection, schema=config.schema)
+            finally:
+                # Short-lived connection used only for introspection.
+                connection.close()
+        return SchemaInfo()
+    def _external_parent_rows(
+        self,
+        *,
+        draw: st.DrawFn | None = None,
+    ) -> dict[str, list[dict[str, Any]]]:
+        """Resolve parent rows for the configured external tables.
+
+        Returns an empty dict when no external tables are configured.
+
+        Raises:
+            SqlProofUsageError: if external tables are configured but the
+                config has no ``connection_string``.
+        """
+        if not self.config.external_tables:
+            return {}
+        if self.config.connection_string is None:
+            msg = "external_tables requires a connection_string-backed SqlProof instance."
+            raise SqlProofUsageError(msg)
+        connection = psycopg.connect(
+            conninfo=self.config.connection_string,
+            autocommit=True,
+            row_factory=cast(Any, dict_row),
+        )
+        try:
+            client = PsycopgSqlProofClient(connection)
+            # Resolves to the module-level function of the same name, not to
+            # this method (a bare name inside a method is looked up globally).
+            return _external_parent_rows(
+                self.config.external_tables,
+                client,
+                draw=draw,
+                sample_cache=self._external_sample_cache,
+            )
+        finally:
+            connection.close()
+def _insert_dataset(
+    client: SqlProofClient,
+    schema_info: SchemaInfo,
+    dataset: dict[str, list[dict[str, Any]]],
+) -> None:
+    for table in insertion_order(schema_info.tables):
+        rows = dataset.get(table.name, [])
+        for row in rows:
+            if not row:
+                continue
+            columns = list(row)
+            placeholders = ", ".join(["%s"] * len(columns))
+            column_sql = ", ".join(_quote_identifier(column) for column in columns)
+            table_sql = f"{_quote_identifier(table.schema)}.{_quote_identifier(table.name)}"
+            sql = f"INSERT INTO {table_sql} ({column_sql}) VALUES ({placeholders})"
+            values = [_adapt_insert_value(table, column, row[column]) for column in columns]
+            client.execute(sql, *values)
+def _quote_identifier(identifier: str) -> str:
+    return '"' + identifier.replace('"', '""') + '"'
+def _adapt_insert_value(table: Table, column_name: str, value: Any) -> object:
+    column = table.column(column_name)
+    type_name = _base_type_name(column)
+    if type_name == "jsonb":
+        return Jsonb(value)
+    if type_name == "json":
+        return Json(value)
+    return value
+def _base_type_name(column: Column) -> str:
+    pg_type = column.type
+    while pg_type.base is not None:
+        pg_type = pg_type.base
+    return pg_type.name.lower()
+def _external_parent_rows(
+    specs: Mapping[str, ExternalTableSpec],
+    client: SqlProofClient,
+    *,
+    draw: st.DrawFn | None = None,
+    sample_cache: dict[str, list[object]] | None = None,
+) -> dict[str, list[dict[str, Any]]]:
+    rows_by_table: dict[str, list[dict[str, Any]]] = {}
+    for table_name, spec in specs.items():
+        seed_count = _draw_seed_count(spec.seed_count, draw=draw)
+        if spec.seed is not None:
+            _call_external_seed(spec.seed, client, seed_count)
+        sampled_values = _sample_external_values(
+            table_name,
+            spec,
+            client,
+            sample_cache=sample_cache,
+        )
+        if seed_count is not None:
+            sampled_values = sampled_values[:seed_count]
+        rows = [{spec.primary_key: value} for value in sampled_values]
+        rows_by_table[table_name] = rows
+        if "." in table_name:
+            rows_by_table.setdefault(table_name.rsplit(".", 1)[1], rows)
+    return rows_by_table
+def _sample_external_values(
+    table_name: str,
+    spec: ExternalTableSpec,
+    client: SqlProofClient,
+    *,
+    sample_cache: dict[str, list[object]] | None,
+) -> list[object]:
+    if spec.seed is not None or sample_cache is None:
+        return list(spec.sample(client))
+    if table_name not in sample_cache:
+        sample_cache[table_name] = list(spec.sample(client))
+    return sample_cache[table_name]
+def _draw_seed_count(size: SizeSpec | None, *, draw: st.DrawFn | None) -> int | None:
+    if size is None:
+        return None
+    if isinstance(size, int):
+        return size
+    if draw is None:
+        msg = "ExternalTableSpec.seed_count strategies require dataset_strategy() generation."
+        raise SqlProofUsageError(msg)
+    return draw(size)
+def _call_external_seed(
+    seed: ExternalSeed,
+    client: SqlProofClient,
+    count: int | None,
+) -> None:
+    if count is None:
+        cast(Callable[[SqlProofClient], None], seed)(client)
+        return
+    cast(Callable[[SqlProofClient, int], None], seed)(client, count)

sqlproof/coverage/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from __future__ import annotations
+from sqlproof.coverage.diversity import diversity_ratio
+from sqlproof.coverage.schema_shape import summarize_dataset_shape
+# Names re-exported from the coverage submodules imported above.
+__all__ = ["diversity_ratio", "summarize_dataset_shape"]

sqlproof/coverage/diversity.py ADDED Viewed

@@ -0,0 +1,11 @@
+from __future__ import annotations
+import json
+from typing import Any
+def diversity_ratio(datasets: list[dict[str, list[dict[str, Any]]]]) -> float:
+    if not datasets:
+        return 0.0
+    fingerprints = {json.dumps(dataset, sort_keys=True, default=str) for dataset in datasets}
+    return len(fingerprints) / len(datasets)

sqlproof/coverage/plpgsql.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
+def coverage_available() -> bool:
+    return False

sqlproof/coverage/schema_shape.py ADDED Viewed

@@ -0,0 +1,7 @@
+from __future__ import annotations
+from typing import Any
+def summarize_dataset_shape(dataset: dict[str, list[dict[str, Any]]]) -> dict[str, dict[str, int]]:
+    return {table: {"rows": len(rows)} for table, rows in dataset.items()}

sqlproof/exceptions.py ADDED Viewed

@@ -0,0 +1,47 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+class SqlProofError(Exception):
+    """Base for all SqlProof errors; every other exception in this module derives from it."""
+class SqlProofUsageError(SqlProofError):
+    """Raised on caller misuse: invalid sizes, conflicting decorators, ambiguous types, etc."""
+class SqlProofSchemaError(SqlProofError):
+    """Raised when schema parsing or introspection fails."""
+class CircularDependencyError(SqlProofSchemaError):
+    """Raised for a foreign-key cycle between distinct tables."""
+    # Subclasses SqlProofSchemaError, so handlers for schema errors catch it too.
+class SqlProofGenerationError(SqlProofError):
+    """Raised when data generation exhausts the retry budget for assume-and-retry constraints."""
+class SqlProofMappingError(SqlProofError):
+    """Raised when query_typed cannot map a row to the requested model."""
+class SqlProofTimeoutError(SqlProofError):
+    """Raised when a property run exceeds its timeout."""
+# NOTE(review): @dataclass defaults to eq=True without frozen/unsafe_hash, which
+# sets __hash__ to None -- instances of this exception are therefore unhashable.
+# Confirm nothing stores these in sets or as dict keys.
+@dataclass(slots=True)
+class SqlProofPropertyFailure(SqlProofError):
+    """The property was falsified.
+
+    The dataclass-generated ``__init__`` takes ``message`` and an optional
+    ``counterexample``; ``str()`` of the exception is the message alone.
+    """
+    # Human-readable description; also the result of __str__.
+    message: str
+    # Optional payload describing the failing case (core.py passes the
+    # property name, run count, dataset and schema fingerprint).
+    counterexample: dict[str, Any] | None = None
+    def __str__(self) -> str:
+        """Return ``message`` unchanged."""
+        return self.message
+class SqlProofContainerError(SqlProofError):
+    """Raised for container problems: testcontainers startup, a container dying mid-run, etc."""

sqlproof/generators/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+from __future__ import annotations
+from sqlproof.generators.columns import strategy_for_column, strategy_for_type
+from sqlproof.generators.graph import Dataset, SizeSpec, dataset_strategy
+from sqlproof.generators.rows import ColumnContext, ColumnOverrides
+from sqlproof.generators.well_known import emails, phone_numbers, postal_codes, slugs, urls
+# Names re-exported from the generator submodules imported above,
+# listed in case-sensitive alphabetical order.
+__all__ = [
+    "ColumnContext",
+    "ColumnOverrides",
+    "Dataset",
+    "SizeSpec",
+    "dataset_strategy",
+    "emails",
+    "phone_numbers",
+    "postal_codes",
+    "slugs",
+    "strategy_for_column",
+    "strategy_for_type",
+    "urls",
+]

sqlproof/generators/columns.py ADDED Viewed

@@ -0,0 +1,93 @@
+from __future__ import annotations
+from decimal import Decimal
+from typing import Any
+from hypothesis import strategies as st
+from hypothesis.strategies import SearchStrategy
+from sqlproof.schema.model import Column, PgType
+# Character strategy shared by every text-like generator in this module.
+# Excludes the NUL character and the Unicode surrogate category ("Cs")
+# (presumably because PostgreSQL text rejects NUL and surrogates cannot be
+# encoded as UTF-8 -- confirm against the target server encoding).
+# NOTE(review): newer Hypothesis releases spell these keywords
+# exclude_characters / exclude_categories; check the pinned version before renaming.
+POSTGRES_TEXT_ALPHABET = st.characters(
+    blacklist_characters="\x00",
+    blacklist_categories=("Cs",),
+)
+def strategy_for_column(column: Column) -> SearchStrategy[Any]:
+    strategy = strategy_for_type(column.type)
+    if column.nullable:
+        strategy = st.one_of(st.none(), strategy)
+    return strategy
+def strategy_for_type(pg_type: PgType) -> SearchStrategy[Any]:
+    name = pg_type.name.lower()
+    if pg_type.kind == "enum":
+        return st.sampled_from(pg_type.enum_values)
+    if name in {"smallint", "int2"}:
+        return st.integers(-32_768, 32_767)
+    if name in {"integer", "int", "int4", "serial"}:
+        return st.integers(-2_147_483_648, 2_147_483_647)
+    if name in {"bigint", "int8", "bigserial"}:
+        return st.integers(-(2**63), 2**63 - 1)
+    if name in {"numeric", "decimal"}:
+        places = pg_type.modifiers[1] if len(pg_type.modifiers) > 1 else 2
+        return st.decimals(
+            min_value=Decimal("-1000000"),
+            max_value=Decimal("1000000"),
+            places=places,
+            allow_nan=False,
+            allow_infinity=False,
+        )
+    if name in {"real", "float4"}:
+        return st.floats(width=32, allow_nan=False, allow_infinity=False)
+    if name in {"double precision", "float8"}:
+        return st.floats(allow_nan=False, allow_infinity=False)
+    if name in {"boolean", "bool"}:
+        return st.booleans()
+    if name in {"text", "citext"}:
+        return _postgres_text(max_size=255)
+    if name in {"varchar", "character varying"}:
+        max_size = pg_type.modifiers[0] if pg_type.modifiers else 255
+        return _postgres_text(max_size=max_size)
+    if name in {"char", "character"}:
+        size = pg_type.modifiers[0] if pg_type.modifiers else 1
+        return _postgres_text(min_size=size, max_size=size)
+    if name == "uuid":
+        return st.uuids().map(str)
+    if name in {
+        "timestamp",
+        "timestamp without time zone",
+        "timestamptz",
+        "timestamp with time zone",
+    }:
+        return st.datetimes()
+    if name == "date":
+        return st.dates()
+    if name in {"time", "timetz"}:
+        return st.times()
+    if name == "interval":
+        return st.timedeltas()
+    if name in {"json", "jsonb"}:
+        json_scalar = (
+            st.none()
+            | st.booleans()
+            | st.floats(allow_nan=False, allow_infinity=False)
+            | _postgres_text()
+        )
+        return st.recursive(
+            json_scalar,
+            lambda children: (
+                st.lists(children, max_size=5)
+                | st.dictionaries(_postgres_text(max_size=20), children, max_size=5)
+            ),
+            max_leaves=10,
+        )
+    if name == "bytea":
+        return st.binary()
+    return _postgres_text(max_size=255)
+def _postgres_text(*, min_size: int = 0, max_size: int | None = None) -> SearchStrategy[str]:
+    return st.text(alphabet=POSTGRES_TEXT_ALPHABET, min_size=min_size, max_size=max_size)