sql-testing-library 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ """SQL Testing Library - Test SQL queries with mock data injection."""
2
+
3
+ # Import from private modules (leading underscore indicates internal use)
4
+ from ._adapters.base import DatabaseAdapter # noqa: F401
5
+ from ._core import SQLTestCase, SQLTestFramework # noqa: F401
6
+ from ._exceptions import (
7
+ MockTableNotFoundError, # noqa: F401
8
+ QuerySizeLimitExceeded, # noqa: F401
9
+ SQLParseError, # noqa: F401
10
+ SQLTestingError, # noqa: F401
11
+ TypeConversionError, # noqa: F401
12
+ )
13
+ from ._mock_table import BaseMockTable # noqa: F401
14
+ from ._pytest_plugin import sql_test # noqa: F401
15
+
16
+
17
+ # Backward compatibility alias
18
+ TestCase = SQLTestCase
19
+
20
+ # Import adapters if their dependencies are available
21
+ try:
22
+ from ._adapters.bigquery import BigQueryAdapter
23
+
24
+ __all__ = ["BigQueryAdapter"]
25
+ except ImportError:
26
+ __all__ = []
27
+
28
+ __version__ = "0.3.0"
29
+ __all__.extend(
30
+ [
31
+ "SQLTestFramework",
32
+ "TestCase",
33
+ "BaseMockTable",
34
+ "DatabaseAdapter",
35
+ "sql_test",
36
+ "SQLTestingError",
37
+ "MockTableNotFoundError",
38
+ "SQLParseError",
39
+ "QuerySizeLimitExceeded",
40
+ "TypeConversionError",
41
+ ]
42
+ )
@@ -0,0 +1,15 @@
1
+ """Database adapters for SQL testing library."""
2
+
3
+ from typing import List
4
+
5
+
6
+ # Lazy import adapters - only import when explicitly requested
7
+ # This prevents loading all heavy database SDKs when just importing the base adapter
8
+ __all__: List[str] = []
9
+
10
+ # Individual adapters can be imported directly:
11
+ # from sql_testing_library._adapters.bigquery import BigQueryAdapter
12
+ # from sql_testing_library._adapters.athena import AthenaAdapter
13
+ # from sql_testing_library._adapters.redshift import RedshiftAdapter
14
+ # from sql_testing_library._adapters.trino import TrinoAdapter
15
+ # from sql_testing_library._adapters.snowflake import SnowflakeAdapter
@@ -0,0 +1,309 @@
1
+ """Amazon Athena adapter implementation."""
2
+
3
+ import logging
4
+ import time
5
+ from datetime import date, datetime
6
+ from decimal import Decimal
7
+ from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, get_args
8
+
9
+
10
+ if TYPE_CHECKING:
11
+ import pandas as pd
12
+
13
+ import boto3
14
+
15
+ # Heavy import moved to function level for better performance
16
+ from .._mock_table import BaseMockTable
17
+ from .._types import BaseTypeConverter
18
+ from .base import DatabaseAdapter
19
+
20
+
21
+ HAS_BOTO3 = True
22
+
23
+ try:
24
+ # This is a separate import to keep the module type
25
+ # for type checking, even if the module fails to import
26
+ import boto3 as _boto3_module # noqa: F401
27
+ except ImportError:
28
+ HAS_BOTO3 = False
29
+
30
+
31
class AthenaTypeConverter(BaseTypeConverter):
    """Type converter with Athena-specific NULL handling."""

    def convert(self, value: Any, target_type: Type) -> Any:
        """Convert a raw Athena result value to ``target_type``.

        Athena's result API renders SQL NULLs as the literal string
        ``"NULL"``; translate those to ``None`` before delegating to the
        generic conversion logic of the base class.
        """
        return None if value == "NULL" else super().convert(value, target_type)
42
+
43
+
44
class AthenaAdapter(DatabaseAdapter):
    """Amazon Athena adapter for SQL testing.

    Queries run through the boto3 Athena client; results are written to the
    configured S3 output location and polled until completion.
    """

    def __init__(
        self,
        database: str,
        s3_output_location: str,
        region: str = "us-west-2",
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
    ) -> None:
        """Create an adapter bound to one Athena database.

        Args:
            database: Athena database used for queries and temp tables.
            s3_output_location: S3 URI where Athena writes query results.
            region: AWS region to create the Athena client in.
            aws_access_key_id: Optional explicit access key.
            aws_secret_access_key: Optional explicit secret key; when either
                credential is missing, the default boto3 chain is used.

        Raises:
            ImportError: If boto3 is not installed.
        """
        if not HAS_BOTO3:
            raise ImportError(
                "Athena adapter requires boto3. "
                "Install with: pip install sql-testing-library[athena]"
            )

        self.database = database
        self.s3_output_location = s3_output_location
        self.region = region

        # Initialize Athena client
        if aws_access_key_id and aws_secret_access_key:
            self.client = boto3.client(
                "athena",
                region_name=region,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
            )
        else:
            # Use default credentials from ~/.aws/credentials or environment variables
            self.client = boto3.client("athena", region_name=region)

    def get_sqlglot_dialect(self) -> str:
        """Return Presto dialect for sqlglot (Athena uses Presto SQL)."""
        return "presto"

    def execute_query(self, query: str) -> "pd.DataFrame":
        """Execute `query` and return its results as a DataFrame.

        All cell values come back as strings (Athena's result API returns
        VarCharValue) or None for NULLs.

        Raises:
            Exception: If the query fails, is cancelled, or times out.
        """
        import pandas as pd

        # Start query execution
        response = self.client.start_query_execution(
            QueryString=query,
            QueryExecutionContext={"Database": self.database},
            ResultConfiguration={"OutputLocation": self.s3_output_location},
        )

        query_execution_id = response["QueryExecutionId"]

        # Wait for query to complete
        query_status, error_info = self._wait_for_query_with_error(query_execution_id)
        if query_status != "SUCCEEDED":
            error_message = f"Athena query failed with status: {query_status}"
            if error_info:
                # Separator previously read ";Error details:" (missing space).
                error_message += f"; Error details: {error_info}"
            raise Exception(error_message)

        # Get query results.
        # NOTE(review): get_query_results returns at most one page of rows;
        # large result sets would need pagination — confirm expected sizes.
        results = self.client.get_query_results(QueryExecutionId=query_execution_id)

        # Convert to DataFrame
        if "ResultSet" in results and "Rows" in results["ResultSet"]:
            rows = results["ResultSet"]["Rows"]
            if not rows:
                return pd.DataFrame()

            # First row is header
            header = [col["VarCharValue"] for col in rows[0]["Data"]]

            # Rest are data; NULL cells omit "VarCharValue", so .get() yields None
            data = []
            for row in rows[1:]:
                data.append([col.get("VarCharValue") for col in row["Data"]])

            return pd.DataFrame(data, columns=header)
        else:
            return pd.DataFrame()

    def create_temp_table(self, mock_table: BaseMockTable) -> str:
        """Create a temporary table in Athena using CTAS.

        Returns the fully qualified ``database.table`` name. The millisecond
        timestamp suffix keeps concurrent test runs from colliding.
        """
        timestamp = int(time.time() * 1000)
        temp_table_name = f"temp_{mock_table.get_table_name()}_{timestamp}"
        qualified_table_name = f"{self.database}.{temp_table_name}"

        # Generate CTAS statement (CREATE TABLE AS SELECT)
        ctas_sql = self._generate_ctas_sql(temp_table_name, mock_table)

        # Execute CTAS query
        self.execute_query(ctas_sql)

        return qualified_table_name

    def cleanup_temp_tables(self, table_names: List[str]) -> None:
        """Best-effort drop of temporary tables; failures are logged, not raised."""
        for full_table_name in table_names:
            try:
                # Extract just the table name, not the database.table format
                if "." in full_table_name:
                    table_name = full_table_name.split(".")[-1]
                else:
                    table_name = full_table_name

                drop_query = f"DROP TABLE IF EXISTS {table_name}"
                self.execute_query(drop_query)
            except Exception as e:
                logging.warning(f"Warning: Failed to drop table {full_table_name}: {e}")

    def format_value_for_cte(self, value: Any, column_type: type) -> str:
        """Format value for Athena CTE VALUES clause."""
        from .._sql_utils import format_sql_value

        return format_sql_value(value, column_type, dialect="athena")

    def get_type_converter(self) -> BaseTypeConverter:
        """Get Athena-specific type converter."""
        return AthenaTypeConverter()

    def get_query_size_limit(self) -> Optional[int]:
        """Return query size limit in bytes for Athena."""
        # Athena has a 256KB limit for query strings
        return 256 * 1024  # 256KB

    def _wait_for_query(self, query_execution_id: str, max_retries: int = 60) -> str:
        """Wait for query to complete, returns final status."""
        status, _ = self._wait_for_query_with_error(query_execution_id, max_retries)
        return status

    def _wait_for_query_with_error(
        self, query_execution_id: str, max_retries: int = 60
    ) -> tuple[str, Optional[str]]:
        """Poll the query once per second until it reaches a terminal state.

        Returns:
            ``(status, error_info)`` — ``error_info`` is None unless the query
            failed or was cancelled; after ``max_retries`` polls the status is
            the sentinel ``"TIMEOUT"``.
        """
        for _ in range(max_retries):
            response = self.client.get_query_execution(QueryExecutionId=query_execution_id)
            query_execution = response["QueryExecution"]
            status = query_execution["Status"]["State"]

            # Explicitly cast to string to satisfy type checker
            query_status: str = str(status)

            if query_status in ("SUCCEEDED", "FAILED", "CANCELLED"):
                error_info = None
                if query_status in ("FAILED", "CANCELLED"):
                    # Prefer the human-readable reason; fall back to the
                    # structured AthenaError payload.
                    status_info = query_execution["Status"]
                    if "StateChangeReason" in status_info:
                        error_info = status_info["StateChangeReason"]
                    elif "AthenaError" in status_info:
                        athena_error = status_info["AthenaError"]
                        error_type = athena_error.get("ErrorType", "Unknown")
                        error_message = athena_error.get("ErrorMessage", "No details available")
                        error_info = f"{error_type}: {error_message}"

                return query_status, error_info

            # Wait before checking again
            time.sleep(1)

        # If we reached here, we timed out
        return "TIMEOUT", "Query execution timed out after waiting for completion"

    def _build_s3_location(self, table_name: str) -> str:
        """Build proper S3 location path avoiding double slashes."""
        # Remove trailing slash from s3_output_location if present
        base_location = self.s3_output_location.rstrip("/")
        return f"{base_location}/{table_name}/"

    @staticmethod
    def _unwrap_optional(col_type: Any) -> Any:
        """Return T for Optional[T]; any other type passes through unchanged."""
        if hasattr(col_type, "__origin__") and col_type.__origin__ is Union:
            non_none_types = [arg for arg in get_args(col_type) if arg is not type(None)]
            if non_none_types:
                return non_none_types[0]
        return col_type

    def _generate_ctas_sql(self, table_name: str, mock_table: BaseMockTable) -> str:
        """Generate CREATE TABLE AS SELECT (CTAS) statement for Athena.

        Empty mock tables get a CREATE EXTERNAL TABLE with an explicit schema;
        non-empty ones get a CTAS over SELECT ... UNION ALL literal rows.
        """
        df = mock_table.to_dataframe()
        column_types = mock_table.get_column_types()
        columns = list(df.columns)

        if df.empty:
            # For empty tables, create an empty table with correct schema.
            # CREATE EXTERNAL TABLE takes Hive DDL types: the keyword is INT
            # (not INTEGER) and plain VARCHAR without a length is invalid, so
            # strings map to STRING.
            type_mapping = {
                str: "STRING",
                int: "INT",
                float: "DOUBLE",
                bool: "BOOLEAN",
                date: "DATE",
                datetime: "TIMESTAMP",
                Decimal: "DECIMAL(38,9)",
            }

            # Generate column definitions
            column_defs = []
            for col_name, col_type in column_types.items():
                # Handle Optional types
                col_type = self._unwrap_optional(col_type)
                athena_type = type_mapping.get(col_type, "STRING")
                column_defs.append(f'"{col_name}" {athena_type}')

            columns_sql = ",\n    ".join(column_defs)

            # Create an empty external table with the correct schema
            return f"""
    CREATE EXTERNAL TABLE {table_name} (
        {columns_sql}
    )
    STORED AS PARQUET
    LOCATION '{self._build_s3_location(table_name)}'
    """
        else:
            # For tables with data, use CTAS with a VALUES clause
            # Build a SELECT statement with literal values
            select_expressions = []

            # Generate column expressions for the first row; only the first
            # SELECT carries column aliases, subsequent UNION ALL rows inherit.
            first_row = df.iloc[0]
            for col_name in columns:
                col_type = column_types.get(col_name, str)
                value = first_row[col_name]

                # Handle Optional types by extracting the non-None type for proper formatting
                actual_type = self._unwrap_optional(col_type)

                formatted_value = self.format_value_for_cte(value, actual_type)
                select_expressions.append(f'{formatted_value} AS "{col_name}"')

            # Start with the first row in the SELECT
            select_sql = f"SELECT {', '.join(select_expressions)}"

            # Add UNION ALL for each additional row
            for i in range(1, len(df)):
                row = df.iloc[i]
                row_values = []
                for col_name in columns:
                    col_type = column_types.get(col_name, str)
                    value = row[col_name]

                    # Handle Optional types by extracting the non-None type for proper formatting
                    actual_type = self._unwrap_optional(col_type)

                    formatted_value = self.format_value_for_cte(value, actual_type)
                    row_values.append(formatted_value)

                select_sql += f"\nUNION ALL SELECT {', '.join(row_values)}"

            # Create the CTAS statement for external table
            return f"""
    CREATE TABLE {table_name}
    WITH (
        format = 'PARQUET',
        external_location = '{self._build_s3_location(table_name)}'
    )
    AS {select_sql}
    """
@@ -0,0 +1,49 @@
1
+ """Base database adapter interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import TYPE_CHECKING, Any, List, Optional
5
+
6
+
7
+ if TYPE_CHECKING:
8
+ import pandas as pd
9
+
10
+ # Heavy import moved to function level for better performance
11
+ from .._mock_table import BaseMockTable
12
+ from .._types import BaseTypeConverter
13
+
14
+
15
class DatabaseAdapter(ABC):
    """Abstract base class for database adapters.

    Concrete adapters implement the five abstract methods; the two concrete
    methods provide overridable defaults (generic type conversion, no query
    size limit).
    """

    @abstractmethod
    def get_sqlglot_dialect(self) -> str:
        """Name of the sqlglot dialect used to transpile SQL for this engine."""
        ...

    @abstractmethod
    def execute_query(self, query: str) -> "pd.DataFrame":
        """Run ``query`` against the database and return the result set."""
        ...

    @abstractmethod
    def create_temp_table(self, mock_table: BaseMockTable) -> str:
        """Materialize ``mock_table`` as a temp table; return its name."""
        ...

    @abstractmethod
    def cleanup_temp_tables(self, table_names: List[str]) -> None:
        """Drop the given temporary tables."""
        ...

    @abstractmethod
    def format_value_for_cte(self, value: Any, column_type: type) -> str:
        """Render ``value`` as a SQL literal for a CTE VALUES clause."""
        ...

    def get_type_converter(self) -> BaseTypeConverter:
        """Type converter for query results; override for engine-specific rules."""
        return BaseTypeConverter()

    def get_query_size_limit(self) -> Optional[int]:
        """Maximum query size in bytes, or None when the engine has no limit."""
        return None
@@ -0,0 +1,139 @@
1
+ """BigQuery adapter implementation."""
2
+
3
+ import logging
4
+ from datetime import date, datetime
5
+ from decimal import Decimal
6
+ from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, get_args
7
+
8
+
9
+ if TYPE_CHECKING:
10
+ import pandas as pd
11
+
12
+ # Heavy imports moved to function level for better performance
13
+ from google.cloud import bigquery
14
+
15
+ from .._mock_table import BaseMockTable
16
+ from .._types import BaseTypeConverter
17
+ from .base import DatabaseAdapter
18
+
19
+
20
+ HAS_BIGQUERY = True
21
+
22
+ # The duplicate import is intentional
23
+ # First import is to get the types, second is to actually import the module
24
+ # If the second fails, we set HAS_BIGQUERY to False to handle it gracefully
25
+ try:
26
+ # This is a separate import to keep the module type
27
+ # for type checking, even if the module fails to import
28
+ import google.cloud.bigquery as _bigquery_module # noqa: F401
29
+ except ImportError:
30
+ HAS_BIGQUERY = False
31
+
32
+
33
class BigQueryTypeConverter(BaseTypeConverter):
    """BigQuery-specific type converter."""

    def convert(self, value: Any, target_type: Type) -> Any:
        """Convert a BigQuery result value to ``target_type``.

        The BigQuery client library already yields native Python objects, so
        the generic conversion in the base class is sufficient as-is.
        """
        converted = super().convert(value, target_type)
        return converted
40
+
41
+
42
class BigQueryAdapter(DatabaseAdapter):
    """Google BigQuery adapter for SQL testing.

    Temp tables are created inside the configured ``project_id.dataset_id``
    and populated via pandas load jobs.
    """

    def __init__(
        self, project_id: str, dataset_id: str, credentials_path: Optional[str] = None
    ) -> None:
        # project_id: GCP project that owns the dataset and runs queries.
        # dataset_id: dataset in which temporary tables are created.
        # credentials_path: optional service-account JSON file; when omitted,
        #     application-default credentials are used.
        # Raises ImportError if google-cloud-bigquery is not installed.
        if not HAS_BIGQUERY:
            raise ImportError(
                "BigQuery adapter requires google-cloud-bigquery. "
                "Install with: pip install sql-testing-library[bigquery]"
            )

        self.project_id = project_id
        self.dataset_id = dataset_id

        if credentials_path:
            self.client = bigquery.Client.from_service_account_json(credentials_path)
        else:
            self.client = bigquery.Client(project=project_id)

    def get_sqlglot_dialect(self) -> str:
        """Return BigQuery dialect for sqlglot."""
        return "bigquery"

    def execute_query(self, query: str) -> "pd.DataFrame":
        """Execute query and return results as DataFrame."""
        # query() is asynchronous; to_dataframe() blocks until completion and
        # raises if the job failed.
        job = self.client.query(query)
        return job.to_dataframe()

    def create_temp_table(self, mock_table: BaseMockTable) -> str:
        """Create temporary table in BigQuery.

        Returns the fully qualified ``project.dataset.table`` id. The
        millisecond timestamp suffix keeps concurrent runs from colliding.
        NOTE(review): no table expiration is set — cleanup relies entirely on
        cleanup_temp_tables being called.
        """
        import time

        temp_table_name = f"temp_{mock_table.get_table_name()}_{int(time.time() * 1000)}"
        table_id = f"{self.project_id}.{self.dataset_id}.{temp_table_name}"

        # Create table schema from mock table
        schema = self._get_bigquery_schema(mock_table)

        # Create table
        table = bigquery.Table(table_id, schema=schema)
        table = self.client.create_table(table)

        # Insert data (skipped for empty mock tables — the empty table with
        # the right schema already exists)
        df = mock_table.to_dataframe()
        if not df.empty:
            job_config = bigquery.LoadJobConfig()
            job = self.client.load_table_from_dataframe(df, table, job_config=job_config)
            job.result()  # Wait for job to complete

        return table_id

    def cleanup_temp_tables(self, table_names: List[str]) -> None:
        """Delete temporary tables.

        Best-effort: failures are logged as warnings, never raised, so one
        undeletable table does not abort cleanup of the rest.
        """
        for table_name in table_names:
            try:
                self.client.delete_table(table_name)
            except Exception as e:
                logging.warning(f"Warning: Failed to delete table {table_name}: {e}")

    def format_value_for_cte(self, value: Any, column_type: type) -> str:
        """Format value for BigQuery CTE VALUES clause."""
        # Local import keeps module import light (matches the file's pattern
        # of deferring heavy/shared imports to call time).
        from .._sql_utils import format_sql_value

        return format_sql_value(value, column_type, dialect="bigquery")

    def get_type_converter(self) -> BaseTypeConverter:
        """Get BigQuery-specific type converter."""
        return BigQueryTypeConverter()

    def _get_bigquery_schema(self, mock_table: BaseMockTable) -> List[bigquery.SchemaField]:
        """Convert mock table schema to BigQuery schema.

        Unmapped Python types fall back to STRING. Fields are created with the
        default mode (NULLABLE), so Optional and non-Optional columns end up
        identical in BigQuery.
        """
        column_types = mock_table.get_column_types()

        # Type mapping from Python types to BigQuery types
        type_mapping = {
            str: bigquery.enums.SqlTypeNames.STRING,
            int: bigquery.enums.SqlTypeNames.INT64,
            float: bigquery.enums.SqlTypeNames.FLOAT64,
            bool: bigquery.enums.SqlTypeNames.BOOL,
            date: bigquery.enums.SqlTypeNames.DATE,
            datetime: bigquery.enums.SqlTypeNames.DATETIME,
            Decimal: bigquery.enums.SqlTypeNames.NUMERIC,
        }

        schema = []
        for col_name, col_type in column_types.items():
            # Handle Optional types
            if hasattr(col_type, "__origin__") and col_type.__origin__ is Union:
                # Extract the non-None type from Optional[T]
                non_none_types = [arg for arg in get_args(col_type) if arg is not type(None)]
                if non_none_types:
                    col_type = non_none_types[0]

            bq_type = type_mapping.get(col_type, bigquery.enums.SqlTypeNames.STRING)
            schema.append(bigquery.SchemaField(col_name, bq_type))

        return schema