sql-testing-library 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/CHANGELOG.md +6 -0
  2. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/PKG-INFO +4 -5
  3. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/README.md +3 -4
  4. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/pyproject.toml +1 -1
  5. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/bigquery.py +74 -0
  6. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_sql_utils.py +9 -1
  7. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/LICENSE +0 -0
  8. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/__init__.py +0 -0
  9. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/__init__.py +0 -0
  10. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/athena.py +0 -0
  11. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/base.py +0 -0
  12. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/presto.py +0 -0
  13. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/redshift.py +0 -0
  14. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/snowflake.py +0 -0
  15. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_adapters/trino.py +0 -0
  16. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_core.py +0 -0
  17. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_exceptions.py +0 -0
  18. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_mock_table.py +0 -0
  19. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_pytest_plugin.py +0 -0
  20. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_sql_logger.py +0 -0
  21. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/_types.py +0 -0
  22. {sql_testing_library-0.9.0 → sql_testing_library-0.10.0}/src/sql_testing_library/py.typed +0 -0
--- sql_testing_library-0.9.0/CHANGELOG.md
+++ sql_testing_library-0.10.0/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## 0.10.0 (2025-06-15)
+
+### Feat
+
+- **bigquery**: add support for map in bigquery (#99)
+
 ## 0.9.0 (2025-06-10)
 
 ### Feat
--- sql_testing_library-0.9.0/PKG-INFO
+++ sql_testing_library-0.10.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: sql-testing-library
-Version: 0.9.0
+Version: 0.10.0
 Summary: A powerful Python framework for unit testing SQL queries across BigQuery, Snowflake, Redshift, Athena, and Trino with mock data
 License: MIT
 Keywords: sql,testing,unit-testing,mock-data,database-testing,bigquery,snowflake,redshift,athena,trino,data-engineering,etl-testing,sql-validation,query-testing
@@ -136,14 +136,13 @@ The library supports different data types across database engines. All checkmark
 | **Integer Array** | `List[int]` | ✅ | ✅ | ✅ | ✅ | ✅ |
 | **Decimal Array** | `List[Decimal]` | ✅ | ✅ | ✅ | ✅ | ✅ |
 | **Optional Array** | `Optional[List[T]]` | ✅ | ✅ | ✅ | ✅ | ✅ |
-| **Map/Object** | `Dict[K, V]` | | ✅ | ✅ | ✅ | ❌ |
-| **Struct/Record** | `dict`/`dataclass` | ❌ | ❌ | ❌ | ❌ | ❌ |
+| **Map/Dict** | `Dict[K, V]` | | ✅ | ✅ | ✅ | ❌ |
+| **Struct/Record** | `dataclass` | ❌ | ❌ | ❌ | ❌ | ❌ |
 | **Nested Arrays** | `List[List[T]]` | ❌ | ❌ | ❌ | ❌ | ❌ |
-| **JSON Objects** | `JSON` | ❌ | ❌ | ❌ | ❌ | ❌ |
 
 ### Database-Specific Notes
 
-- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals
+- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals; dict/map types stored as JSON strings
 - **Athena**: 256KB query size limit; supports arrays and maps using `ARRAY[]` and `MAP(ARRAY[], ARRAY[])` syntax
 - **Redshift**: Arrays and maps implemented via SUPER type (JSON parsing); 16MB query size limit
 - **Trino**: Memory catalog for testing; excellent decimal precision; supports arrays and maps
--- sql_testing_library-0.9.0/README.md
+++ sql_testing_library-0.10.0/README.md
@@ -79,14 +79,13 @@ The library supports different data types across database engines. All checkmark
 | **Integer Array** | `List[int]` | ✅ | ✅ | ✅ | ✅ | ✅ |
 | **Decimal Array** | `List[Decimal]` | ✅ | ✅ | ✅ | ✅ | ✅ |
 | **Optional Array** | `Optional[List[T]]` | ✅ | ✅ | ✅ | ✅ | ✅ |
-| **Map/Object** | `Dict[K, V]` | | ✅ | ✅ | ✅ | ❌ |
-| **Struct/Record** | `dict`/`dataclass` | ❌ | ❌ | ❌ | ❌ | ❌ |
+| **Map/Dict** | `Dict[K, V]` | | ✅ | ✅ | ✅ | ❌ |
+| **Struct/Record** | `dataclass` | ❌ | ❌ | ❌ | ❌ | ❌ |
 | **Nested Arrays** | `List[List[T]]` | ❌ | ❌ | ❌ | ❌ | ❌ |
-| **JSON Objects** | `JSON` | ❌ | ❌ | ❌ | ❌ | ❌ |
 
 ### Database-Specific Notes
 
-- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals
+- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals; dict/map types stored as JSON strings
 - **Athena**: 256KB query size limit; supports arrays and maps using `ARRAY[]` and `MAP(ARRAY[], ARRAY[])` syntax
 - **Redshift**: Arrays and maps implemented via SUPER type (JSON parsing); 16MB query size limit
 - **Trino**: Memory catalog for testing; excellent decimal precision; supports arrays and maps
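The `Dict[K, V]` annotations in the table above, and their `Optional[...]` wrappers, are introspected with standard `typing` machinery before the adapter decides how to store a column. A minimal standalone sketch of that unwrapping step (the `unwrap_optional` name is illustrative, not the library's API):

```python
from typing import Dict, Optional, Union, get_args, get_origin


def unwrap_optional(col_type):
    """Reduce Optional[T] (i.e. Union[T, None]) to T; leave other types alone."""
    if get_origin(col_type) is Union:
        non_none = [arg for arg in get_args(col_type) if arg is not type(None)]
        if non_none:
            return non_none[0]
    return col_type


# Optional[Dict[str, int]] unwraps to Dict[str, int], whose origin is dict
print(get_origin(unwrap_optional(Optional[Dict[str, int]])))  # <class 'dict'>
```

Once the annotation is reduced to a concrete generic, checking `get_origin(...) is dict` is all the adapter needs to route the column to JSON-string storage.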
--- sql_testing_library-0.9.0/pyproject.toml
+++ sql_testing_library-0.10.0/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "sql-testing-library"
-version = "0.9.0"
+version = "0.10.0"
 description = "A powerful Python framework for unit testing SQL queries across BigQuery, Snowflake, Redshift, Athena, and Trino with mock data"
 authors = ["Gurmeet Saran <gurmeetx@gmail.com>", "Kushal Thakkar <kushal.thakkar@gmail.com>"]
 maintainers = ["Gurmeet Saran <gurmeetx@gmail.com>", "Kushal Thakkar <kushal.thakkar@gmail.com>"]
--- sql_testing_library-0.9.0/src/sql_testing_library/_adapters/bigquery.py
+++ sql_testing_library-0.10.0/src/sql_testing_library/_adapters/bigquery.py
@@ -30,6 +30,32 @@ class BigQueryTypeConverter(BaseTypeConverter):
 
     def convert(self, value: Any, target_type: Type) -> Any:
         """Convert BigQuery result value to target type."""
+        # Handle None/NULL values first
+        if value is None:
+            return None
+
+        # Handle Optional types
+        if self.is_optional_type(target_type):
+            if value is None:
+                return None
+            target_type = self.get_optional_inner_type(target_type)
+
+        # Handle dict/map types from BigQuery STRING columns containing JSON
+        if hasattr(target_type, "__origin__") and target_type.__origin__ is dict:
+            # BigQuery returns JSON stored as strings, so we need to parse them
+            if isinstance(value, str):
+                import json
+
+                try:
+                    return json.loads(value)
+                except json.JSONDecodeError:
+                    return {}
+            elif isinstance(value, dict):
+                # Already a dict (shouldn't happen with STRING columns, but handle it)
+                return value
+            else:
+                return {}
+
         # BigQuery typically returns proper Python types, so use base converter
         return super().convert(value, target_type)
 
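The `Dict` branch added above can be exercised in isolation. A minimal sketch of the same parsing logic, with a hypothetical `parse_map_value` helper standing in for the `Dict` branch of `BigQueryTypeConverter.convert`:

```python
import json
from typing import Any, Dict, get_origin


def parse_map_value(value: Any, target_type: type) -> Any:
    """Parse JSON stored in a BigQuery STRING column back into a Python dict."""
    if value is None:
        return None
    if get_origin(target_type) is dict:
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                return {}  # malformed JSON falls back to an empty map
        if isinstance(value, dict):
            return value  # already parsed, pass through
        return {}
    return value


print(parse_map_value('{"a": 1, "b": 2}', Dict[str, int]))  # {'a': 1, 'b': 2}
print(parse_map_value("not json", Dict[str, int]))          # {}
print(parse_map_value(None, Dict[str, int]))                # None
```

Falling back to `{}` on a decode error matches the adapter's behavior of never surfacing a raw JSON parse failure to the test author.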
@@ -82,6 +108,9 @@ class BigQueryAdapter(DatabaseAdapter):
         # Insert data
         df = mock_table.to_dataframe()
         if not df.empty:
+            # Convert dict columns to JSON strings for BigQuery
+            df = self._prepare_dataframe_for_bigquery(df, mock_table)
+
             job_config = bigquery.LoadJobConfig()
             job = self.client.load_table_from_dataframe(df, table, job_config=job_config)
             job.result()  # Wait for job to complete
@@ -130,6 +159,9 @@ class BigQueryAdapter(DatabaseAdapter):
 
         # Insert data if any
         if not df.empty:
+            # Convert dict columns to JSON strings for BigQuery
+            df = self._prepare_dataframe_for_bigquery(df, mock_table)
+
             job_config = bigquery.LoadJobConfig()
             job = self.client.load_table_from_dataframe(df, table, job_config=job_config)
             job.result()
@@ -188,9 +220,51 @@ class BigQueryAdapter(DatabaseAdapter):
 
                 # Create field with mode=REPEATED for arrays
                 schema.append(bigquery.SchemaField(col_name, element_bq_type, mode="REPEATED"))
+            # Handle Dict/Map types
+            elif hasattr(col_type, "__origin__") and col_type.__origin__ is dict:
+                # BigQuery stores JSON data as STRING type
+                schema.append(bigquery.SchemaField(col_name, bigquery.enums.SqlTypeNames.STRING))
             else:
                 # Handle scalar types
                 bq_type = type_mapping.get(col_type, bigquery.enums.SqlTypeNames.STRING)
                 schema.append(bigquery.SchemaField(col_name, bq_type))
 
         return schema
+
+    def _prepare_dataframe_for_bigquery(
+        self, df: "pd.DataFrame", mock_table: BaseMockTable
+    ) -> "pd.DataFrame":
+        """Prepare DataFrame for BigQuery by converting dict columns to JSON strings."""
+        import json
+
+        import pandas as pd
+
+        from .._sql_utils import DecimalEncoder
+
+        # Create a copy to avoid modifying the original
+        df_copy = df.copy()
+        column_types = mock_table.get_column_types()
+
+        for col_name, col_type in column_types.items():
+            # Handle Optional types
+            if hasattr(col_type, "__origin__") and col_type.__origin__ is Union:
+                # Extract the non-None type from Optional[T]
+                non_none_types = [arg for arg in get_args(col_type) if arg is not type(None)]
+                if non_none_types:
+                    col_type = non_none_types[0]
+
+            # Check if this is a dict type
+            if hasattr(col_type, "__origin__") and col_type.__origin__ is dict:
+                # Convert dict values to JSON strings
+                def convert_dict_to_json(val):
+                    if pd.isna(val) or val is None:
+                        return None
+                    elif isinstance(val, dict):
+                        # Use DecimalEncoder to handle Decimal values in dicts
+                        return json.dumps(val, cls=DecimalEncoder)
+                    else:
+                        return val
+
+                df_copy[col_name] = df_copy[col_name].apply(convert_dict_to_json)
+
+        return df_copy
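The `DecimalEncoder` imported above lives in `_sql_utils` and its definition is not part of this diff; a typical `json.JSONEncoder` subclass for this purpose looks like the following (an assumed sketch, not the library's actual code):

```python
import json
from decimal import Decimal


class DecimalEncoder(json.JSONEncoder):
    """Serialize Decimal values, which plain json.dumps rejects with a TypeError."""

    def default(self, obj):
        if isinstance(obj, Decimal):
            return str(obj)  # keep full precision by emitting a string
        return super().default(obj)


row = {"price": Decimal("19.99"), "qty": 3}
print(json.dumps(row, cls=DecimalEncoder))  # {"price": "19.99", "qty": 3}
```

Emitting decimals as strings avoids the precision loss that converting to `float` would introduce before the value reaches BigQuery.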
--- sql_testing_library-0.9.0/src/sql_testing_library/_sql_utils.py
+++ sql_testing_library-0.10.0/src/sql_testing_library/_sql_utils.py
@@ -167,6 +167,9 @@ def format_sql_value(value: Any, column_type: Type, dialect: str = "standard") -
         elif dialect == "redshift":
             # Redshift SUPER type handles NULL maps
             return "NULL::SUPER"
+        elif dialect == "bigquery":
+            # BigQuery JSON type handles NULL maps
+            return "NULL"
         else:
             return "NULL"
 
@@ -289,9 +292,14 @@ def format_sql_value(value: Any, column_type: Type, dialect: str = "standard") -
             # Redshift uses SUPER type with JSON-like syntax for maps
             json_str = json.dumps(value, cls=DecimalEncoder)
             return f"JSON_PARSE('{json_str}')"
+        elif dialect == "bigquery":
+            # BigQuery stores JSON as strings
+            json_str = json.dumps(value, cls=DecimalEncoder)
+            # Escape single quotes in JSON string for SQL
+            json_str = json_str.replace("'", "''")
+            return f"'{json_str}'"
         else:
             # Other databases don't have native map support yet
-            # Could potentially use JSON for BigQuery, Snowflake
            raise NotImplementedError(f"Map type not yet supported for dialect: {dialect}")
 
     # Handle string types
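The escaping step in the `bigquery` branch above doubles single quotes so the JSON payload survives being embedded in a SQL string literal. A standalone sketch of the same transformation (the `map_to_bigquery_literal` name is illustrative):

```python
import json


def map_to_bigquery_literal(value: dict) -> str:
    """Render a Python dict as a single-quoted JSON string literal for BigQuery SQL."""
    json_str = json.dumps(value)
    # Double any single quotes so the literal can't break out of its SQL quoting
    json_str = json_str.replace("'", "''")
    return f"'{json_str}'"


print(map_to_bigquery_literal({"name": "O'Brien", "id": 7}))
# '{"name": "O''Brien", "id": 7}'
```

Without the `replace`, a value like `O'Brien` would terminate the SQL string early and produce a syntax error (or worse) in the generated query.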