soda-sqlserver 4.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
+ Metadata-Version: 2.4
+ Name: soda-sqlserver
+ Version: 4.0.5
+ Requires-Dist: soda-core==4.0.5
+ Requires-Dist: pyodbc
+ Dynamic: requires-dist
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,24 @@
+ #!/usr/bin/env python
+
+ from setuptools import setup
+
+ package_name = "soda-sqlserver"
+ package_version = "4.0.5"
+ description = "Soda SQL Server V4"
+
+ requires = [
+     f"soda-core=={package_version}",
+     "pyodbc",
+ ]
+
+ setup(
+     name=package_name,
+     version=package_version,
+     install_requires=requires,
+     package_dir={"": "src"},
+     entry_points={
+         "soda.plugins.data_source.sqlserver": [
+             "SqlServerDataSourceImpl = soda_sqlserver.common.data_sources.sqlserver_data_source:SqlServerDataSourceImpl",
+         ],
+     },
+ )
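
For context, the soda.plugins.data_source.sqlserver entry point registered in the setup.py above is how soda-core discovers this adapter at runtime. A minimal sketch of such a lookup using only the standard library (the group name is taken from setup.py; the discovery loop itself is illustrative, not soda-core's actual plugin loader):

    from importlib.metadata import entry_points

    # List every implementation registered under the group declared in setup.py.
    for ep in entry_points(group="soda.plugins.data_source.sqlserver"):
        impl_class = ep.load()  # imports SqlServerDataSourceImpl from its module
        print(ep.name, "->", impl_class)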
@@ -0,0 +1,427 @@
+ import logging
+ from copy import deepcopy
+ from datetime import date, datetime
+ from typing import Optional
+
+ from soda_core.common.data_source_connection import DataSourceConnection
+ from soda_core.common.data_source_impl import DataSourceImpl
+ from soda_core.common.dataset_identifier import DatasetIdentifier
+ from soda_core.common.logging_constants import soda_logger
+ from soda_core.common.metadata_types import SodaDataTypeName, SqlDataType
+ from soda_core.common.sql_ast import (
+     AND,
+     COLUMN,
+     COUNT,
+     CREATE_TABLE,
+     CREATE_TABLE_AS_SELECT,
+     CREATE_TABLE_IF_NOT_EXISTS,
+     CREATE_VIEW,
+     DISTINCT,
+     DROP_TABLE,
+     DROP_TABLE_IF_EXISTS,
+     DROP_VIEW,
+     DROP_VIEW_IF_EXISTS,
+     FROM,
+     INSERT_INTO,
+     INSERT_INTO_VIA_SELECT,
+     INTO,
+     LENGTH,
+     LIMIT,
+     OFFSET,
+     ORDER_BY_ASC,
+     REGEX_LIKE,
+     SELECT,
+     STAR,
+     STRING_HASH,
+     TUPLE,
+     VALUES,
+     WHERE,
+     WITH,
+     SqlExpressionStr,
+ )
+ from soda_core.common.sql_dialect import SqlDialect
+ from soda_sqlserver.common.data_sources.sqlserver_data_source_connection import (
+     SqlServerDataSource as SqlServerDataSourceModel,
+ )
+ from soda_sqlserver.common.data_sources.sqlserver_data_source_connection import (
+     SqlServerDataSourceConnection,
+ )
+
+ logger: logging.Logger = soda_logger
+
+
+ class SqlServerDataSourceImpl(DataSourceImpl, model_class=SqlServerDataSourceModel):
+     def __init__(self, data_source_model: SqlServerDataSourceModel, connection: Optional[DataSourceConnection] = None):
+         super().__init__(data_source_model=data_source_model, connection=connection)
+
+     def _create_sql_dialect(self) -> SqlDialect:
+         return SqlServerSqlDialect(data_source_impl=self)
+
+     def _create_data_source_connection(self) -> DataSourceConnection:
+         return SqlServerDataSourceConnection(
+             name=self.data_source_model.name, connection_properties=self.data_source_model.connection_properties
+         )
+
+
+ class SqlServerSqlDialect(SqlDialect):
+     DEFAULT_QUOTE_CHAR = "["  # Do not use this! Always use quote_default()
+     SODA_DATA_TYPE_SYNONYMS = ((SodaDataTypeName.TEXT, SodaDataTypeName.VARCHAR),)
+
+     def build_select_sql(self, select_elements: list, add_semicolon: bool = True) -> str:
+         statement_lines: list[str] = []
+         statement_lines.extend(self._build_cte_sql_lines(select_elements))
+         statement_lines.extend(self._build_select_sql_lines(select_elements))
+         statement_lines.extend(self._build_into_sql_lines(select_elements))
+         statement_lines.extend(self._build_from_sql_lines(select_elements))
+         statement_lines.extend(self._build_where_sql_lines(select_elements))
+         statement_lines.extend(self._build_group_by_sql_lines(select_elements))
+         statement_lines.extend(self._build_order_by_lines(select_elements))
+
+         offset_line = self._build_offset_line(select_elements)
+         if offset_line:
+             statement_lines.append(offset_line)
+
+         limit_line = self._build_limit_line(select_elements)
+         if limit_line:
+             statement_lines.append(limit_line)
+
+         return "\n".join(statement_lines) + (";" if add_semicolon else "")
+
+     def _build_select_sql_lines(self, select_elements: list) -> list[str]:
+         # Use the default implementation, but we need to handle the case where the select elements contain a LIMIT statement.
+         select_sql_lines: list[str] = super()._build_select_sql_lines(select_elements)
+         if self.__requires_select_top(select_elements):
+             limit_element: LIMIT = [
+                 select_element for select_element in select_elements if isinstance(select_element, LIMIT)
+             ][0]
+             select_sql_lines[0] = select_sql_lines[0].replace("SELECT ", f"SELECT TOP {limit_element.limit} ")
+         return select_sql_lines
+
+     def __requires_select_top(self, select_elements: list) -> bool:
+         # We require TOP when there is a LIMIT statement and no OFFSET statement.
+         return any(isinstance(select_element, LIMIT) for select_element in select_elements) and not any(
+             isinstance(select_element, OFFSET) for select_element in select_elements
+         )
+
+     def _build_limit_line(self, select_elements: list) -> Optional[str]:
+         # First, check if there is a LIMIT statement in the select elements.
+         limit_statement_present = any(isinstance(select_element, LIMIT) for select_element in select_elements)
+         if not limit_statement_present:
+             return None
+
+         # Check if there is an OFFSET statement in the select elements. If so, use the default logic.
+         uses_offset = any(isinstance(select_element, OFFSET) for select_element in select_elements)
+         if uses_offset:
+             return super()._build_limit_line(select_elements)
+         else:
+             return None  # This case (limit, but no offset) is handled by the _build_select_sql_lines method; it adds TOP N instead of FETCH NEXT.
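+
+     # Illustrative note (editorial): with this dialect, LIMIT-only queries are rendered
+     # with TOP, while LIMIT + OFFSET uses the OFFSET ... FETCH clause, e.g.
+     #   SELECT TOP 10 [id] FROM [db].[schema].[t]
+     #   SELECT [id] FROM [db].[schema].[t] ORDER BY [id] ASC OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY
+     # (SQL Server only allows OFFSET/FETCH after an ORDER BY clause.)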
+
+     def literal_date(self, date: date):
+         """Technically dates can be passed directly as strings, but this is more explicit."""
+         date_string = date.strftime("%Y-%m-%d")
+         return f"CAST('{date_string}' AS DATE)"
+
+     def literal_datetime(self, datetime: datetime):
+         return f"'{datetime.isoformat(timespec='milliseconds')}'"
+
+     def literal_boolean(self, boolean: bool):
+         return "1" if boolean is True else "0"
+
+     def quote_default(self, identifier: Optional[str]) -> Optional[str]:
+         return f"[{identifier}]" if isinstance(identifier, str) and len(identifier) > 0 else None
+
+     def create_schema_if_not_exists_sql(self, prefixes: list[str], add_semicolon: bool = True) -> str:
+         schema_name: str = prefixes[1]
+         return f"""
+             IF NOT EXISTS ( SELECT *
+                             FROM sys.schemas
+                             WHERE name = N'{schema_name}' )
+             EXEC('CREATE SCHEMA [{schema_name}]')
+         """ + (
+             ";" if add_semicolon else ""
+         )
+
+     def build_drop_table_sql(self, drop_table: DROP_TABLE | DROP_TABLE_IF_EXISTS, add_semicolon: bool = True) -> str:
+         if_exists_sql: str = (
+             f"IF OBJECT_ID('{drop_table.fully_qualified_table_name}', 'U') IS NOT NULL"
+             if isinstance(drop_table, DROP_TABLE_IF_EXISTS)
+             else ""
+         )
+         return f"{if_exists_sql} DROP TABLE {drop_table.fully_qualified_table_name}" + (";" if add_semicolon else "")
+
+     def _build_create_table_statement_sql(self, create_table: CREATE_TABLE | CREATE_TABLE_IF_NOT_EXISTS) -> str:
+         if_not_exists_sql: str = (
+             f"IF OBJECT_ID('{create_table.fully_qualified_table_name}', 'U') IS NULL"
+             if isinstance(create_table, CREATE_TABLE_IF_NOT_EXISTS)
+             else ""
+         )
+         create_table_sql: str = f"{if_not_exists_sql} CREATE TABLE {create_table.fully_qualified_table_name} "
+         return create_table_sql
+
+     def _build_length_sql(self, length: LENGTH) -> str:
+         return f"LEN({self.build_expression_sql(length.expression)})"
+
+     def sql_expr_timestamp_literal(self, datetime_in_iso8601: str) -> str:
+         return f"'{datetime_in_iso8601}'"
+
+     def sql_expr_timestamp_truncate_day(self, timestamp_literal: str) -> str:
+         return f"DATETRUNC(DAY, {timestamp_literal})"
+
+     def sql_expr_timestamp_add_day(self, timestamp_literal: str) -> str:
+         return f"DATEADD(DAY, 1, {timestamp_literal})"
+
+     def _build_tuple_sql(self, tuple: TUPLE) -> str:
+         if tuple.check_context(COUNT) and tuple.check_context(DISTINCT):
+             return f"CHECKSUM{super()._build_tuple_sql(tuple)}"
+         if tuple.check_context(VALUES):
+             # In build_cte_values_sql, elements end up in a top-level SELECT statement, so parentheses can't be used here.
+             return ", ".join(self.build_expression_sql(e) for e in tuple.expressions)
+         return super()._build_tuple_sql(tuple)
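+
+     # Illustrative note (editorial, column names assumed): T-SQL does not accept
+     # COUNT(DISTINCT (a, b)) on a tuple, so the tuple is first collapsed into a single
+     # hashed value, e.g. COUNT(DISTINCT CHECKSUM([country], [city])).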
+
+     def _build_regex_like_sql(self, matches: REGEX_LIKE) -> str:
+         expression: str = self.build_expression_sql(matches.expression)
+         regex_pattern = matches.regex_pattern
+         # Alpha range expansion doesn't work properly for case-sensitive ranges in SQL Server.
+         # This is quite a hack to fit the common use cases; regexes are generally only partially supported anyway.
+         regex_pattern = regex_pattern.replace("a-z", "abcdefghijklmnopqrstuvwxyz")
+         regex_pattern = regex_pattern.replace("A-Z", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+         # Collations define rules for sorting strings and distinguishing similar characters.
+         # See: https://learn.microsoft.com/en-us/sql/relational-databases/collations/collation-and-unicode-support?view=sql-server-ver17
+         # CS: case-sensitive; AS: accent-sensitive.
+         # The default is SQL_Latin1_General_Cp1_CI_AS (case-insensitive), so we replace it with a case-sensitive collation.
+         return f"PATINDEX ('%{regex_pattern}%', {expression} COLLATE SQL_Latin1_General_Cp1_CS_AS) > 0"
+
+     def supports_regex_advanced(self) -> bool:
+         return False
+
+     def build_cte_values_sql(self, values: VALUES, alias_columns: list[COLUMN] | None) -> str:
+         return "\nUNION ALL\n".join(["SELECT " + self.build_expression_sql(value) for value in values.values])
+
+     def select_all_paginated_sql(
+         self,
+         dataset_identifier: DatasetIdentifier,
+         columns: list[str],
+         filter: Optional[str],
+         order_by: list[str],
+         limit: int,
+         offset: int,
+     ) -> str:
+         where_clauses = []
+
+         if filter:
+             where_clauses.append(SqlExpressionStr(filter))
+
+         statements = [
+             SELECT(columns or [STAR()]),
+             FROM(table_name=dataset_identifier.dataset_name, table_prefix=dataset_identifier.prefixes),
+             WHERE.optional(AND.optional(where_clauses)),
+             *[ORDER_BY_ASC(c) for c in order_by],
+             OFFSET(offset),
+             LIMIT(limit),
+         ]
+
+         return self.build_select_sql(statements)
+
+     def _build_limit_sql(self, limit_element: LIMIT) -> str:
+         return f"FETCH NEXT {limit_element.limit} ROWS ONLY"
+
+     def _build_offset_sql(self, offset_element: OFFSET) -> str:
+         return f"OFFSET {offset_element.offset} ROWS"
+
+     def _get_data_type_name_synonyms(self) -> list[list[str]]:
+         return [
+             ["varchar", "nvarchar"],
+             ["char", "nchar"],
+             ["int", "integer"],
+             ["bigint"],
+             ["smallint"],
+             ["real"],
+             ["float", "double precision"],
+             ["datetime2", "datetime"],
+         ]
+
+     # copied from redshift
+     def get_data_source_data_type_name_by_soda_data_type_names(self) -> dict:
+         return {
+             SodaDataTypeName.CHAR: "char",
+             SodaDataTypeName.VARCHAR: "varchar",
+             SodaDataTypeName.TEXT: "varchar",
+             SodaDataTypeName.SMALLINT: "smallint",
+             SodaDataTypeName.INTEGER: "int",
+             SodaDataTypeName.BIGINT: "bigint",
+             SodaDataTypeName.NUMERIC: "numeric",
+             SodaDataTypeName.DECIMAL: "decimal",
+             SodaDataTypeName.FLOAT: "real",
+             SodaDataTypeName.DOUBLE: "float",
+             SodaDataTypeName.TIMESTAMP: "datetime2",
+             SodaDataTypeName.TIMESTAMP_TZ: "datetimeoffset",
+             SodaDataTypeName.DATE: "date",
+             SodaDataTypeName.TIME: "time",
+             SodaDataTypeName.BOOLEAN: "bit",
+         }
+
+     # copied from redshift
+     def get_soda_data_type_name_by_data_source_data_type_names(self) -> dict[str, SodaDataTypeName]:
+         return {
+             # Character types
+             "char": SodaDataTypeName.CHAR,
+             "varchar": SodaDataTypeName.VARCHAR,
+             "text": SodaDataTypeName.TEXT,
+             "nchar": SodaDataTypeName.CHAR,
+             "nvarchar": SodaDataTypeName.VARCHAR,
+             "ntext": SodaDataTypeName.TEXT,
+             # Integer types
+             "tinyint": SodaDataTypeName.SMALLINT,
+             "smallint": SodaDataTypeName.SMALLINT,
+             "int": SodaDataTypeName.INTEGER,
+             "bigint": SodaDataTypeName.BIGINT,
+             # Exact numeric types
+             "numeric": SodaDataTypeName.NUMERIC,
+             "decimal": SodaDataTypeName.DECIMAL,
+             # Approximate numeric types
+             "real": SodaDataTypeName.FLOAT,
+             "float": SodaDataTypeName.DOUBLE,
+             # Date/time types
+             "date": SodaDataTypeName.DATE,
+             "time": SodaDataTypeName.TIME,
+             "datetime2": SodaDataTypeName.TIMESTAMP,
+             "datetimeoffset": SodaDataTypeName.TIMESTAMP_TZ,
+             "datetime": SodaDataTypeName.TIMESTAMP,
+             "smalldatetime": SodaDataTypeName.TIMESTAMP,
+             # Boolean type
+             "bit": SodaDataTypeName.BOOLEAN,
+         }
+
+     def supports_data_type_character_maximum_length(self) -> bool:
+         return True
+
+     def supports_data_type_numeric_precision(self) -> bool:
+         return True
+
+     def supports_data_type_numeric_scale(self) -> bool:
+         return True
+
+     def supports_data_type_datetime_precision(self) -> bool:
+         return True
+
+     def supports_datetime_microseconds(self) -> bool:
+         return False
+
+     def data_type_has_parameter_character_maximum_length(self, data_type_name) -> bool:
+         return data_type_name.lower() in ["varchar", "char", "nvarchar", "nchar"]
+
+     def data_type_has_parameter_numeric_precision(self, data_type_name) -> bool:
+         return data_type_name.lower() in ["numeric", "decimal", "float"]
+
+     def data_type_has_parameter_numeric_scale(self, data_type_name) -> bool:
+         return data_type_name.lower() in ["numeric", "decimal"]
+
+     def data_type_has_parameter_datetime_precision(self, data_type_name) -> bool:
+         return data_type_name.lower() in [
+             "time",
+             "datetime2",
+             "datetimeoffset",
+         ]
+
+     def default_varchar_length(self) -> Optional[int]:
+         return 255
+
+     def is_quoted(self, identifier: str) -> bool:
+         return identifier.startswith("[") and identifier.endswith("]")
+
+     def build_insert_into_sql(self, insert_into: INSERT_INTO, add_semicolon: bool = True) -> str:
+         # SQL Server supports at most 1000 rows per INSERT statement. If there are more values,
+         # split the insert into multiple statements and recursively call this function.
+         STEP_SIZE = self.get_preferred_number_of_rows_for_insert()
+         if len(insert_into.values) > STEP_SIZE:
+             final_insert_sql = ""
+             for i in range(0, len(insert_into.values), STEP_SIZE):
+                 temp_insert_into = INSERT_INTO(
+                     fully_qualified_table_name=insert_into.fully_qualified_table_name,
+                     columns=insert_into.columns,
+                     values=insert_into.values[i : i + STEP_SIZE],
+                 )
+                 final_insert_sql += self.build_insert_into_sql(
+                     temp_insert_into, add_semicolon=True
+                 )  # Now we force the semicolon to separate the statements
+                 final_insert_sql += "\n"
+             return final_insert_sql
+
+         return super().build_insert_into_sql(insert_into, add_semicolon=add_semicolon)
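+
+     # Illustrative note (editorial): with the 1000-row cap, an INSERT_INTO carrying 2500
+     # value tuples is emitted as three consecutive statements of 1000, 1000 and 500 rows,
+     # each terminated with a semicolon so they can run as a single batch.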
+
+     def build_insert_into_via_select_sql(
+         self, insert_into_via_select: INSERT_INTO_VIA_SELECT, add_semicolon: bool = True
+     ) -> str:
+         # First get all the WITH clauses from the select elements.
+         with_clauses: list[str] = []
+         remaining_select_elements: list[str] = []
+         for select_element in insert_into_via_select.select_elements:
+             if isinstance(select_element, WITH):
+                 with_clauses.append(select_element)
+             else:  # Split off the other elements
+                 remaining_select_elements.append(select_element)
+         # Then build the WITH statements.
+         with_statements: str = "\n".join(self._build_cte_sql_lines(with_clauses))
+         insert_into_sql: str = f"{with_statements}\nINSERT INTO {insert_into_via_select.fully_qualified_table_name}\n"
+         insert_into_sql += self._build_insert_into_columns_sql(insert_into_via_select) + "\n"
+         insert_into_sql += "(\n" + self.build_select_sql(remaining_select_elements, add_semicolon=False) + "\n)"
+         return insert_into_sql + (";" if add_semicolon else "")
+
+     def get_preferred_number_of_rows_for_insert(self) -> int:
+         return 1000
+
+     def map_test_sql_data_type_to_data_source(self, source_data_type: SqlDataType) -> SqlDataType:
+         """SQL Server always requires a varchar length in CREATE TABLE statements."""
+         sql_data_type = super().map_test_sql_data_type_to_data_source(source_data_type)
+         if sql_data_type.name == "varchar" and sql_data_type.character_maximum_length is None:
+             sql_data_type.character_maximum_length = self.default_varchar_length()
+         return sql_data_type
+
+     @classmethod
+     def is_same_soda_data_type_with_synonyms(cls, expected: SodaDataTypeName, actual: SodaDataTypeName) -> bool:
+         if expected == SodaDataTypeName.CHAR and actual == SodaDataTypeName.VARCHAR:
+             logger.debug(
+                 f"In is_same_soda_data_type_with_synonyms, expected {expected} and actual {actual} are treated as the same because the SQL Server cursor does not distinguish between varchar and char"
+             )
+             return True
+         elif expected == SodaDataTypeName.NUMERIC and actual == SodaDataTypeName.DECIMAL:
+             logger.debug(
+                 f"In is_same_soda_data_type_with_synonyms, expected {expected} and actual {actual} are treated as the same because the SQL Server cursor does not distinguish between numeric and decimal"
+             )
+             return True
+         elif expected == SodaDataTypeName.TIMESTAMP_TZ and actual == SodaDataTypeName.VARCHAR:
+             logger.debug(
+                 f"In is_same_soda_data_type_with_synonyms, expected {expected} and actual {actual} are treated as the same because the SQL Server cursor returns varchar for timestamps with a time zone"
+             )
+             return True
+         return super().is_same_soda_data_type_with_synonyms(expected, actual)
+
+     def _build_string_hash_sql(self, string_hash: STRING_HASH) -> str:
+         return f"CONVERT(VARCHAR(32), HASHBYTES('MD5', {self.build_expression_sql(string_hash.expression)}), 2)"
+
+     def _get_add_column_sql_expr(self) -> str:
+         return "ADD"
+
+     def build_create_table_as_select_sql(
+         self, create_table_as_select: CREATE_TABLE_AS_SELECT, add_semicolon: bool = True, add_parenthesis: bool = True
+     ) -> str:
+         # Copy the select elements and insert an INTO with the same table name as the create table as select statement
+         select_elements = create_table_as_select.select_elements.copy()
+         select_elements += [INTO(fully_qualified_table_name=create_table_as_select.fully_qualified_table_name)]
+         result_sql: str = self.build_select_sql(select_elements, add_semicolon=add_semicolon)
+         return result_sql
+
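+     # Illustrative note (editorial): T-SQL has no CREATE TABLE ... AS SELECT; a CTAS
+     # request like CREATE TABLE [s].[copy] AS SELECT * FROM [s].[orders] is therefore
+     # rendered as the equivalent SELECT * INTO [s].[copy] FROM [s].[orders];
+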
+     def build_drop_view_sql(self, drop_view: DROP_VIEW | DROP_VIEW_IF_EXISTS, add_semicolon: bool = True) -> str:
+         # SQL Server does not allow the database name to be specified in the view name, so we need to strip it.
+         drop_view_copy = deepcopy(drop_view)  # Copy the object so we don't modify the original object
+         # Drop the first prefix (database name) from the fully qualified view name
+         drop_view_copy.fully_qualified_view_name = ".".join(drop_view_copy.fully_qualified_view_name.split(".")[1:])
+         return super().build_drop_view_sql(drop_view_copy, add_semicolon)
+
+     def build_create_view_sql(
+         self, create_view: CREATE_VIEW, add_semicolon: bool = True, add_parenthesis: bool = True
+     ) -> str:
+         # SQL Server does not allow the database name to be specified in the view name, so we need to strip it.
+         create_view_copy = deepcopy(create_view)  # Copy the object so we don't modify the original object
+         # Drop the first prefix (database name) from the fully qualified view name
+         create_view_copy.fully_qualified_view_name = ".".join(create_view_copy.fully_qualified_view_name.split(".")[1:])
+         return super().build_create_view_sql(create_view_copy, add_semicolon, add_parenthesis=False)
@@ -0,0 +1,193 @@
+ from __future__ import annotations
+
+ import logging
+ import struct
+ from abc import ABC
+ from datetime import datetime, timedelta, timezone
+ from typing import Literal, Optional, Union
+
+ import pyodbc
+ from pydantic import Field, SecretStr
+ from soda_core.__version__ import SODA_CORE_VERSION
+ from soda_core.common.data_source_connection import DataSourceConnection
+ from soda_core.common.exceptions import DataSourceConnectionException
+ from soda_core.common.logging_constants import soda_logger
+ from soda_core.model.data_source.data_source import DataSourceBase
+ from soda_core.model.data_source.data_source_connection_properties import (
+     DataSourceConnectionProperties,
+ )
+
+ logger: logging.Logger = soda_logger
+
+
+ CONTEXT_AUTHENTICATION_DESCRIPTION = "Use context authentication"
+ USER_DESCRIPTION = "Username for authentication"
+ DEFAULT_PORT = 1433
+
+
+ class SqlServerConnectionProperties(DataSourceConnectionProperties, ABC):
+     host: str = Field(..., description="Host name of the SQL Server instance")
+     port: int = Field(DEFAULT_PORT, description="Port number of the SQL Server instance")
+     database: str = Field(..., description="Name of the database to use")
+
+     # Optional fields
+     driver: Optional[str] = Field(
+         "ODBC Driver 18 for SQL Server", description="Driver name for the SQL Server instance"
+     )
+     trust_server_certificate: Optional[bool] = Field(False, description="Whether to trust the server certificate")
+     trusted_connection: Optional[bool] = Field(False, description="Whether to use trusted connection")
+     encrypt: Optional[bool] = Field(False, description="Whether to encrypt the connection")
+     connection_max_retries: Optional[int] = Field(0, description="Maximum number of connection retries")
+     enable_tracing: Optional[bool] = Field(False, description="Whether to enable tracing")
+     login_timeout: Optional[int] = Field(0, description="Login timeout")
+     scope: Optional[str] = Field(None, description="Scope for the connection")
+     connection_parameters: Optional[dict[str, str]] = Field(None, description="Connection parameters")
+
+
+ class SqlServerPasswordAuth(SqlServerConnectionProperties):
+     """SQL Server authentication using password"""
+
+     user: str = Field(..., description=USER_DESCRIPTION)
+     password: SecretStr = Field(..., description="Password for authentication")
+     authentication: Literal["sql"] = "sql"
+
+
+ class SqlServerActiveDirectoryAuthentication(SqlServerConnectionProperties):
+     authentication: Literal[
+         "activedirectoryinteractive", "activedirectorypassword", "activedirectoryserviceprincipal"
+     ] = Field(..., description="Authentication type")
+
+
+ class SqlServerActiveDirectoryInteractiveAuthentication(SqlServerActiveDirectoryAuthentication):
+     user: str = Field(..., description=USER_DESCRIPTION)
+     authentication: Literal["activedirectoryinteractive"] = "activedirectoryinteractive"
+
+
+ class SqlServerActiveDirectoryPasswordAuthentication(SqlServerActiveDirectoryAuthentication):
+     authentication: Literal["activedirectorypassword"] = "activedirectorypassword"
+     user: str = Field(..., description=USER_DESCRIPTION)
+     password: SecretStr = Field(..., description="Password for authentication")
+
+
+ class SqlServerActiveDirectoryServicePrincipalAuthentication(SqlServerActiveDirectoryAuthentication):
+     authentication: Literal["activedirectoryserviceprincipal"] = "activedirectoryserviceprincipal"
+     client_id: str = Field(..., description="Client ID for authentication")
+     client_secret: SecretStr = Field(..., description="Client secret for authentication")
+
+
+ class SqlServerDataSource(DataSourceBase, ABC):
+     type: Literal["sqlserver"] = Field("sqlserver")
+
+     connection_properties: Union[
+         SqlServerPasswordAuth,
+         SqlServerActiveDirectoryInteractiveAuthentication,
+         SqlServerActiveDirectoryPasswordAuthentication,
+         SqlServerActiveDirectoryServicePrincipalAuthentication,
+     ] = Field(..., alias="connection", description="SQL Server connection configuration")
+
+
+ def handle_datetime(dto_value):
+     tup = struct.unpack("<6hI2h", dto_value)  # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
+     return datetime(tup[0], tup[1], tup[2], tup[3], tup[4], tup[5], tup[6] // 1000)
+
+
+ def handle_datetimeoffset(dto_value):
+     tup = struct.unpack("<6hI2h", dto_value)  # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
+     return datetime(
+         tup[0],
+         tup[1],
+         tup[2],
+         tup[3],
+         tup[4],
+         tup[5],
+         tup[6] // 1000,
+         timezone(timedelta(hours=tup[7], minutes=tup[8])),
+     )
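+
+ # Illustrative note (editorial): pyodbc hands DATETIMEOFFSET values to output converters
+ # as a packed 20-byte struct; unpacking "<6hI2h" yields, e.g.,
+ #   (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
+ # i.e. year through second as shorts, nanoseconds as an unsigned int, and the UTC offset
+ # as hour/minute shorts, which handle_datetimeoffset turns into a timezone-aware datetime.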
+
+
+ class SqlServerDataSourceConnection(DataSourceConnection):
+     def __init__(self, name: str, connection_properties: DataSourceConnectionProperties):
+         super().__init__(name, connection_properties)
+
+     def build_connection_string(self, config: SqlServerConnectionProperties):
+         conn_params = []
+
+         conn_params.append(f"DRIVER={{{config.driver}}}")
+         conn_params.append(f"DATABASE={config.database}")
+
+         if "\\" in config.host:
+             # If there is a backslash in the host name, the host is a SQL Server named
+             # instance. In this case the port number has to be omitted.
+             conn_params.append(f"SERVER={config.host}")
+         else:
+             conn_params.append(f"SERVER={config.host},{int(config.port)}")
+
+         if config.trusted_connection:
+             conn_params.append("Trusted_Connection=YES")
+
+         if config.trust_server_certificate:
+             conn_params.append("TrustServerCertificate=YES")
+
+         if config.encrypt:
+             conn_params.append("Encrypt=YES")
+
+         if int(config.connection_max_retries) > 0:
+             conn_params.append(f"ConnectRetryCount={int(config.connection_max_retries)}")
+
+         if config.enable_tracing:
+             conn_params.append("SQL_ATTR_TRACE=SQL_OPT_TRACE_ON")
+
+         if config.authentication.lower() == "sql":
+             conn_params.append(f"UID={{{config.user}}}")
+             conn_params.append(f"PWD={{{config.password.get_secret_value()}}}")
+         elif config.authentication.lower() == "activedirectoryinteractive":
+             conn_params.append("Authentication=ActiveDirectoryInteractive")
+             conn_params.append(f"UID={{{config.user}}}")
+         elif config.authentication.lower() == "activedirectorypassword":
+             conn_params.append("Authentication=ActiveDirectoryPassword")
+             conn_params.append(f"UID={{{config.user}}}")
+             conn_params.append(f"PWD={{{config.password.get_secret_value()}}}")
+         elif config.authentication.lower() == "activedirectoryserviceprincipal":
+             conn_params.append("Authentication=ActiveDirectoryServicePrincipal")
+             conn_params.append(f"UID={{{config.client_id}}}")
+             conn_params.append(f"PWD={{{config.client_secret.get_secret_value()}}}")
+         elif "activedirectory" in config.authentication.lower():
+             conn_params.append(f"Authentication={config.authentication}")
+
+         if config.connection_parameters:
+             for key, value in config.connection_parameters.items():
+                 logger.info(f"Adding connection parameter: {key}={value}")
+                 conn_params.append(f"{key}={value}")
+
+         conn_params.append(f"APP=soda-core-fabric/{SODA_CORE_VERSION}")
+
+         conn_str = ";".join(conn_params)
+
+         return conn_str
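+
+         # Illustrative note (editorial, values assumed): for the default SQL-auth settings
+         # the resulting DSN looks roughly like
+         #   DRIVER={ODBC Driver 18 for SQL Server};DATABASE=master;SERVER=localhost,1433;UID={SA};PWD={...};APP=soda-core-fabric/4.0.5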
+
+     def _get_pyodbc_attrs(self) -> dict[int, bytes] | None:
+         return None
+
+     def _create_connection(
+         self,
+         config: SqlServerConnectionProperties,
+     ):
+         try:
+             self.connection = pyodbc.connect(
+                 self.build_connection_string(config),
+                 attrs_before=self._get_pyodbc_attrs(),
+                 timeout=int(config.login_timeout),
+                 autocommit=self._get_autocommit_setting(),
+             )
+
+             self.connection.add_output_converter(-155, handle_datetimeoffset)
+             self.connection.add_output_converter(-150, handle_datetime)
+             return self.connection
+         except Exception as e:
+             raise DataSourceConnectionException(e) from e
+
+     def _execute_query_get_result_row_column_name(self, column) -> str:
+         return column[0]
+
+     def _get_autocommit_setting(self) -> bool:
+         return False  # No need to set autocommit, as it is set to False by default.
@@ -0,0 +1,65 @@
+ from __future__ import annotations
+
+ import os
+ from typing import Optional
+
+ from helpers.data_source_test_helper import DataSourceTestHelper
+ from soda_core.common.sql_ast import DROP_TABLE, DROP_VIEW
+ from soda_sqlserver.common.data_sources.sqlserver_data_source import (
+     SqlServerDataSourceImpl,
+     SqlServerSqlDialect,
+ )
+
+
+ class SqlServerDataSourceTestHelper(DataSourceTestHelper):
+     def _create_database_name(self) -> Optional[str]:
+         return os.getenv("SQLSERVER_DATABASE", "master")
+
+     def _create_data_source_yaml_str(self) -> str:
+         """
+         Called in _create_data_source_impl to initialize self.data_source_impl.
+         self.database_name and self.schema_name are available if appropriate for the data source type.
+         """
+         return f"""
+             type: sqlserver
+             name: {self.name}
+             connection:
+                 host: '{os.getenv("SQLSERVER_HOST", "localhost")}'
+                 port: '{os.getenv("SQLSERVER_PORT", "1433")}'
+                 database: '{os.getenv("SQLSERVER_DATABASE", "master")}'
+                 user: '{os.getenv("SQLSERVER_USERNAME", "SA")}'
+                 password: '{os.getenv("SQLSERVER_PASSWORD", "Password1!")}'
+                 trust_server_certificate: true
+                 driver: '{os.getenv("SQLSERVER_DRIVER", "ODBC Driver 18 for SQL Server")}'
+         """
+
+     def drop_test_schema_if_exists(self) -> None:
+         """We override this function because the old query in soda-library is a bit unreadable and does not work with Synapse.
+         The logic is the same: drop all tables, then drop the schema if it exists.
+         This is a more "manual" approach, but it is more readable and works with Synapse."""
+         # First find all the tables in the schema. Note: the query helpers return objects
+         # carrying database_name/schema_name/table_name attributes, not plain strings.
+         table_names = self.query_existing_test_tables()
+         data_source_impl: SqlServerDataSourceImpl = self.data_source_impl
+         dialect: SqlServerSqlDialect = data_source_impl.sql_dialect
+         for fully_qualified_table_name in table_names:
+             table_identifier = f"{dialect.quote_default(fully_qualified_table_name.database_name)}.{dialect.quote_default(fully_qualified_table_name.schema_name)}.{dialect.quote_default(fully_qualified_table_name.table_name)}"
+             drop_table_sql = dialect.build_drop_table_sql(DROP_TABLE(table_identifier))
+             self.data_source_impl.execute_update(drop_table_sql)
+
+         view_names = self.query_existing_test_views()
+         for fully_qualified_view_name in view_names:
+             view_identifier = f"{dialect.quote_default(fully_qualified_view_name.database_name)}.{dialect.quote_default(fully_qualified_view_name.schema_name)}.{dialect.quote_default(fully_qualified_view_name.view_name)}"
+             drop_view_sql = dialect.build_drop_view_sql(DROP_VIEW(view_identifier))
+             self.data_source_impl.execute_update(drop_view_sql)
+
+         # Drop the schema if it exists.
+         schema_name = self.extract_schema_from_prefix()
+         if self._does_schema_exist(schema_name):
+             self.data_source_impl.execute_update(f"DROP SCHEMA {dialect.quote_default(schema_name)};")
+
+     def _does_schema_exist(self, schema_name: str) -> bool:
+         """Check if the schema exists in the database."""
+         query_result = self.data_source_impl.execute_query(
+             f"SELECT name FROM sys.schemas WHERE name = '{schema_name}';"
+         )
+         return len(query_result.rows) > 0
@@ -0,0 +1,6 @@
+ Metadata-Version: 2.4
+ Name: soda-sqlserver
+ Version: 4.0.5
+ Requires-Dist: soda-core==4.0.5
+ Requires-Dist: pyodbc
+ Dynamic: requires-dist
@@ -0,0 +1,10 @@
+ setup.py
+ src/soda_sqlserver.egg-info/PKG-INFO
+ src/soda_sqlserver.egg-info/SOURCES.txt
+ src/soda_sqlserver.egg-info/dependency_links.txt
+ src/soda_sqlserver.egg-info/entry_points.txt
+ src/soda_sqlserver.egg-info/requires.txt
+ src/soda_sqlserver.egg-info/top_level.txt
+ src/soda_sqlserver/common/data_sources/sqlserver_data_source.py
+ src/soda_sqlserver/common/data_sources/sqlserver_data_source_connection.py
+ src/soda_sqlserver/test_helpers/sqlserver_data_source_test_helper.py
@@ -0,0 +1,2 @@
+ [soda.plugins.data_source.sqlserver]
+ SqlServerDataSourceImpl = soda_sqlserver.common.data_sources.sqlserver_data_source:SqlServerDataSourceImpl
@@ -0,0 +1,2 @@
+ soda-core==4.0.5
+ pyodbc
@@ -0,0 +1 @@
+ soda_sqlserver