snowflake-data-validation 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowflake_data_validation/__init__.py +110 -0
- snowflake/snowflake_data_validation/__main__.py +35 -0
- snowflake/snowflake_data_validation/__version__.py +16 -0
- snowflake/snowflake_data_validation/comparison_orchestrator.py +338 -0
- snowflake/snowflake_data_validation/configuration/__init__.py +16 -0
- snowflake/snowflake_data_validation/configuration/configuration_loader.py +69 -0
- snowflake/snowflake_data_validation/configuration/model/configuration_model.py +42 -0
- snowflake/snowflake_data_validation/configuration/model/connection_types.py +39 -0
- snowflake/snowflake_data_validation/configuration/model/connections/__init__.py +32 -0
- snowflake/snowflake_data_validation/configuration/model/table_configuration.py +71 -0
- snowflake/snowflake_data_validation/configuration/model/validation_configuration.py +44 -0
- snowflake/snowflake_data_validation/configuration/singleton.py +16 -0
- snowflake/snowflake_data_validation/connector/__init__.py +29 -0
- snowflake/snowflake_data_validation/connector/connector_base.py +64 -0
- snowflake/snowflake_data_validation/executer/__init__.py +45 -0
- snowflake/snowflake_data_validation/executer/async_generation_executor.py +157 -0
- snowflake/snowflake_data_validation/executer/async_validation_executor.py +378 -0
- snowflake/snowflake_data_validation/executer/base_validation_executor.py +317 -0
- snowflake/snowflake_data_validation/executer/executor_factory.py +226 -0
- snowflake/snowflake_data_validation/executer/extractor_types.py +24 -0
- snowflake/snowflake_data_validation/executer/sync_validation_executor.py +226 -0
- snowflake/snowflake_data_validation/extractor/__init__.py +28 -0
- snowflake/snowflake_data_validation/extractor/metadata_extractor_base.py +146 -0
- snowflake/snowflake_data_validation/extractor/sql_queries_template_generator.py +169 -0
- snowflake/snowflake_data_validation/main_cli.py +43 -0
- snowflake/snowflake_data_validation/mappings/__init__.py +5 -0
- snowflake/snowflake_data_validation/mappings/datatypes_mapping_base.py +95 -0
- snowflake/snowflake_data_validation/query/__init__.py +22 -0
- snowflake/snowflake_data_validation/query/query_generator_base.py +107 -0
- snowflake/snowflake_data_validation/script_writer/__init__.py +22 -0
- snowflake/snowflake_data_validation/script_writer/script_writer_base.py +122 -0
- snowflake/snowflake_data_validation/snowflake/__init__.py +38 -0
- snowflake/snowflake_data_validation/snowflake/connector/connector_snowflake.py +197 -0
- snowflake/snowflake_data_validation/snowflake/extractor/metadata_extractor_snowflake.py +268 -0
- snowflake/snowflake_data_validation/snowflake/extractor/snowflake_cte_generator.py +120 -0
- snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_column_metrics_templates.yaml +143 -0
- snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_columns_cte_template.sql.j2 +8 -0
- snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_datatypes_normalization_templates.yaml +9 -0
- snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_get_columns_metadata.sql.j2 +76 -0
- snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_table_metadata_query.sql.j2 +29 -0
- snowflake/snowflake_data_validation/snowflake/model/__init__.py +25 -0
- snowflake/snowflake_data_validation/snowflake/model/snowflake_default_connection.py +31 -0
- snowflake/snowflake_data_validation/snowflake/model/snowflake_named_connection.py +36 -0
- snowflake/snowflake_data_validation/snowflake/query/__init__.py +22 -0
- snowflake/snowflake_data_validation/snowflake/query/query_generator_snowflake.py +206 -0
- snowflake/snowflake_data_validation/snowflake/script_writer/__init__.py +22 -0
- snowflake/snowflake_data_validation/snowflake/script_writer/script_writer_snowflake.py +50 -0
- snowflake/snowflake_data_validation/snowflake/snowflake_arguments_manager.py +359 -0
- snowflake/snowflake_data_validation/snowflake/snowflake_cli.py +374 -0
- snowflake/snowflake_data_validation/sqlserver/__init__.py +50 -0
- snowflake/snowflake_data_validation/sqlserver/connector/__init__.py +17 -0
- snowflake/snowflake_data_validation/sqlserver/connector/connector_sql_server.py +169 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/__init__.py +16 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/metadata_extractor_sqlserver.py +238 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/sqlserver_cte_generator.py +126 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_column_metrics_templates.yaml +496 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_columns_cte_template.sql.j2 +8 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_datatypes_mapping_template.yaml +64 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_datatypes_normalization_templates.yaml +26 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_get_columns_metadata.sql.j2 +70 -0
- snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_table_metadata_query.sql.j2 +28 -0
- snowflake/snowflake_data_validation/sqlserver/mappings/__init__.py +16 -0
- snowflake/snowflake_data_validation/sqlserver/mappings/sqlserver_datatypes_mapping.py +51 -0
- snowflake/snowflake_data_validation/sqlserver/model/__init__.py +21 -0
- snowflake/snowflake_data_validation/sqlserver/model/sqlserver_credentials_connection.py +48 -0
- snowflake/snowflake_data_validation/sqlserver/query/__init__.py +22 -0
- snowflake/snowflake_data_validation/sqlserver/query/query_generator_sqlserver.py +193 -0
- snowflake/snowflake_data_validation/sqlserver/script_writer/__init__.py +22 -0
- snowflake/snowflake_data_validation/sqlserver/script_writer/script_writer_sqlserver.py +50 -0
- snowflake/snowflake_data_validation/sqlserver/sqlserver_arguments_manager.py +377 -0
- snowflake/snowflake_data_validation/sqlserver/sqlserver_cli.py +362 -0
- snowflake/snowflake_data_validation/utils/__init__.py +16 -0
- snowflake/snowflake_data_validation/utils/arguments_manager_base.py +335 -0
- snowflake/snowflake_data_validation/utils/arguments_manager_factory.py +114 -0
- snowflake/snowflake_data_validation/utils/base_output_handler.py +60 -0
- snowflake/snowflake_data_validation/utils/console_output_handler.py +88 -0
- snowflake/snowflake_data_validation/utils/constants.py +166 -0
- snowflake/snowflake_data_validation/utils/context.py +79 -0
- snowflake/snowflake_data_validation/utils/helpers.py +608 -0
- snowflake/snowflake_data_validation/utils/logging_config.py +156 -0
- snowflake/snowflake_data_validation/utils/logging_utils.py +67 -0
- snowflake/snowflake_data_validation/utils/model/column_metadata.py +30 -0
- snowflake/snowflake_data_validation/utils/model/table_column_metadata.py +67 -0
- snowflake/snowflake_data_validation/utils/model/templates_loader_manager.py +78 -0
- snowflake/snowflake_data_validation/utils/progress_reporter.py +32 -0
- snowflake/snowflake_data_validation/utils/thread_safe_singleton.py +49 -0
- snowflake/snowflake_data_validation/validation/__init__.py +16 -0
- snowflake/snowflake_data_validation/validation/data_validator.py +727 -0
- snowflake/snowflake_data_validation/validation/validation_report_buffer.py +163 -0
- snowflake_data_validation-0.0.1.dist-info/METADATA +89 -0
- snowflake_data_validation-0.0.1.dist-info/RECORD +94 -0
- snowflake_data_validation-0.0.1.dist-info/WHEEL +4 -0
- snowflake_data_validation-0.0.1.dist-info/entry_points.txt +3 -0
- snowflake_data_validation-0.0.1.dist-info/licenses/LICENSE +177 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Snowflake Data Validation
|
|
18
|
+
=========================
|
|
19
|
+
|
|
20
|
+
This package provides comprehensive data validation functionality for Snowflake,
|
|
21
|
+
enabling robust data quality checks and migration validation between different
|
|
22
|
+
database systems.
|
|
23
|
+
|
|
24
|
+
Features
|
|
25
|
+
--------
|
|
26
|
+
- Multi-level data validation (Level 1: table metadata, Level 2: column metadata)
|
|
27
|
+
- Support for multiple source systems (SQL Server, Snowflake)
|
|
28
|
+
- CLI interface for validation operations
|
|
29
|
+
- Configurable validation processes
|
|
30
|
+
- Progress reporting and detailed validation reports
|
|
31
|
+
|
|
32
|
+
Main Components
|
|
33
|
+
---------------
|
|
34
|
+
ComparisonOrchestrator
|
|
35
|
+
Main class for orchestrating data comparisons between source and target systems
|
|
36
|
+
|
|
37
|
+
CLI Interface
|
|
38
|
+
Command-line interface available via `main_cli` module for:
|
|
39
|
+
- Setting up database connections
|
|
40
|
+
- Running validation operations
|
|
41
|
+
- Managing configuration files
|
|
42
|
+
|
|
43
|
+
Supported Dialects
|
|
44
|
+
------------------
|
|
45
|
+
- Snowflake to Snowflake validation
|
|
46
|
+
- SQL Server to Snowflake migration validation
|
|
47
|
+
|
|
48
|
+
Example Usage
|
|
49
|
+
-------------
|
|
50
|
+
Programmatic API:
|
|
51
|
+
>>> from snowflake.snowflake_data_validation import ComparisonOrchestrator
|
|
52
|
+
>>> # Set up connectors, context, and configuration
|
|
53
|
+
>>> orchestrator = ComparisonOrchestrator(
|
|
54
|
+
... source_connector=source_connector,
|
|
55
|
+
... target_connector=target_connector,
|
|
56
|
+
... context=context
|
|
57
|
+
... )
|
|
58
|
+
>>> orchestrator.run_sync_comparison()
|
|
59
|
+
|
|
60
|
+
Command Line Interface:
|
|
61
|
+
|
|
62
|
+
.. code-block:: bash
|
|
63
|
+
|
|
64
|
+
# Set up SQL Server connection
|
|
65
|
+
python -m snowflake.snowflake_data_validation sqlserver source-connection \
|
|
66
|
+
--host localhost --port 1433 --username user --password pass --database mydb
|
|
67
|
+
|
|
68
|
+
# Run validation
|
|
69
|
+
python -m snowflake.snowflake_data_validation sqlserver run-validation \
|
|
70
|
+
--data-validation-config-file config.json
|
|
71
|
+
|
|
72
|
+
For more detailed examples and usage, please refer to the documentation.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
from snowflake.snowflake_data_validation.comparison_orchestrator import (
|
|
76
|
+
ComparisonOrchestrator,
|
|
77
|
+
)
|
|
78
|
+
from snowflake.snowflake_data_validation.__version__ import __version__
|
|
79
|
+
|
|
80
|
+
# Import submodules and make them available at the package level
|
|
81
|
+
# This helps Sphinx properly document the submodules
|
|
82
|
+
import snowflake.snowflake_data_validation.validation as validation
|
|
83
|
+
import snowflake.snowflake_data_validation.extractor as extractor
|
|
84
|
+
import snowflake.snowflake_data_validation.connector as connector
|
|
85
|
+
import snowflake.snowflake_data_validation.utils as utils
|
|
86
|
+
import snowflake.snowflake_data_validation.snowflake as snowflake
|
|
87
|
+
import snowflake.snowflake_data_validation.sqlserver as sqlserver
|
|
88
|
+
import logging
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Library logging convention: register a NullHandler on the package logger so
# that records are not written to sys.stderr when the host application has not
# configured logging itself.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Names exported by ``from snowflake.snowflake_data_validation import *``:
# the version string, the orchestrator class, then the submodules.
__all__ = [
    "__version__",
    "ComparisonOrchestrator",
    "validation",
    "extractor",
    "connector",
    "utils",
    "snowflake",
    "sqlserver",
]


# Numeric view of ``__version__``: dot-separated components made up solely of
# digits are converted to int; any other component is left out.
__version_info__ = tuple(map(int, filter(str.isdigit, __version__.split("."))))
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Main entry point for the Snowflake Data Validation CLI when run as a module.
|
|
3
|
+
|
|
4
|
+
This allows the package to be run with: python -m snowflake.snowflake_data_validation
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from snowflake.snowflake_data_validation.main_cli import data_validation_app
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """Run the CLI application, turning failures into exit status 1.

    ``data_validation_app`` is invoked directly. Three kinds of failure are
    reported on stderr through ``typer.secho`` before the process exits with
    status 1: a ``KeyboardInterrupt`` (yellow message), an ``ImportError``
    (red message followed by a hint about dependencies) and any other
    ``Exception`` (red message). If the application returns normally, this
    function simply returns.
    """
    try:
        data_validation_app()
        return
    except KeyboardInterrupt:
        typer.secho("\nOperation cancelled by user", fg=typer.colors.YELLOW, err=True)
    except ImportError as import_failure:
        report_lines = (
            f"Import error: {import_failure}",
            "Please ensure all dependencies are installed correctly.",
        )
        for report_line in report_lines:
            typer.secho(report_line, fg=typer.colors.RED, err=True)
    except Exception as failure:
        typer.secho(f"Unexpected error: {failure}", fg=typer.colors.RED, err=True)

    # Only reachable after one of the handlers above has reported a failure.
    sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# Package version string; imported, re-exported and parsed into
# ``__version_info__`` by the package ``__init__``.
__version__ = "0.0.1"
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from snowflake.snowflake_data_validation.configuration.model.table_configuration import (
|
|
21
|
+
TableConfiguration,
|
|
22
|
+
)
|
|
23
|
+
from snowflake.snowflake_data_validation.configuration.model.validation_configuration import (
|
|
24
|
+
ValidationConfiguration,
|
|
25
|
+
)
|
|
26
|
+
from snowflake.snowflake_data_validation.connector.connector_base import (
|
|
27
|
+
ConnectorBase,
|
|
28
|
+
)
|
|
29
|
+
from snowflake.snowflake_data_validation.executer import (
|
|
30
|
+
ExecutorFactory,
|
|
31
|
+
)
|
|
32
|
+
from snowflake.snowflake_data_validation.executer.base_validation_executor import (
|
|
33
|
+
BaseValidationExecutor,
|
|
34
|
+
)
|
|
35
|
+
from snowflake.snowflake_data_validation.executer.extractor_types import (
|
|
36
|
+
ExtractorType,
|
|
37
|
+
)
|
|
38
|
+
from snowflake.snowflake_data_validation.extractor.metadata_extractor_base import (
|
|
39
|
+
MetadataExtractorBase,
|
|
40
|
+
)
|
|
41
|
+
from snowflake.snowflake_data_validation.script_writer.script_writer_base import (
|
|
42
|
+
ScriptWriterBase,
|
|
43
|
+
)
|
|
44
|
+
from snowflake.snowflake_data_validation.utils.arguments_manager_base import (
|
|
45
|
+
ValidationEnvironmentObject,
|
|
46
|
+
)
|
|
47
|
+
from snowflake.snowflake_data_validation.utils.constants import (
|
|
48
|
+
ExecutionMode,
|
|
49
|
+
Platform,
|
|
50
|
+
)
|
|
51
|
+
from snowflake.snowflake_data_validation.utils.context import Context
|
|
52
|
+
from snowflake.snowflake_data_validation.utils.logging_utils import log
|
|
53
|
+
from snowflake.snowflake_data_validation.utils.progress_reporter import (
|
|
54
|
+
ProgressMetadata,
|
|
55
|
+
report_progress,
|
|
56
|
+
)
|
|
57
|
+
from snowflake.snowflake_data_validation.validation.validation_report_buffer import (
|
|
58
|
+
ValidationReportBuffer,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
LOGGER = logging.getLogger(__name__)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ComparisonOrchestrator:
    """Coordinate a validation run between a source and a target database.

    The orchestrator keeps the two connectors and the run context, asks an
    ``ExecutorFactory`` for the platform-specific metadata extractors or
    script writers, obtains the executor matching the requested execution
    mode, and then drives that executor over every table listed in the
    configuration.
    """

    @log
    def __init__(
        self,
        source_connector: ConnectorBase,
        target_connector: ConnectorBase,
        context: Context,
    ):
        """Initialize the orchestrator.

        Args:
            source_connector: Source database connector
            target_connector: Target database connector
            context: Validation context containing configuration and runtime info

        """
        LOGGER.debug("Initializing ComparisonOrchestrator")
        self.source_connector = source_connector
        self.target_connector = target_connector
        self.context = context
        self.executor_factory = ExecutorFactory()
        LOGGER.debug("ComparisonOrchestrator initialized successfully")

    @classmethod
    @log
    def from_validation_environment(
        cls, validation_env: ValidationEnvironmentObject
    ) -> "ComparisonOrchestrator":
        """Create a ComparisonOrchestrator from a ValidationEnvironmentObject.

        Args:
            validation_env: Object whose ``source_connector``,
                ``target_connector`` and ``context`` attributes are passed to
                the constructor.

        Returns:
            ComparisonOrchestrator: Orchestrator built from those attributes.

        """
        LOGGER.debug("Creating ComparisonOrchestrator from validation environment")
        return cls(
            source_connector=validation_env.source_connector,
            target_connector=validation_env.target_connector,
            context=validation_env.context,
        )

    @log
    def run_sync_comparison(self) -> None:
        """Run the complete synchronous validation comparison process.

        Builds a metadata extractor for each side, requests the
        ``ExecutionMode.SYNC_VALIDATION`` executor and runs it over all
        configured tables.
        """
        LOGGER.info("Starting synchronous validation comparison")
        source_extractor = self._create_metadata_extractor(
            self.source_connector, self.context.source_platform
        )
        target_extractor = self._create_metadata_extractor(
            self.target_connector, self.context.target_platform
        )

        executor = self.executor_factory.create_executor(
            ExecutionMode.SYNC_VALIDATION,
            source_extractor=source_extractor,
            target_extractor=target_extractor,
            context=self.context,
        )

        self._orchestrate_tables_execution(executor)
        LOGGER.info("Synchronous validation comparison completed")

    @log
    def run_async_generation(self) -> None:
        """Generate validation scripts for all tables defined in the configuration.

        Builds a script writer for each side, requests the
        ``ExecutionMode.ASYNC_GENERATION`` executor and runs it over all
        configured tables.
        """
        LOGGER.info("Starting async script generation")
        source_printer = self._create_script_printer(
            self.source_connector, self.context.source_platform
        )
        target_printer = self._create_script_printer(
            self.target_connector, self.context.target_platform
        )

        # The factory's keyword names are ``*_extractor`` even though script
        # writers are passed in this mode.
        executor = self.executor_factory.create_executor(
            ExecutionMode.ASYNC_GENERATION,
            source_extractor=source_printer,
            target_extractor=target_printer,
            context=self.context,
        )

        self._orchestrate_tables_execution(executor)
        LOGGER.info("Async script generation completed")

    @log
    def run_async_comparison(self) -> None:
        """Run the asynchronous validation comparison process.

        Builds a metadata extractor for each side, requests the
        ``ExecutionMode.ASYNC_VALIDATION`` executor and runs it over all
        configured tables.
        """
        LOGGER.info("Starting asynchronous validation comparison")
        source_extractor = self._create_metadata_extractor(
            self.source_connector, self.context.source_platform
        )
        # Take the target platform from the context, like the source platform
        # above and like the other run_* methods. The configuration object
        # (presumably the ConfigurationModel) declares target_platform as a
        # plain str, while _create_metadata_extractor is annotated to receive
        # a Platform.
        target_extractor = self._create_metadata_extractor(
            self.target_connector, self.context.target_platform
        )

        executor = self.executor_factory.create_executor(
            ExecutionMode.ASYNC_VALIDATION,
            source_extractor=source_extractor,
            target_extractor=target_extractor,
            context=self.context,
        )

        self._orchestrate_tables_execution(executor)
        LOGGER.info("Asynchronous validation comparison completed")

    @log
    def _create_metadata_extractor(
        self, connector: ConnectorBase, platform: Platform
    ) -> MetadataExtractorBase:
        """Create the appropriate metadata extractor based on platform.

        Args:
            connector: Database connector instance
            platform: Platform enum (e.g., Platform.SNOWFLAKE, Platform.SQLSERVER)

        Returns:
            MetadataExtractorBase: Extractor produced by the executor factory
            for ``ExtractorType.METADATA_EXTRACTOR``.

        """
        LOGGER.debug("Creating metadata extractor for platform: %s", platform)
        return self.executor_factory.create_extractor_from_connector(
            connector=connector,
            extractor_type=ExtractorType.METADATA_EXTRACTOR,
            platform=platform,
            report_path=self.context.report_path,
        )

    @log
    def _create_script_printer(
        self, connector: ConnectorBase, platform: Platform
    ) -> ScriptWriterBase:
        """Create the appropriate script printer based on platform.

        Args:
            connector: Database connector instance
            platform: Platform enum (e.g., Platform.SNOWFLAKE, Platform.SQLSERVER)

        Returns:
            ScriptWriterBase: Writer produced by the executor factory for
            ``ExtractorType.SCRIPT_WRITER``.

        """
        LOGGER.debug("Creating script printer for platform: %s", platform)
        return self.executor_factory.create_extractor_from_connector(
            connector=connector,
            extractor_type=ExtractorType.SCRIPT_WRITER,
            platform=platform,
            report_path=self.context.report_path,
        )

    @log
    def _orchestrate_tables_execution(self, executor: BaseValidationExecutor) -> None:
        """Execute validation for all tables using the provided executor.

        For each configured table this reports progress, picks the validation
        configuration to apply and calls ``executor.execute_validation_levels``.
        After the last table, buffered report data is flushed.

        Args:
            executor: The validation executor to use for processing tables

        """
        tables = self.context.configuration.tables
        default_configuration = self.context.configuration.validation_configuration

        LOGGER.info("Starting validation execution for %d tables", len(tables))

        for table in tables:
            LOGGER.info("Processing table: %s", table.fully_qualified_name)
            self._report_progress_for_table(
                table.fully_qualified_name, table.column_selection_list
            )

            validation_config = self._get_validation_configuration(
                table, default_configuration
            )

            executor.execute_validation_levels(validation_config, table)
            LOGGER.info("Completed processing table: %s", table.fully_qualified_name)

        LOGGER.info("Completed validation execution for all tables")

        # Flush all buffered validation data to file after processing all tables
        self._flush_validation_reports()

    @log
    def _flush_validation_reports(self) -> None:
        """Flush all buffered validation data to the report file.

        Called once all tables have been processed. If the buffer reports
        data, it is flushed using the run context and the resulting path is
        logged; otherwise only an informational message is logged.
        """
        # NOTE(review): a fresh ValidationReportBuffer() is expected to expose
        # the data accumulated during the run — presumably a shared/singleton
        # buffer; confirm against validation_report_buffer.py.
        buffer = ValidationReportBuffer()

        if buffer.has_data():
            report_file_path = buffer.flush_to_file(self.context)
            LOGGER.info(
                "Successfully flushed validation report buffer to: %s", report_file_path
            )
        else:
            LOGGER.info("No validation data to flush - buffer is empty")

    @log
    def _get_validation_configuration(
        self,
        table: TableConfiguration,
        default_configuration: Optional[ValidationConfiguration],
    ) -> ValidationConfiguration:
        """Get the validation configuration for a table.

        Precedence: the table's own configuration if truthy, else the supplied
        default if truthy, else a newly constructed ``ValidationConfiguration``.

        Args:
            table: Table configuration object
            default_configuration: Default validation configuration

        Returns:
            ValidationConfiguration: The configuration to use for this table

        """
        if table.validation_configuration:
            LOGGER.debug(
                "Using table-specific validation configuration for %s",
                table.fully_qualified_name,
            )
            return table.validation_configuration
        elif default_configuration:
            LOGGER.debug(
                "Using default validation configuration for %s",
                table.fully_qualified_name,
            )
            return default_configuration
        else:
            LOGGER.debug(
                "Using empty validation configuration for %s",
                table.fully_qualified_name,
            )
            return ValidationConfiguration()

    def _report_progress_for_table(
        self, object_name: str, columns_to_validate: list[str]
    ) -> None:
        """Report progress for table validation.

        Progress is emitted only when the output handler has console output
        disabled.

        Args:
            object_name: The fully qualified name of the database object
            columns_to_validate: List of columns being validated

        """
        # NOTE(review): the reviewed rendering of this file had no indentation;
        # both statements below are assumed to sit inside this condition —
        # confirm against the original source.
        if not self.context.output_handler.console_output_enabled:
            LOGGER.debug(
                "Reporting progress for table: %s with %d columns",
                object_name,
                len(columns_to_validate),
            )
            report_progress(
                ProgressMetadata(
                    table=object_name,
                    columns=columns_to_validate,
                    run_id=self.context.run_id,
                    run_start_time=self.context.run_start_time,
                )
            )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# This file is intentionally left blank.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pydantic_yaml import parse_yaml_raw_as
|
|
4
|
+
|
|
5
|
+
from snowflake.snowflake_data_validation.configuration.model.configuration_model import (
|
|
6
|
+
ConfigurationModel,
|
|
7
|
+
)
|
|
8
|
+
from snowflake.snowflake_data_validation.configuration.singleton import Singleton
|
|
9
|
+
from snowflake.snowflake_data_validation.utils.constants import (
|
|
10
|
+
DATA_VALIDATION_CONFIGURATION_FILE_NAME,
|
|
11
|
+
DATA_VALIDATION_CONFIGURATION_FILE_YAML,
|
|
12
|
+
DATA_VALIDATION_CONFIGURATION_FILE_YML,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ConfigurationLoader(metaclass=Singleton):
    """Load the data validation configuration file into a ConfigurationModel.

    The class is created through the project's ``Singleton`` metaclass.
    On construction it validates the given path, reads the YAML file and
    parses it into a ``ConfigurationModel`` that can be retrieved with
    ``get_configuration_model``.
    """

    def __init__(self, file_path: Path) -> None:
        """Validate ``file_path`` and parse the configuration it points to.

        Args:
            file_path: Path to the configuration YAML file.

        Raises:
            ValueError: If ``file_path`` is None.
            FileNotFoundError: If the file does not exist.
            Exception: If the file name is not an accepted configuration file
                name, or if reading or parsing the file fails.

        """
        # Placeholder so the attribute exists even if validation below raises.
        self.configuration_model: ConfigurationModel = ConfigurationModel(
            source_platform="",
            target_platform="",
            output_directory_path="",
        )

        if file_path is None:
            raise ValueError("The configuration file path cannot be None value")

        # NOTE(review): ``in`` is a substring test if the constant is a str —
        # confirm DATA_VALIDATION_CONFIGURATION_FILE_NAME is a collection of
        # accepted file names.
        if file_path.name not in DATA_VALIDATION_CONFIGURATION_FILE_NAME:
            raise Exception(
                f"{file_path.name} is not a valid configuration file name. "
                f"The correct file name are {DATA_VALIDATION_CONFIGURATION_FILE_YAML} "
                f"and {DATA_VALIDATION_CONFIGURATION_FILE_YML}"
            )

        if not file_path.exists():
            raise FileNotFoundError(f"Configuration file not found in {file_path}")

        try:
            # Decode explicitly as UTF-8: without an encoding argument,
            # read_text() uses the locale's preferred encoding, which can
            # misread non-ASCII YAML content on non-UTF-8 systems.
            file_content = file_path.read_text(encoding="utf-8")
            self.configuration_model = parse_yaml_raw_as(
                ConfigurationModel, file_content
            )

        except Exception as exception:
            error_msg = (
                f"An error occurred while loading the "
                f"{DATA_VALIDATION_CONFIGURATION_FILE_YAML} "
                f"or {DATA_VALIDATION_CONFIGURATION_FILE_YML} file:"
            )
            # The original error text is embedded in the message; the
            # traceback chain is deliberately suppressed with ``from None``.
            raise Exception(f"{error_msg}\n{exception}") from None

    def get_configuration_model(self) -> ConfigurationModel:
        """Get the configuration model.

        Returns:
            ConfigurationModel: The model parsed from the configuration file.

        """
        return self.configuration_model
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from snowflake.snowflake_data_validation.configuration.model.connection_types import (
|
|
6
|
+
Connection,
|
|
7
|
+
)
|
|
8
|
+
from snowflake.snowflake_data_validation.configuration.model.table_configuration import (
|
|
9
|
+
TableConfiguration,
|
|
10
|
+
)
|
|
11
|
+
from snowflake.snowflake_data_validation.configuration.model.validation_configuration import (
|
|
12
|
+
ValidationConfiguration,
|
|
13
|
+
)
|
|
14
|
+
from snowflake.snowflake_data_validation.utils.constants import (
|
|
15
|
+
VALIDATION_CONFIGURATION_DEFAULT_VALUE,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ConfigurationModel(BaseModel):
    """Pydantic schema of the data validation configuration.

    ``source_platform``, ``target_platform`` and ``output_directory_path``
    have no default and must be supplied; every other field has a default.
    """

    # Required settings.
    source_platform: str
    target_platform: str
    output_directory_path: str

    # Execution switch.
    parallelization: bool = False

    # Connection details for each side; omitted unless configured.
    source_connection: Optional[Connection] = None
    target_connection: Optional[Connection] = None

    # Locations of validation files for each side.
    source_validation_files_path: Optional[str] = None
    target_validation_files_path: Optional[str] = None

    # Validation settings used for tables that do not define their own.
    validation_configuration: ValidationConfiguration = ValidationConfiguration(
        **VALIDATION_CONFIGURATION_DEFAULT_VALUE
    )

    # Free-form comparison settings (string or float values).
    comparison_configuration: Optional[dict[str, Union[str, float]]] = None

    # Name mappings, keyed and valued by string.
    database_mappings: Optional[dict[str, str]] = None
    schema_mappings: Optional[dict[str, str]] = None

    # Tables to validate.
    tables: list[TableConfiguration] = []
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""Connection type definitions for YAML configuration.
|
|
14
|
+
|
|
15
|
+
This file imports connection models from the connections module and creates
|
|
16
|
+
the union types used by the main configuration model for both source and
|
|
17
|
+
target connections.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from typing import Union
|
|
21
|
+
|
|
22
|
+
# Import connection models from the dedicated connections module
|
|
23
|
+
from snowflake.snowflake_data_validation.configuration.model.connections import (
|
|
24
|
+
SnowflakeDefaultConnection,
|
|
25
|
+
SnowflakeNamedConnection,
|
|
26
|
+
SqlServerCredentialsConnection,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Connection models accepted in the YAML configuration, for both the
# source_connection and target_connection fields.
# SnowflakeCredentialsConnection is left out on purpose: it is only used for
# IPC commands.
Connection = Union[
    SnowflakeNamedConnection,  # Snowflake, "name" mode
    SnowflakeDefaultConnection,  # Snowflake, "default" mode
    SqlServerCredentialsConnection,  # SQL Server
]
|