snowflake-data-validation 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/snowflake_data_validation/__init__.py +112 -0
  2. snowflake/snowflake_data_validation/__main__.py +35 -0
  3. snowflake/snowflake_data_validation/__version__.py +16 -0
  4. snowflake/snowflake_data_validation/comparison_orchestrator.py +250 -0
  5. snowflake/snowflake_data_validation/configuration/__init__.py +16 -0
  6. snowflake/snowflake_data_validation/configuration/configuration_loader.py +66 -0
  7. snowflake/snowflake_data_validation/configuration/model/configuration_model.py +122 -0
  8. snowflake/snowflake_data_validation/configuration/model/connection_types.py +46 -0
  9. snowflake/snowflake_data_validation/configuration/model/connections/__init__.py +47 -0
  10. snowflake/snowflake_data_validation/configuration/model/logging_configuration.py +63 -0
  11. snowflake/snowflake_data_validation/configuration/model/table_configuration.py +210 -0
  12. snowflake/snowflake_data_validation/configuration/model/validation_configuration.py +59 -0
  13. snowflake/snowflake_data_validation/configuration/singleton.py +16 -0
  14. snowflake/snowflake_data_validation/connector/__init__.py +32 -0
  15. snowflake/snowflake_data_validation/connector/connector_base.py +183 -0
  16. snowflake/snowflake_data_validation/connector/connector_factory_base.py +161 -0
  17. snowflake/snowflake_data_validation/executer/__init__.py +45 -0
  18. snowflake/snowflake_data_validation/executer/async_generation_executor.py +167 -0
  19. snowflake/snowflake_data_validation/executer/async_validation_executor.py +409 -0
  20. snowflake/snowflake_data_validation/executer/base_validation_executor.py +451 -0
  21. snowflake/snowflake_data_validation/executer/executor_factory.py +248 -0
  22. snowflake/snowflake_data_validation/executer/extractor_types.py +24 -0
  23. snowflake/snowflake_data_validation/executer/sync_validation_executor.py +713 -0
  24. snowflake/snowflake_data_validation/extractor/__init__.py +28 -0
  25. snowflake/snowflake_data_validation/extractor/metadata_extractor_base.py +374 -0
  26. snowflake/snowflake_data_validation/extractor/sql_queries_template_generator.py +570 -0
  27. snowflake/snowflake_data_validation/main_cli.py +92 -0
  28. snowflake/snowflake_data_validation/orchestration/parallel_execution_engine.py +337 -0
  29. snowflake/snowflake_data_validation/orchestration/table_metadata_processor.py +400 -0
  30. snowflake/snowflake_data_validation/orchestration/validation_progress_reporter.py +116 -0
  31. snowflake/snowflake_data_validation/query/__init__.py +22 -0
  32. snowflake/snowflake_data_validation/query/query_generator_base.py +346 -0
  33. snowflake/snowflake_data_validation/redshift/__init__.py +36 -0
  34. snowflake/snowflake_data_validation/redshift/connector/__init__.py +17 -0
  35. snowflake/snowflake_data_validation/redshift/connector/connector_factory_redshift.py +91 -0
  36. snowflake/snowflake_data_validation/redshift/connector/connector_redshift.py +237 -0
  37. snowflake/snowflake_data_validation/redshift/extractor/metadata_extractor_redshift.py +257 -0
  38. snowflake/snowflake_data_validation/redshift/extractor/redshift_cte_generator.py +163 -0
  39. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_concatenated_insert_template.sql.j2 +26 -0
  40. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_concatenated_table_template.sql.j2 +6 -0
  41. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_md5_insert_template.sql.j2 +8 -0
  42. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_md5_table_template.sql.j2 +6 -0
  43. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunks_md5_table_template.sql.j2 +4 -0
  44. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_column_metrics_templates.yaml +696 -0
  45. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_columns_cte_template.sql.j2 +8 -0
  46. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_datatypes_normalization_templates.yaml +46 -0
  47. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_extract_chunks_md5_table_template.sql.j2 +1 -0
  48. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_extract_md5_rows_chunk.sql.j2 +10 -0
  49. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_get_columns_metadata.sql.j2 +85 -0
  50. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_insert_chunk_row_md5_template.sql.j2 +5 -0
  51. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_row_count_query.sql.j2 +1 -0
  52. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_table_metadata_query.sql.j2 +29 -0
  53. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_to_snowflake_datatypes_mapping_template.yaml +44 -0
  54. snowflake/snowflake_data_validation/redshift/model/__init__.py +21 -0
  55. snowflake/snowflake_data_validation/redshift/model/redshift_credentials_connection.py +69 -0
  56. snowflake/snowflake_data_validation/redshift/query/__init__.py +22 -0
  57. snowflake/snowflake_data_validation/redshift/query/query_generator_redshift.py +315 -0
  58. snowflake/snowflake_data_validation/redshift/redshift_arguments_manager.py +129 -0
  59. snowflake/snowflake_data_validation/redshift/redshift_cli.py +618 -0
  60. snowflake/snowflake_data_validation/redshift/script_writer/__init__.py +22 -0
  61. snowflake/snowflake_data_validation/redshift/script_writer/script_writer_redshift.py +142 -0
  62. snowflake/snowflake_data_validation/script_writer/__init__.py +22 -0
  63. snowflake/snowflake_data_validation/script_writer/script_writer_base.py +152 -0
  64. snowflake/snowflake_data_validation/snowflake/__init__.py +38 -0
  65. snowflake/snowflake_data_validation/snowflake/connector/connector_factory_snowflake.py +159 -0
  66. snowflake/snowflake_data_validation/snowflake/connector/connector_snowflake.py +327 -0
  67. snowflake/snowflake_data_validation/snowflake/extractor/metadata_extractor_snowflake.py +352 -0
  68. snowflake/snowflake_data_validation/snowflake/extractor/snowflake_cte_generator.py +155 -0
  69. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunk_row_concatenated_template.sql.j2 +33 -0
  70. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunk_row_md5_template.sql.j2 +11 -0
  71. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunks_md5_table_template.sql.j2 +4 -0
  72. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_column_metrics_templates.yaml +151 -0
  73. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_columns_cte_template.sql.j2 +8 -0
  74. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_datatypes_normalization_templates.yaml +9 -0
  75. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_extract_chunks_md5_table_template.sql.j2 +5 -0
  76. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_extract_md5_rows_chunk.sql.j2 +10 -0
  77. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_get_case_sensitive_columns.sql.j2 +8 -0
  78. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_get_columns_metadata.sql.j2 +76 -0
  79. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_insert_chunk_row_md5_template.sql.j2 +1 -0
  80. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_row_count_query.sql.j2 +1 -0
  81. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_table_metadata_query.sql.j2 +30 -0
  82. snowflake/snowflake_data_validation/snowflake/model/__init__.py +25 -0
  83. snowflake/snowflake_data_validation/snowflake/model/snowflake_credentials_connection.py +62 -0
  84. snowflake/snowflake_data_validation/snowflake/model/snowflake_default_connection.py +31 -0
  85. snowflake/snowflake_data_validation/snowflake/model/snowflake_named_connection.py +36 -0
  86. snowflake/snowflake_data_validation/snowflake/query/__init__.py +22 -0
  87. snowflake/snowflake_data_validation/snowflake/query/query_generator_snowflake.py +223 -0
  88. snowflake/snowflake_data_validation/snowflake/script_writer/__init__.py +22 -0
  89. snowflake/snowflake_data_validation/snowflake/script_writer/script_writer_snowflake.py +96 -0
  90. snowflake/snowflake_data_validation/snowflake/snowflake_arguments_manager.py +204 -0
  91. snowflake/snowflake_data_validation/snowflake/snowflake_cli.py +392 -0
  92. snowflake/snowflake_data_validation/sqlserver/__init__.py +46 -0
  93. snowflake/snowflake_data_validation/sqlserver/connector/__init__.py +17 -0
  94. snowflake/snowflake_data_validation/sqlserver/connector/connector_factory_sql_server.py +92 -0
  95. snowflake/snowflake_data_validation/sqlserver/connector/connector_sql_server.py +312 -0
  96. snowflake/snowflake_data_validation/sqlserver/extractor/__init__.py +16 -0
  97. snowflake/snowflake_data_validation/sqlserver/extractor/metadata_extractor_sqlserver.py +257 -0
  98. snowflake/snowflake_data_validation/sqlserver/extractor/sqlserver_cte_generator.py +161 -0
  99. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_chunks_md5_table_template.sql.j2 +4 -0
  100. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_column_metrics_templates.yaml +537 -0
  101. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_columns_cte_template.sql.j2 +8 -0
  102. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_compute_md5_sql.j2 +55 -0
  103. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_datatypes_normalization_templates.yaml +26 -0
  104. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_extract_chunks_md5_table_template.sql.j2 +1 -0
  105. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_extract_md5_rows_chunk.sql.j2 +10 -0
  106. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_get_columns_metadata.sql.j2 +70 -0
  107. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_row_count_query.sql.j2 +1 -0
  108. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_table_metadata_query.sql.j2 +23 -0
  109. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_to_snowflake_datatypes_mapping_template.yaml +32 -0
  110. snowflake/snowflake_data_validation/sqlserver/model/__init__.py +21 -0
  111. snowflake/snowflake_data_validation/sqlserver/model/sqlserver_credentials_connection.py +71 -0
  112. snowflake/snowflake_data_validation/sqlserver/query/__init__.py +22 -0
  113. snowflake/snowflake_data_validation/sqlserver/query/query_generator_sqlserver.py +197 -0
  114. snowflake/snowflake_data_validation/sqlserver/script_writer/__init__.py +22 -0
  115. snowflake/snowflake_data_validation/sqlserver/script_writer/script_writer_sqlserver.py +177 -0
  116. snowflake/snowflake_data_validation/sqlserver/sqlserver_arguments_manager.py +147 -0
  117. snowflake/snowflake_data_validation/sqlserver/sqlserver_cli.py +701 -0
  118. snowflake/snowflake_data_validation/table_partitioning_strategy.md +96 -0
  119. snowflake/snowflake_data_validation/teradata/__init__.py +14 -0
  120. snowflake/snowflake_data_validation/teradata/connector/__init__.py +14 -0
  121. snowflake/snowflake_data_validation/teradata/connector/connector_factory_teradata.py +79 -0
  122. snowflake/snowflake_data_validation/teradata/connector/connector_teradata.py +264 -0
  123. snowflake/snowflake_data_validation/teradata/extractor/__init__.py +19 -0
  124. snowflake/snowflake_data_validation/teradata/extractor/metadata_extractor_teradata.py +264 -0
  125. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_chunks_md5_table_template.sql.j2 +4 -0
  126. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_column_metrics_templates.yaml +497 -0
  127. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_columns_cte_template.sql.j2 +8 -0
  128. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_compute_md5_sql.j2 +62 -0
  129. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_create_row_concatenated.sql.j2 +7 -0
  130. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_create_row_md5.sql.j2 +7 -0
  131. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_datatypes_normalization_templates.yaml +15 -0
  132. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_extract_chunks_md5_table_template.sql.j2 +1 -0
  133. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_extract_md5_rows_chunk.sql.j2 +10 -0
  134. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_get_columns_metadata.sql.j2 +70 -0
  135. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_row_count_query.sql.j2 +1 -0
  136. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_table_metadata_query.sql.j2 +92 -0
  137. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_to_snowflake_datatypes_mapping_template.yaml +48 -0
  138. snowflake/snowflake_data_validation/teradata/extractor/teradata_cte_generator.py +190 -0
  139. snowflake/snowflake_data_validation/teradata/model/__init__.py +21 -0
  140. snowflake/snowflake_data_validation/teradata/model/teradata_credentials_connection.py +53 -0
  141. snowflake/snowflake_data_validation/teradata/query/__init__.py +19 -0
  142. snowflake/snowflake_data_validation/teradata/query/query_generator_teradata.py +231 -0
  143. snowflake/snowflake_data_validation/teradata/script_writer/__init__.py +19 -0
  144. snowflake/snowflake_data_validation/teradata/script_writer/script_writer_teradata.py +50 -0
  145. snowflake/snowflake_data_validation/teradata/teradata_arguments_manager.py +143 -0
  146. snowflake/snowflake_data_validation/teradata/teradata_cli.py +745 -0
  147. snowflake/snowflake_data_validation/utils/__init__.py +16 -0
  148. snowflake/snowflake_data_validation/utils/arguments_manager_base.py +482 -0
  149. snowflake/snowflake_data_validation/utils/arguments_manager_factory.py +130 -0
  150. snowflake/snowflake_data_validation/utils/base_output_handler.py +66 -0
  151. snowflake/snowflake_data_validation/utils/configuration_file_editor.py +86 -0
  152. snowflake/snowflake_data_validation/utils/configuration_file_generator.py +165 -0
  153. snowflake/snowflake_data_validation/utils/connection_pool.py +349 -0
  154. snowflake/snowflake_data_validation/utils/connector_factory.py +71 -0
  155. snowflake/snowflake_data_validation/utils/console_output_handler.py +95 -0
  156. snowflake/snowflake_data_validation/utils/constants.py +350 -0
  157. snowflake/snowflake_data_validation/utils/context.py +126 -0
  158. snowflake/snowflake_data_validation/utils/cpu_optimizer.py +156 -0
  159. snowflake/snowflake_data_validation/utils/helper.py +30 -0
  160. snowflake/snowflake_data_validation/utils/helpers/helper_database.py +54 -0
  161. snowflake/snowflake_data_validation/utils/helpers/helper_dataframe.py +83 -0
  162. snowflake/snowflake_data_validation/utils/helpers/helper_io.py +85 -0
  163. snowflake/snowflake_data_validation/utils/helpers/helper_misc.py +95 -0
  164. snowflake/snowflake_data_validation/utils/helpers/helper_templates.py +294 -0
  165. snowflake/snowflake_data_validation/utils/logging_config.py +197 -0
  166. snowflake/snowflake_data_validation/utils/logging_utils.py +68 -0
  167. snowflake/snowflake_data_validation/utils/model/chunk.py +16 -0
  168. snowflake/snowflake_data_validation/utils/model/column_metadata.py +45 -0
  169. snowflake/snowflake_data_validation/utils/model/table_column_metadata.py +94 -0
  170. snowflake/snowflake_data_validation/utils/model/table_context.py +351 -0
  171. snowflake/snowflake_data_validation/utils/model/templates_loader_manager.py +123 -0
  172. snowflake/snowflake_data_validation/utils/progress_reporter.py +61 -0
  173. snowflake/snowflake_data_validation/utils/run_context.py +59 -0
  174. snowflake/snowflake_data_validation/utils/table_partitioning_strategy.py +148 -0
  175. snowflake/snowflake_data_validation/utils/telemetry.py +863 -0
  176. snowflake/snowflake_data_validation/utils/templates/configuration_file_templates.py +95 -0
  177. snowflake/snowflake_data_validation/utils/thread_safe_singleton.py +49 -0
  178. snowflake/snowflake_data_validation/utils/validation_utils.py +142 -0
  179. snowflake/snowflake_data_validation/validation/__init__.py +16 -0
  180. snowflake/snowflake_data_validation/validation/data_validator_base.py +452 -0
  181. snowflake/snowflake_data_validation/validation/metrics_data_validator.py +251 -0
  182. snowflake/snowflake_data_validation/validation/row_data_validator.py +479 -0
  183. snowflake/snowflake_data_validation/validation/schema_data_validator.py +197 -0
  184. snowflake/snowflake_data_validation/validation/validation_report_buffer.py +196 -0
  185. snowflake_data_validation-1.0.1.dist-info/METADATA +228 -0
  186. snowflake_data_validation-1.0.1.dist-info/RECORD +189 -0
  187. snowflake_data_validation-1.0.1.dist-info/WHEEL +4 -0
  188. snowflake_data_validation-1.0.1.dist-info/entry_points.txt +3 -0
  189. snowflake_data_validation-1.0.1.dist-info/licenses/LICENSE +177 -0
@@ -0,0 +1,112 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """
17
+ Snowflake Data Validation
18
+ =========================
19
+
20
+ This package provides comprehensive data validation functionality for Snowflake,
21
+ enabling robust data quality checks and migration validation between different
22
+ database systems.
23
+
24
+ Features
25
+ --------
26
+ - Multi-level data validation (Level 1: table metadata, Level 2: column metadata)
27
+ - Support for multiple source systems (SQL Server, Redshift, Teradata, Snowflake)
28
+ - CLI interface for validation operations
29
+ - Configurable validation processes
30
+ - Progress reporting and detailed validation reports
31
+
32
+ Main Components
33
+ ---------------
34
+ ComparisonOrchestrator
35
+ Main class for orchestrating data comparisons between source and target systems
36
+
37
+ CLI Interface
38
+ Command-line interface available via `main_cli` module for:
39
+ - Setting up database connections
40
+ - Running validation operations
41
+ - Managing configuration files
42
+
43
+ Supported Dialects
44
+ ------------------
45
+ - Snowflake to Snowflake validation
46
+ - SQL Server, Redshift, or Teradata to Snowflake migration validation
47
+
48
+ Example Usage
49
+ -------------
50
+ Programmatic API:
51
+ >>> from snowflake.snowflake_data_validation import ComparisonOrchestrator
52
+ >>> # Set up the connection pool manager and validation context
53
+ >>> orchestrator = ComparisonOrchestrator(
54
+ ... connection_pool_manager=connection_pool_manager,
55
+ ... context=context,
56
+ ... max_threads=max_threads,
57
+ ... )
58
+ >>> orchestrator.run_sync_comparison()
59
+
60
+ Command Line Interface:
61
+
62
+ .. code-block:: bash
63
+
64
+ # Set up SQL Server connection
65
+ python -m snowflake.snowflake_data_validation sqlserver source-connection \
66
+ --host localhost --port 1433 --username user --password pass --database mydb
67
+
68
+ # Run validation
69
+ python -m snowflake.snowflake_data_validation sqlserver run-validation \
70
+ --data-validation-config-file config.json
71
+
72
+ For more detailed examples and usage, please refer to the documentation.
73
+ """
74
+
75
+ from snowflake.snowflake_data_validation.comparison_orchestrator import (
76
+ ComparisonOrchestrator,
77
+ )
78
+ from snowflake.snowflake_data_validation.__version__ import __version__
79
+
80
+ # Import submodules and make them available at the package level
81
+ # This helps Sphinx properly document the submodules
82
+ import snowflake.snowflake_data_validation.validation as validation
83
+ import snowflake.snowflake_data_validation.extractor as extractor
84
+ import snowflake.snowflake_data_validation.connector as connector
85
+ import snowflake.snowflake_data_validation.utils as utils
86
+ import snowflake.snowflake_data_validation.snowflake as snowflake
87
+ import snowflake.snowflake_data_validation.sqlserver as sqlserver
88
+ import snowflake.snowflake_data_validation.redshift as redshift
89
+ import logging
90
+
91
+
92
# Attach a NullHandler to the package logger so that, when the host
# application has not configured logging, records are not written to
# sys.stderr.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Names exported as the public API of the package.
__all__ = [
    "__version__",
    "ComparisonOrchestrator",
    # Submodules
    "validation",
    "extractor",
    "connector",
    "utils",
    "snowflake",
    "sqlserver",
    "redshift",
]


# Numeric components of ``__version__``; dot-separated segments that are not
# purely digits are skipped.
__version_info__ = tuple(
    int(segment) for segment in __version__.split(".") if segment.isdigit()
)
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ """Main entry point for the Snowflake Data Validation CLI when run as a module.
3
+
4
+ This allows the package to be run with: python -m snowflake.snowflake_data_validation
5
+ """
6
+
7
+ import sys
8
+
9
+ import typer
10
+
11
+ from snowflake.snowflake_data_validation.main_cli import data_validation_app
12
+
13
+
14
def main():
    """Run the data-validation CLI and map failures to exit status 1.

    The Typer application is invoked directly. A keyboard interrupt, an
    ``ImportError`` or any other ``Exception`` escaping it is reported on
    stderr, after which the process exits with status 1. When the
    application returns normally this function simply returns.
    """
    try:
        data_validation_app()
    except KeyboardInterrupt:
        typer.secho("\nOperation cancelled by user", fg=typer.colors.YELLOW, err=True)
    except ImportError as import_error:
        typer.secho(f"Import error: {import_error}", fg=typer.colors.RED, err=True)
        typer.secho(
            "Please ensure all dependencies are installed correctly.",
            fg=typer.colors.RED,
            err=True,
        )
    except Exception as unexpected:
        typer.secho(f"Unexpected error: {unexpected}", fg=typer.colors.RED, err=True)
    else:
        # Clean return from the application: nothing to report.
        return

    # Every failure handled above ends the process with the same status.
    sys.exit(1)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ main()
@@ -0,0 +1,16 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ __version__ = "1.0.1"
@@ -0,0 +1,250 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+
18
+ from typing import Optional, Union
19
+
20
+ from snowflake.snowflake_data_validation.configuration.model.table_configuration import (
21
+ TableConfiguration,
22
+ )
23
+ from snowflake.snowflake_data_validation.configuration.model.validation_configuration import (
24
+ ValidationConfiguration,
25
+ )
26
+ from snowflake.snowflake_data_validation.executer import (
27
+ ExecutorFactory,
28
+ )
29
+ from snowflake.snowflake_data_validation.orchestration.parallel_execution_engine import (
30
+ ParallelExecutionEngine,
31
+ )
32
+ from snowflake.snowflake_data_validation.orchestration.table_metadata_processor import (
33
+ TableMetadataProcessor,
34
+ )
35
+ from snowflake.snowflake_data_validation.orchestration.validation_progress_reporter import (
36
+ ValidationProgressReporter,
37
+ )
38
+ from snowflake.snowflake_data_validation.utils.arguments_manager_base import (
39
+ ValidationEnvironmentObject,
40
+ )
41
+ from snowflake.snowflake_data_validation.utils.connection_pool import (
42
+ ConnectionPoolManager,
43
+ )
44
+ from snowflake.snowflake_data_validation.utils.constants import (
45
+ TEMPORARY_DEFAULT_OPTIMAL_LIMIT,
46
+ ExecutionMode,
47
+ )
48
+ from snowflake.snowflake_data_validation.utils.context import Context
49
+ from snowflake.snowflake_data_validation.utils.logging_utils import log
50
+ from snowflake.snowflake_data_validation.utils.telemetry import (
51
+ report_telemetry,
52
+ )
53
+
54
+
55
+ LOGGER = logging.getLogger(__name__)
56
+
57
+
58
class ComparisonOrchestrator:

    """Drive validation runs by wiring the execution engine, connection pool and reporter together."""

    @log
    def __init__(
        self,
        connection_pool_manager: ConnectionPoolManager,
        context: Context,
        max_threads: int,
    ):
        """Store the collaborators and build the orchestration helpers.

        Args:
            connection_pool_manager: Pool manager that is initialized and handed
                to the execution engine when a run starts.
            context: Validation context; its ``configuration`` supplies the
                tables and the default validation configuration.
            max_threads: Thread limit passed to the parallel execution engine.

        """
        LOGGER.debug("Initializing ComparisonOrchestrator")

        self.connection_pool_manager = connection_pool_manager
        self.context = context
        self.executor_factory = ExecutorFactory()
        self.max_threads = max_threads

        # Orchestration helpers.
        self.metadata_processor = TableMetadataProcessor()
        self.execution_engine = ParallelExecutionEngine(max_threads=self.max_threads)
        self.progress_reporter = ValidationProgressReporter()

        LOGGER.debug(
            "ComparisonOrchestrator initialized with max_threads=%d", self.max_threads
        )

    @classmethod
    @log(log_args=False)
    def from_validation_environment(
        cls,
        validation_env: ValidationEnvironmentObject,
    ) -> "ComparisonOrchestrator":
        """Build an orchestrator from a prepared ValidationEnvironmentObject.

        Args:
            validation_env: Environment object providing the context and the
                factory method for the connection pool manager.

        Returns:
            ComparisonOrchestrator: Instance created with a new connection pool
            manager and the environment's context.

        """
        LOGGER.debug("Creating ComparisonOrchestrator from validation environment")

        configuration = validation_env.context.configuration
        table_count: int = len(configuration.tables)
        configured_max_threads: Union[str, int] = configuration.max_threads

        # Both limits are pinned to the temporary constant for now; they can be
        # adjusted later on based on changes in SnowConvert AI.
        # CpuOptimizer.get_optimal_thread_count(num_tables=..., max_threads=...)
        # is not consulted, so the two values read above are only logged.
        thread_limit: int = TEMPORARY_DEFAULT_OPTIMAL_LIMIT
        pool_size: int = TEMPORARY_DEFAULT_OPTIMAL_LIMIT

        # NOTE(review): the label below mentions CPUOptimizer although the
        # limits come from TEMPORARY_DEFAULT_OPTIMAL_LIMIT.
        LOGGER.debug(
            "Using CPUOptimizer: num_tables=%d, config_max_threads=%s, optimal_max_threads=%d, optimal_pool_size=%d",
            table_count,
            configured_max_threads,
            thread_limit,
            pool_size,
        )

        pool_manager: ConnectionPoolManager = (
            validation_env.create_connection_pool_manager(pool_size=pool_size)
        )

        return cls(
            connection_pool_manager=pool_manager,
            context=validation_env.context,
            max_threads=thread_limit,
        )

    @log
    @report_telemetry()
    def run_sync_comparison(self) -> None:
        """Run the synchronous validation comparison across all configured tables.

        Executes the parallel validation in ``SYNC_VALIDATION`` mode and then
        flushes the validation reports through the progress reporter.
        """
        LOGGER.info("Starting synchronous validation comparison")
        mode = ExecutionMode.SYNC_VALIDATION
        self._execute_parallel_validation(mode)
        self.progress_reporter.flush_validation_reports(self.context)
        LOGGER.info("Synchronous validation comparison completed")

    @log
    @report_telemetry()
    def run_async_generation(self) -> None:
        """Generate validation scripts for all configured tables.

        Executes the parallel validation in ``ASYNC_GENERATION`` mode with
        script printers enabled and then flushes the validation reports.
        """
        LOGGER.info("Starting async script generation")
        mode = ExecutionMode.ASYNC_GENERATION
        self._execute_parallel_validation(mode, use_script_printer=True)
        self.progress_reporter.flush_validation_reports(self.context)
        LOGGER.info("Async script generation completed")

    @log
    @report_telemetry()
    def run_async_comparison(self) -> None:
        """Run the asynchronous validation comparison across all configured tables.

        Executes the parallel validation in ``ASYNC_VALIDATION`` mode and then
        flushes the validation reports through the progress reporter.
        """
        LOGGER.info("Starting asynchronous validation comparison")
        mode = ExecutionMode.ASYNC_VALIDATION
        self._execute_parallel_validation(mode)
        self.progress_reporter.flush_validation_reports(self.context)
        LOGGER.info("Asynchronous validation comparison completed")

    @log(log_args=False)
    def _get_validation_configuration(
        self,
        table: TableConfiguration,
        default_configuration: Optional[ValidationConfiguration],
    ) -> ValidationConfiguration:
        """Pick the validation configuration that applies to a table.

        Precedence: the table's own configuration, then the supplied default,
        then a freshly constructed ``ValidationConfiguration``.

        Args:
            table: Table configuration object.
            default_configuration: Fallback configuration, may be ``None``.

        Returns:
            ValidationConfiguration: The configuration to use for this table.

        """
        table_specific = table.validation_configuration
        if table_specific:
            LOGGER.debug(
                "Using table-specific validation configuration for %s",
                table.fully_qualified_name,
            )
            return table_specific

        if default_configuration:
            LOGGER.debug(
                "Using default validation configuration for %s",
                table.fully_qualified_name,
            )
            return default_configuration

        LOGGER.debug(
            "Using empty validation configuration for %s",
            table.fully_qualified_name,
        )
        return ValidationConfiguration()

    @log
    def _execute_parallel_validation(
        self, execution_mode: ExecutionMode, use_script_printer: bool = False
    ) -> None:
        """Initialize the connection pool and delegate to the execution engine.

        Args:
            execution_mode: The execution mode forwarded to the engine.
            use_script_printer: Whether to use script printers instead of
                metadata extractors.

        """
        self.connection_pool_manager.initialize_pool()

        LOGGER.debug(
            "Executing parallel validation with execution_mode=%s, use_script_printer=%s",
            execution_mode,
            use_script_printer,
        )

        # Callback parameter names are kept as-is in case the engine passes
        # them by keyword.
        def report_progress(table_name, columns):
            return self.progress_reporter.report_progress_for_table(
                table_name=table_name,
                column_selection_list=columns,
                context=self.context,
            )

        def resolve_validation_config(table, default_config):
            return self._get_validation_configuration(
                table=table, default_configuration=default_config
            )

        self.execution_engine.execute_parallel_validation(
            tables=self.context.configuration.tables,
            default_configuration=self.context.configuration.validation_configuration,
            execution_mode=execution_mode,
            context=self.context,
            connection_pool=self.connection_pool_manager,
            use_script_printer=use_script_printer,
            progress_callback=report_progress,
            validation_config_callback=resolve_validation_config,
        )
@@ -0,0 +1,16 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # This file is intentionally left blank.
@@ -0,0 +1,66 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic_yaml import parse_yaml_raw_as
4
+
5
+ from snowflake.snowflake_data_validation.configuration.model.configuration_model import (
6
+ ConfigurationModel,
7
+ )
8
+ from snowflake.snowflake_data_validation.configuration.singleton import Singleton
9
+
10
+
11
class ConfigurationLoader(metaclass=Singleton):
    """Read a YAML configuration file and expose it as a ``ConfigurationModel``.

    The class is created through the ``Singleton`` metaclass. The file is read
    and parsed in ``__init__``; the parsed model is then returned by
    ``get_configuration_model``.
    """

    def __init__(self, file_path: Path) -> None:
        """Load and parse the configuration file at ``file_path``.

        Args:
            file_path (Path): The path to the configuration file to load.

        Raises:
            ValueError: If ``file_path`` is None or does not end in
                ``.yaml`` / ``.yml`` (compared case-insensitively).
            FileNotFoundError: If ``file_path`` does not exist.
            Exception: If the file cannot be read or parsed into a
                ``ConfigurationModel``; the message embeds the original error.

        """
        # Placeholder model; replaced below once the file parses successfully.
        self.configuration_model: ConfigurationModel = ConfigurationModel(
            source_platform="",
            target_platform="",
            output_directory_path="",
        )

        if file_path is None:
            raise ValueError("The configuration file path cannot be None value")

        # Compare the suffix case-insensitively so ".YAML" / ".YML" are accepted.
        if file_path.suffix.lower() not in (".yaml", ".yml"):
            raise ValueError(
                "The configuration file must have a .yaml or .yml extension"
            )

        if not file_path.exists():
            raise FileNotFoundError(f"Configuration file not found in {file_path}")

        try:
            # Decode explicitly as UTF-8 instead of the platform default
            # encoding, so non-ASCII content parses the same on every OS.
            file_content = file_path.read_text(encoding="utf-8")
            self.configuration_model = parse_yaml_raw_as(
                ConfigurationModel, file_content
            )

        except Exception as exception:
            # The original error text is embedded in the message; the chained
            # traceback is suppressed on purpose to keep the report short.
            error_msg = "An error occurred while loading the configuration file:"
            raise Exception(f"{error_msg}\n{exception}") from None

    def get_configuration_model(self) -> ConfigurationModel:
        """Return the configuration model parsed by ``__init__``.

        Returns:
            ConfigurationModel: The configuration model instance.

        """
        return self.configuration_model
@@ -0,0 +1,122 @@
1
+ from pydantic import BaseModel, field_validator, model_validator
2
+ from typing_extensions import Self
3
+
4
+ from snowflake.snowflake_data_validation.configuration.model.connection_types import (
5
+ Connection,
6
+ )
7
+ from snowflake.snowflake_data_validation.configuration.model.logging_configuration import (
8
+ LoggingConfiguration,
9
+ )
10
+ from snowflake.snowflake_data_validation.configuration.model.table_configuration import (
11
+ TableConfiguration,
12
+ )
13
+ from snowflake.snowflake_data_validation.configuration.model.validation_configuration import (
14
+ ValidationConfiguration,
15
+ )
16
+ from snowflake.snowflake_data_validation.utils.constants import (
17
+ DEFAULT_THREAD_COUNT_OPTION,
18
+ VALIDATION_CONFIGURATION_DEFAULT_VALUE,
19
+ )
20
+
21
+
22
class ConfigurationModel(BaseModel):
    """Top-level pydantic model for the validation configuration.

    Once the model has been built, ``load`` runs ``check_tables``, which fills
    in per-table values that were left unset from the model-level settings.
    """

    source_platform: str
    target_platform: str
    output_directory_path: str
    # "auto" for auto-detection or number for specific thread count
    max_threads: str | int = "auto"
    source_connection: Connection | None = None
    target_connection: Connection | None = None
    source_validation_files_path: str | None = None
    target_validation_files_path: str | None = None
    target_database: str | None = None
    validation_configuration: ValidationConfiguration = ValidationConfiguration(
        **VALIDATION_CONFIGURATION_DEFAULT_VALUE
    )
    comparison_configuration: dict[str, str | float | int] | None = None
    database_mappings: dict[str, str] = {}
    schema_mappings: dict[str, str] = {}
    tables: list[TableConfiguration] = []
    logging_configuration: LoggingConfiguration | None = None

    @field_validator("max_threads")
    @classmethod
    def validate_max_threads(cls, value: str | int) -> str | int:
        """Validate max_threads field accepts only 'auto' or positive integers.

        String input is lower-cased before it is compared and returned.
        """
        if isinstance(value, str):
            normalized = value.lower()
            if normalized == DEFAULT_THREAD_COUNT_OPTION:
                return normalized
            raise ValueError(
                f"String value for max_threads must be '{DEFAULT_THREAD_COUNT_OPTION}'"
            )

        if isinstance(value, int):
            if value >= 1:
                return value
            raise ValueError(
                "Numeric value for max_threads must be a positive integer"
            )

        raise ValueError(
            f"max_threads must be either '{DEFAULT_THREAD_COUNT_OPTION}' or a positive integer"
        )

    @model_validator(mode="after")
    def load(self) -> Self:
        """Apply the per-table defaults after the model has been validated."""
        self.check_tables()
        return self

    def check_tables(self) -> None:
        """Run every per-table defaulting step, in order, on each table."""
        steps = (
            self._load_target_fully_qualified_name,
            self._check_max_failed_rows_number,
            self._check_chunk_number,
            self.set_exclude_metrics,
            self.set_apply_metric_column_modifier,
        )
        for table in self.tables:
            for step in steps:
                step(table)

    def _load_target_fully_qualified_name(self, table: TableConfiguration) -> None:
        """Resolve the table's target database/schema, then its target name."""
        source_database = table.source_database
        if source_database is None:
            # If target_database is set in config, use it for Teradata sources
            if self.target_database is not None:
                table.target_database = self.target_database
        else:
            # Otherwise use database mappings if available
            mapped_database = self.database_mappings.get(source_database)
            if mapped_database is not None:
                table.target_database = mapped_database

        source_schema = table.source_schema
        if source_schema is not None:
            mapped_schema = self.schema_mappings.get(source_schema)
            if mapped_schema is not None:
                table.target_schema = mapped_schema

        table._load_target_fully_qualified_name()

    def _check_max_failed_rows_number(self, table: TableConfiguration) -> None:
        """Default the table's max_failed_rows_number from the validation configuration."""
        if table.max_failed_rows_number is not None:
            return
        table.max_failed_rows_number = (
            self.validation_configuration.max_failed_rows_number
        )

    def _check_chunk_number(self, table: TableConfiguration) -> None:
        """Default the table's chunk_number to 0 when it is unset."""
        if table.chunk_number is not None:
            return
        table.chunk_number = 0

    def set_exclude_metrics(self, table: TableConfiguration) -> None:
        """Default the table's exclude_metrics from the validation configuration."""
        if table.exclude_metrics is not None:
            return
        table.exclude_metrics = self.validation_configuration.exclude_metrics

    def set_apply_metric_column_modifier(self, table: TableConfiguration) -> None:
        """Default the table's apply_metric_column_modifier from the validation configuration."""
        if table.apply_metric_column_modifier is not None:
            return
        table.apply_metric_column_modifier = (
            self.validation_configuration.apply_metric_column_modifier
        )
@@ -0,0 +1,46 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Connection type definitions for YAML configuration.
14
+
15
+ This file imports connection models from the connections module and creates
16
+ the union types used by the main configuration model for both source and
17
+ target connections.
18
+ """
19
+
20
+ from typing import Union
21
+
22
+ # Import connection models from the dedicated connections module
23
+ from snowflake.snowflake_data_validation.configuration.model.connections import (
24
+ RedshiftCredentialsConnection,
25
+ SnowflakeCredentialsConnection,
26
+ SnowflakeDefaultConnection,
27
+ SnowflakeNamedConnection,
28
+ SqlServerCredentialsConnection,
29
+ TeradataCredentialsConnection,
30
+ )
31
+
32
+
33
# Connection models accepted in configuration (YAML and IPC).
# The same union is used for both the source_connection and the
# target_connection fields; member order is kept as declared.
Connection = Union[
    SnowflakeNamedConnection,  # Snowflake connections (all modes)
    SnowflakeDefaultConnection,
    SnowflakeCredentialsConnection,
    SqlServerCredentialsConnection,  # SQL Server connections
    TeradataCredentialsConnection,  # Teradata connections
    RedshiftCredentialsConnection,  # Redshift connections
]