snowflake-data-validation 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/snowflake_data_validation/__init__.py +112 -0
  2. snowflake/snowflake_data_validation/__main__.py +35 -0
  3. snowflake/snowflake_data_validation/__version__.py +16 -0
  4. snowflake/snowflake_data_validation/comparison_orchestrator.py +250 -0
  5. snowflake/snowflake_data_validation/configuration/__init__.py +16 -0
  6. snowflake/snowflake_data_validation/configuration/configuration_loader.py +66 -0
  7. snowflake/snowflake_data_validation/configuration/model/configuration_model.py +122 -0
  8. snowflake/snowflake_data_validation/configuration/model/connection_types.py +46 -0
  9. snowflake/snowflake_data_validation/configuration/model/connections/__init__.py +47 -0
  10. snowflake/snowflake_data_validation/configuration/model/logging_configuration.py +63 -0
  11. snowflake/snowflake_data_validation/configuration/model/table_configuration.py +210 -0
  12. snowflake/snowflake_data_validation/configuration/model/validation_configuration.py +59 -0
  13. snowflake/snowflake_data_validation/configuration/singleton.py +16 -0
  14. snowflake/snowflake_data_validation/connector/__init__.py +32 -0
  15. snowflake/snowflake_data_validation/connector/connector_base.py +183 -0
  16. snowflake/snowflake_data_validation/connector/connector_factory_base.py +161 -0
  17. snowflake/snowflake_data_validation/executer/__init__.py +45 -0
  18. snowflake/snowflake_data_validation/executer/async_generation_executor.py +167 -0
  19. snowflake/snowflake_data_validation/executer/async_validation_executor.py +409 -0
  20. snowflake/snowflake_data_validation/executer/base_validation_executor.py +451 -0
  21. snowflake/snowflake_data_validation/executer/executor_factory.py +248 -0
  22. snowflake/snowflake_data_validation/executer/extractor_types.py +24 -0
  23. snowflake/snowflake_data_validation/executer/sync_validation_executor.py +713 -0
  24. snowflake/snowflake_data_validation/extractor/__init__.py +28 -0
  25. snowflake/snowflake_data_validation/extractor/metadata_extractor_base.py +374 -0
  26. snowflake/snowflake_data_validation/extractor/sql_queries_template_generator.py +570 -0
  27. snowflake/snowflake_data_validation/main_cli.py +92 -0
  28. snowflake/snowflake_data_validation/orchestration/parallel_execution_engine.py +337 -0
  29. snowflake/snowflake_data_validation/orchestration/table_metadata_processor.py +400 -0
  30. snowflake/snowflake_data_validation/orchestration/validation_progress_reporter.py +116 -0
  31. snowflake/snowflake_data_validation/query/__init__.py +22 -0
  32. snowflake/snowflake_data_validation/query/query_generator_base.py +346 -0
  33. snowflake/snowflake_data_validation/redshift/__init__.py +36 -0
  34. snowflake/snowflake_data_validation/redshift/connector/__init__.py +17 -0
  35. snowflake/snowflake_data_validation/redshift/connector/connector_factory_redshift.py +91 -0
  36. snowflake/snowflake_data_validation/redshift/connector/connector_redshift.py +237 -0
  37. snowflake/snowflake_data_validation/redshift/extractor/metadata_extractor_redshift.py +257 -0
  38. snowflake/snowflake_data_validation/redshift/extractor/redshift_cte_generator.py +163 -0
  39. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_concatenated_insert_template.sql.j2 +26 -0
  40. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_concatenated_table_template.sql.j2 +6 -0
  41. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_md5_insert_template.sql.j2 +8 -0
  42. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunk_row_md5_table_template.sql.j2 +6 -0
  43. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_chunks_md5_table_template.sql.j2 +4 -0
  44. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_column_metrics_templates.yaml +696 -0
  45. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_columns_cte_template.sql.j2 +8 -0
  46. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_datatypes_normalization_templates.yaml +46 -0
  47. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_extract_chunks_md5_table_template.sql.j2 +1 -0
  48. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_extract_md5_rows_chunk.sql.j2 +10 -0
  49. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_get_columns_metadata.sql.j2 +85 -0
  50. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_insert_chunk_row_md5_template.sql.j2 +5 -0
  51. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_row_count_query.sql.j2 +1 -0
  52. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_table_metadata_query.sql.j2 +29 -0
  53. snowflake/snowflake_data_validation/redshift/extractor/templates/redshift_to_snowflake_datatypes_mapping_template.yaml +44 -0
  54. snowflake/snowflake_data_validation/redshift/model/__init__.py +21 -0
  55. snowflake/snowflake_data_validation/redshift/model/redshift_credentials_connection.py +69 -0
  56. snowflake/snowflake_data_validation/redshift/query/__init__.py +22 -0
  57. snowflake/snowflake_data_validation/redshift/query/query_generator_redshift.py +315 -0
  58. snowflake/snowflake_data_validation/redshift/redshift_arguments_manager.py +129 -0
  59. snowflake/snowflake_data_validation/redshift/redshift_cli.py +618 -0
  60. snowflake/snowflake_data_validation/redshift/script_writer/__init__.py +22 -0
  61. snowflake/snowflake_data_validation/redshift/script_writer/script_writer_redshift.py +142 -0
  62. snowflake/snowflake_data_validation/script_writer/__init__.py +22 -0
  63. snowflake/snowflake_data_validation/script_writer/script_writer_base.py +152 -0
  64. snowflake/snowflake_data_validation/snowflake/__init__.py +38 -0
  65. snowflake/snowflake_data_validation/snowflake/connector/connector_factory_snowflake.py +159 -0
  66. snowflake/snowflake_data_validation/snowflake/connector/connector_snowflake.py +327 -0
  67. snowflake/snowflake_data_validation/snowflake/extractor/metadata_extractor_snowflake.py +352 -0
  68. snowflake/snowflake_data_validation/snowflake/extractor/snowflake_cte_generator.py +155 -0
  69. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunk_row_concatenated_template.sql.j2 +33 -0
  70. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunk_row_md5_template.sql.j2 +11 -0
  71. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_chunks_md5_table_template.sql.j2 +4 -0
  72. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_column_metrics_templates.yaml +151 -0
  73. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_columns_cte_template.sql.j2 +8 -0
  74. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_datatypes_normalization_templates.yaml +9 -0
  75. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_extract_chunks_md5_table_template.sql.j2 +5 -0
  76. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_extract_md5_rows_chunk.sql.j2 +10 -0
  77. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_get_case_sensitive_columns.sql.j2 +8 -0
  78. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_get_columns_metadata.sql.j2 +76 -0
  79. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_insert_chunk_row_md5_template.sql.j2 +1 -0
  80. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_row_count_query.sql.j2 +1 -0
  81. snowflake/snowflake_data_validation/snowflake/extractor/templates/snowflake_table_metadata_query.sql.j2 +30 -0
  82. snowflake/snowflake_data_validation/snowflake/model/__init__.py +25 -0
  83. snowflake/snowflake_data_validation/snowflake/model/snowflake_credentials_connection.py +62 -0
  84. snowflake/snowflake_data_validation/snowflake/model/snowflake_default_connection.py +31 -0
  85. snowflake/snowflake_data_validation/snowflake/model/snowflake_named_connection.py +36 -0
  86. snowflake/snowflake_data_validation/snowflake/query/__init__.py +22 -0
  87. snowflake/snowflake_data_validation/snowflake/query/query_generator_snowflake.py +223 -0
  88. snowflake/snowflake_data_validation/snowflake/script_writer/__init__.py +22 -0
  89. snowflake/snowflake_data_validation/snowflake/script_writer/script_writer_snowflake.py +96 -0
  90. snowflake/snowflake_data_validation/snowflake/snowflake_arguments_manager.py +204 -0
  91. snowflake/snowflake_data_validation/snowflake/snowflake_cli.py +392 -0
  92. snowflake/snowflake_data_validation/sqlserver/__init__.py +46 -0
  93. snowflake/snowflake_data_validation/sqlserver/connector/__init__.py +17 -0
  94. snowflake/snowflake_data_validation/sqlserver/connector/connector_factory_sql_server.py +92 -0
  95. snowflake/snowflake_data_validation/sqlserver/connector/connector_sql_server.py +312 -0
  96. snowflake/snowflake_data_validation/sqlserver/extractor/__init__.py +16 -0
  97. snowflake/snowflake_data_validation/sqlserver/extractor/metadata_extractor_sqlserver.py +257 -0
  98. snowflake/snowflake_data_validation/sqlserver/extractor/sqlserver_cte_generator.py +161 -0
  99. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_chunks_md5_table_template.sql.j2 +4 -0
  100. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_column_metrics_templates.yaml +537 -0
  101. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_columns_cte_template.sql.j2 +8 -0
  102. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_compute_md5_sql.j2 +55 -0
  103. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_datatypes_normalization_templates.yaml +26 -0
  104. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_extract_chunks_md5_table_template.sql.j2 +1 -0
  105. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_extract_md5_rows_chunk.sql.j2 +10 -0
  106. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_get_columns_metadata.sql.j2 +70 -0
  107. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_row_count_query.sql.j2 +1 -0
  108. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_table_metadata_query.sql.j2 +23 -0
  109. snowflake/snowflake_data_validation/sqlserver/extractor/templates/sqlserver_to_snowflake_datatypes_mapping_template.yaml +32 -0
  110. snowflake/snowflake_data_validation/sqlserver/model/__init__.py +21 -0
  111. snowflake/snowflake_data_validation/sqlserver/model/sqlserver_credentials_connection.py +71 -0
  112. snowflake/snowflake_data_validation/sqlserver/query/__init__.py +22 -0
  113. snowflake/snowflake_data_validation/sqlserver/query/query_generator_sqlserver.py +197 -0
  114. snowflake/snowflake_data_validation/sqlserver/script_writer/__init__.py +22 -0
  115. snowflake/snowflake_data_validation/sqlserver/script_writer/script_writer_sqlserver.py +177 -0
  116. snowflake/snowflake_data_validation/sqlserver/sqlserver_arguments_manager.py +147 -0
  117. snowflake/snowflake_data_validation/sqlserver/sqlserver_cli.py +701 -0
  118. snowflake/snowflake_data_validation/table_partitioning_strategy.md +96 -0
  119. snowflake/snowflake_data_validation/teradata/__init__.py +14 -0
  120. snowflake/snowflake_data_validation/teradata/connector/__init__.py +14 -0
  121. snowflake/snowflake_data_validation/teradata/connector/connector_factory_teradata.py +79 -0
  122. snowflake/snowflake_data_validation/teradata/connector/connector_teradata.py +264 -0
  123. snowflake/snowflake_data_validation/teradata/extractor/__init__.py +19 -0
  124. snowflake/snowflake_data_validation/teradata/extractor/metadata_extractor_teradata.py +264 -0
  125. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_chunks_md5_table_template.sql.j2 +4 -0
  126. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_column_metrics_templates.yaml +497 -0
  127. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_columns_cte_template.sql.j2 +8 -0
  128. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_compute_md5_sql.j2 +62 -0
  129. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_create_row_concatenated.sql.j2 +7 -0
  130. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_create_row_md5.sql.j2 +7 -0
  131. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_datatypes_normalization_templates.yaml +15 -0
  132. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_extract_chunks_md5_table_template.sql.j2 +1 -0
  133. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_extract_md5_rows_chunk.sql.j2 +10 -0
  134. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_get_columns_metadata.sql.j2 +70 -0
  135. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_row_count_query.sql.j2 +1 -0
  136. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_table_metadata_query.sql.j2 +92 -0
  137. snowflake/snowflake_data_validation/teradata/extractor/templates/teradata_to_snowflake_datatypes_mapping_template.yaml +48 -0
  138. snowflake/snowflake_data_validation/teradata/extractor/teradata_cte_generator.py +190 -0
  139. snowflake/snowflake_data_validation/teradata/model/__init__.py +21 -0
  140. snowflake/snowflake_data_validation/teradata/model/teradata_credentials_connection.py +53 -0
  141. snowflake/snowflake_data_validation/teradata/query/__init__.py +19 -0
  142. snowflake/snowflake_data_validation/teradata/query/query_generator_teradata.py +231 -0
  143. snowflake/snowflake_data_validation/teradata/script_writer/__init__.py +19 -0
  144. snowflake/snowflake_data_validation/teradata/script_writer/script_writer_teradata.py +50 -0
  145. snowflake/snowflake_data_validation/teradata/teradata_arguments_manager.py +143 -0
  146. snowflake/snowflake_data_validation/teradata/teradata_cli.py +745 -0
  147. snowflake/snowflake_data_validation/utils/__init__.py +16 -0
  148. snowflake/snowflake_data_validation/utils/arguments_manager_base.py +482 -0
  149. snowflake/snowflake_data_validation/utils/arguments_manager_factory.py +130 -0
  150. snowflake/snowflake_data_validation/utils/base_output_handler.py +66 -0
  151. snowflake/snowflake_data_validation/utils/configuration_file_editor.py +86 -0
  152. snowflake/snowflake_data_validation/utils/configuration_file_generator.py +165 -0
  153. snowflake/snowflake_data_validation/utils/connection_pool.py +349 -0
  154. snowflake/snowflake_data_validation/utils/connector_factory.py +71 -0
  155. snowflake/snowflake_data_validation/utils/console_output_handler.py +95 -0
  156. snowflake/snowflake_data_validation/utils/constants.py +350 -0
  157. snowflake/snowflake_data_validation/utils/context.py +126 -0
  158. snowflake/snowflake_data_validation/utils/cpu_optimizer.py +156 -0
  159. snowflake/snowflake_data_validation/utils/helper.py +30 -0
  160. snowflake/snowflake_data_validation/utils/helpers/helper_database.py +54 -0
  161. snowflake/snowflake_data_validation/utils/helpers/helper_dataframe.py +83 -0
  162. snowflake/snowflake_data_validation/utils/helpers/helper_io.py +85 -0
  163. snowflake/snowflake_data_validation/utils/helpers/helper_misc.py +95 -0
  164. snowflake/snowflake_data_validation/utils/helpers/helper_templates.py +294 -0
  165. snowflake/snowflake_data_validation/utils/logging_config.py +197 -0
  166. snowflake/snowflake_data_validation/utils/logging_utils.py +68 -0
  167. snowflake/snowflake_data_validation/utils/model/chunk.py +16 -0
  168. snowflake/snowflake_data_validation/utils/model/column_metadata.py +45 -0
  169. snowflake/snowflake_data_validation/utils/model/table_column_metadata.py +94 -0
  170. snowflake/snowflake_data_validation/utils/model/table_context.py +351 -0
  171. snowflake/snowflake_data_validation/utils/model/templates_loader_manager.py +123 -0
  172. snowflake/snowflake_data_validation/utils/progress_reporter.py +61 -0
  173. snowflake/snowflake_data_validation/utils/run_context.py +59 -0
  174. snowflake/snowflake_data_validation/utils/table_partitioning_strategy.py +148 -0
  175. snowflake/snowflake_data_validation/utils/telemetry.py +863 -0
  176. snowflake/snowflake_data_validation/utils/templates/configuration_file_templates.py +95 -0
  177. snowflake/snowflake_data_validation/utils/thread_safe_singleton.py +49 -0
  178. snowflake/snowflake_data_validation/utils/validation_utils.py +142 -0
  179. snowflake/snowflake_data_validation/validation/__init__.py +16 -0
  180. snowflake/snowflake_data_validation/validation/data_validator_base.py +452 -0
  181. snowflake/snowflake_data_validation/validation/metrics_data_validator.py +251 -0
  182. snowflake/snowflake_data_validation/validation/row_data_validator.py +479 -0
  183. snowflake/snowflake_data_validation/validation/schema_data_validator.py +197 -0
  184. snowflake/snowflake_data_validation/validation/validation_report_buffer.py +196 -0
  185. snowflake_data_validation-1.0.1.dist-info/METADATA +228 -0
  186. snowflake_data_validation-1.0.1.dist-info/RECORD +189 -0
  187. snowflake_data_validation-1.0.1.dist-info/WHEEL +4 -0
  188. snowflake_data_validation-1.0.1.dist-info/entry_points.txt +3 -0
  189. snowflake_data_validation-1.0.1.dist-info/licenses/LICENSE +177 -0
@@ -0,0 +1,294 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+
18
+ import pandas as pd
19
+ import yaml
20
+
21
+ from snowflake.snowflake_data_validation.utils.constants import (
22
+ COL_NAME_QUOTES_PLACEHOLDER,
23
+ COLUMN_MODIFIER_COLUMN_KEY,
24
+ EDITABLE_YAML_FILE_FORMAT_ERROR,
25
+ METRIC_COLUMN_KEY,
26
+ METRIC_METRIC_COLUMN_MODIFIER_KEY,
27
+ METRIC_NORMALIZATION_KEY,
28
+ METRIC_QUERY_KEY,
29
+ METRIC_QUERY_PLACEHOLDER,
30
+ METRIC_RETURN_DATATYPE_KEY,
31
+ NORMALIZATION_COLUMN_KEY,
32
+ TEMPLATE_COLUMN_KEY,
33
+ TYPE_COLUMN_KEY,
34
+ )
35
+
36
+
37
class HelperTemplates:
    """Helper class for loading and processing templates from YAML files.

    Every method is a ``staticmethod``; the class is used purely as a
    namespace and holds no state.
    """

    @staticmethod
    def load_metrics_templates_from_yaml(
        yaml_path: Path, datatypes_normalization_templates: dict[str, str]
    ) -> pd.DataFrame:
        """Load metrics templates from a YAML file into a pandas DataFrame.

        Args:
            yaml_path (Path): The file path to the YAML file.
            datatypes_normalization_templates (dict[str, str]): A dictionary containing
                normalization templates for data types, keyed by upper-case datatype.

        Raises:
            FileNotFoundError: If the specified YAML file does not exist.
            RuntimeError: If a key lookup fails while reformatting the data
                (reported as a missing datatype in the normalization file), or
                if the file cannot be read, parsed, or converted for any other
                reason.

        Returns:
            pd.DataFrame: A DataFrame containing the data from the YAML file.

        """
        if not yaml_path.exists():
            raise FileNotFoundError(f"Template file not found at: {yaml_path}")

        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # locale default encoding of the host.
            file_content = yaml_path.read_text(encoding="utf-8")
            yaml_data = yaml.safe_load(file_content)
            yaml_data_reformatted = HelperTemplates._reformat_metrics_yaml_data(
                yaml_data=yaml_data,
                datatypes_normalization_templates=datatypes_normalization_templates,
            )
            df = pd.DataFrame.from_dict(yaml_data_reformatted)

        except KeyError as e:
            # NOTE: any KeyError raised while reformatting lands here, including
            # a metric entry that lacks its query / return-datatype key, not
            # only a datatype absent from the normalization templates.
            error_message = (
                f"Missing {e.args[0]} datatype in datatypes normalization file."
            )
            raise RuntimeError(error_message) from e

        except Exception as e:
            error_message = EDITABLE_YAML_FILE_FORMAT_ERROR.format(
                file_name=yaml_path.name
            )
            raise RuntimeError(error_message) from e

        return df

    @staticmethod
    def load_datatypes_templates_from_yaml(
        yaml_path: Path, platform: str
    ) -> pd.DataFrame:
        """Load datatypes templates from a YAML file into a pandas DataFrame.

        Args:
            yaml_path (Path): The file path to the YAML file.
            platform (str): The platform identifier to use. Its lower-cased
                form becomes the name of the column holding the source datatypes.

        Raises:
            FileNotFoundError: If the specified YAML file does not exist.
            RuntimeError: If there is an error in the format of the YAML file.

        Returns:
            pd.DataFrame: A DataFrame containing the data from the YAML file.

        """
        if not yaml_path.exists():
            raise FileNotFoundError(f"Template file not found at: {yaml_path}")

        try:
            file_content = yaml_path.read_text(encoding="utf-8")
            yaml_data = yaml.safe_load(file_content)
            yaml_data_reformatted = HelperTemplates._reformat_datatypes_yaml_data(
                yaml_data, platform
            )
            df = pd.DataFrame.from_dict(yaml_data_reformatted)

        except Exception as e:
            error_message = EDITABLE_YAML_FILE_FORMAT_ERROR.format(
                file_name=yaml_path.name
            )
            raise RuntimeError(error_message) from e

        return df

    @staticmethod
    def load_datatypes_normalization_templates_from_yaml(
        yaml_path: Path,
    ) -> dict[str, str]:
        """Load datatypes normalization templates from a YAML file into a dictionary.

        Keys are upper-cased and every backslash-escaped double quote (``\\"``)
        in a template is replaced by a plain double quote.

        Args:
            yaml_path (Path): The file path to the YAML file.

        Raises:
            FileNotFoundError: If the specified YAML file does not exist.
            RuntimeError: If there is an error in the format of the YAML file.

        Returns:
            dict[str, str]: A dictionary containing the data from the YAML file.

        """
        if not yaml_path.exists():
            raise FileNotFoundError(f"Template file not found at: {yaml_path}")

        try:
            file_content = yaml_path.read_text(encoding="utf-8")
            yaml_data = yaml.safe_load(file_content)
            yaml_data_reformatted = {
                key.upper(): template.replace('\\"', '"')
                for key, template in yaml_data.items()
            }

        except Exception as e:
            error_message = EDITABLE_YAML_FILE_FORMAT_ERROR.format(
                file_name=yaml_path.name
            )
            raise RuntimeError(error_message) from e

        return yaml_data_reformatted

    @staticmethod
    def _reformat_datatypes_yaml_data(
        yaml_data: dict, platform: str
    ) -> dict[str, list[str]]:
        """Pivot a ``{source_type: {target: mapped_type}}`` mapping into columns.

        Args:
            yaml_data (dict): The original YAML data, keyed by source datatype;
                each value maps a target name to the corresponding datatype.
            platform (str): The platform identifier to use. Its lower-cased
                form is the key under which the source datatypes are listed.

        Raises:
            TypeError: If an entry of ``yaml_data`` is not iterable.

        Returns:
            dict: One list of source datatypes under the lower-cased platform
                name, plus one list of mapped datatypes per target name, in
                source order.

        """
        source_data_types = list(yaml_data.keys())
        columns: dict[str, list[str]] = {str.lower(platform): source_data_types}

        target_columns: dict[str, list[str]] = {}
        for source_data_type in source_data_types:
            targets = yaml_data[source_data_type]
            for target_data_type in targets:
                target_columns.setdefault(target_data_type, []).append(
                    targets[target_data_type]
                )

        # Applied last on purpose: a target whose name equals the lower-cased
        # platform replaces the source-datatype column, as before.
        columns.update(target_columns)
        return columns

    @staticmethod
    def _reformat_metrics_yaml_data(
        yaml_data: dict, datatypes_normalization_templates: dict[str, str]
    ) -> dict[str, list[str]]:
        """Flatten per-datatype metric definitions into column lists.

        Args:
            yaml_data (dict): The original YAML data, keyed by datatype; each
                value maps a metric name to that metric's template settings.
            datatypes_normalization_templates (dict[str, str]): A dictionary containing
                normalization templates for data types, keyed by upper-case datatype.

        Raises:
            KeyError: If a metric lacks its query key, or has no explicit
                normalization and either lacks a return datatype or names one
                that is absent from ``datatypes_normalization_templates``.

        Returns:
            dict: Five equally long lists (datatype, metric name, query
                template, normalization template, column modifier), one entry
                per (datatype, metric) pair.

        """
        type_column: list[str] = []
        metric_column: list[str] = []
        metric_template_column: list[str] = []
        metric_normalization_template_column: list[str] = []
        column_modifier_column: list = []

        for data_type, metrics in yaml_data.items():
            for metric_name, templates in metrics.items():
                type_column.append(data_type.upper())
                metric_column.append(metric_name)
                metric_template_column.append(templates[METRIC_QUERY_KEY])

                explicit_normalization = templates.get(METRIC_NORMALIZATION_KEY)
                if explicit_normalization is not None:
                    metric_normalization_template_column.append(
                        explicit_normalization
                    )
                else:
                    # No metric-specific normalization: derive it from the
                    # template registered for the metric's return datatype,
                    # swapping the column placeholder for the query placeholder.
                    metric_return_datatype = templates[METRIC_RETURN_DATATYPE_KEY]
                    normalization_template = datatypes_normalization_templates[
                        metric_return_datatype.upper()
                    ]
                    metric_normalization_template_column.append(
                        normalization_template.replace(
                            COL_NAME_QUOTES_PLACEHOLDER, METRIC_QUERY_PLACEHOLDER
                        )
                    )

                column_modifier_column.append(
                    templates.get(METRIC_METRIC_COLUMN_MODIFIER_KEY)
                )

        return {
            TYPE_COLUMN_KEY: type_column,
            METRIC_COLUMN_KEY: metric_column,
            TEMPLATE_COLUMN_KEY: metric_template_column,
            NORMALIZATION_COLUMN_KEY: metric_normalization_template_column,
            COLUMN_MODIFIER_COLUMN_KEY: column_modifier_column,
        }

    @staticmethod
    def load_datatypes_mapping_templates_from_yaml(yaml_path: Path) -> dict[str, str]:
        """Load datatypes mapping templates from a YAML file into a dictionary.

        Args:
            yaml_path (Path): The file path to the YAML file.

        Raises:
            FileNotFoundError: If the specified YAML file does not exist.
            RuntimeError: If there is an error in the format of the YAML file.

        Returns:
            dict[str, str]: A dictionary containing the data from the YAML file,
                or an empty dictionary when the file holds no YAML document.

        """
        if not yaml_path.exists():
            raise FileNotFoundError(f"Template file not found at: {yaml_path}")

        try:
            file_content = yaml_path.read_text(encoding="utf-8")
            # A blank file (empty or whitespace only) carries no mapping.
            if not file_content.strip():
                return {}

            yaml_data = yaml.safe_load(file_content)

        except Exception as e:
            error_message = EDITABLE_YAML_FILE_FORMAT_ERROR.format(
                file_name=yaml_path.name
            )
            raise RuntimeError(error_message) from e

        # safe_load yields None for a document with no content (e.g. only
        # comments); honour the declared dict return type in that case too.
        return {} if yaml_data is None else yaml_data
@@ -0,0 +1,197 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Logging configuration for Snowflake Data Validation CLI."""
14
+
15
+ import logging.config
16
+ import os
17
+ import shutil
18
+
19
+ from datetime import datetime
20
+
21
+
22
+ class LoggingManager:
23
+ """Manages logging configuration and log file relocation."""
24
+
25
+ # Logger names that need file handler management
26
+ _MANAGED_LOGGER_NAMES = ["", "snowflake.snowflake_data_validation"]
27
+
28
+ def __init__(self):
29
+ """Initialize the logging manager."""
30
+ self._log_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
31
+ self._current_log_file: str | None = None
32
+
33
+ def setup_logging(
34
+ self,
35
+ log_level: str = "INFO",
36
+ console_level: str | None = None,
37
+ file_level: str | None = None,
38
+ ) -> str:
39
+ """Set up logging configuration.
40
+
41
+ Args:
42
+ log_level: Default logging level for both console and file
43
+ console_level: Console-specific logging level (overrides log_level)
44
+ file_level: File-specific logging level (overrides log_level)
45
+
46
+ Returns:
47
+ str: Path to the initial log file
48
+
49
+ """
50
+ # Create initial log file in current directory
51
+ log_filename = f"data_validation_{self._log_timestamp}.log"
52
+ self._current_log_file = log_filename
53
+
54
+ # Determine actual log levels
55
+ actual_console_level = console_level or log_level
56
+ actual_file_level = file_level or log_level
57
+
58
+ logging.config.dictConfig(
59
+ {
60
+ "version": 1,
61
+ "disable_existing_loggers": False,
62
+ "formatters": {
63
+ "standard": {
64
+ "format": "{asctime} - {name} - {levelname} - {message}",
65
+ "style": "{",
66
+ "datefmt": "%Y-%m-%d %H:%M:%S",
67
+ },
68
+ },
69
+ "handlers": {
70
+ "console": {
71
+ "class": "logging.StreamHandler",
72
+ "formatter": "standard",
73
+ "level": actual_console_level,
74
+ },
75
+ "file": {
76
+ "class": "logging.FileHandler",
77
+ "formatter": "standard",
78
+ "filename": log_filename,
79
+ "level": actual_file_level,
80
+ "encoding": "utf-8",
81
+ },
82
+ },
83
+ "loggers": {
84
+ "snowflake.snowflake_data_validation": {
85
+ "handlers": ["file"],
86
+ "level": actual_file_level,
87
+ "propagate": False,
88
+ },
89
+ },
90
+ "root": {
91
+ "handlers": ["console", "file"],
92
+ "level": min(actual_console_level, actual_file_level),
93
+ },
94
+ }
95
+ )
96
+
97
+ return log_filename
98
+
99
+ def relocate_log_file(self, output_directory_path: str) -> bool:
100
+ """Move the log file to the output directory.
101
+
102
+ Args:
103
+ output_directory_path: Target directory for the log file
104
+
105
+ Returns:
106
+ bool: True if successful, False otherwise
107
+
108
+ """
109
+ if not self._current_log_file or not output_directory_path:
110
+ return False
111
+
112
+ try:
113
+ os.makedirs(output_directory_path, exist_ok=True)
114
+ new_log_path = os.path.join(
115
+ output_directory_path, f"data_validation_{self._log_timestamp}.log"
116
+ )
117
+ if os.path.exists(self._current_log_file):
118
+ self._close_file_handlers()
119
+
120
+ shutil.move(self._current_log_file, new_log_path)
121
+ self._current_log_file = new_log_path
122
+
123
+ self._update_file_handler(new_log_path)
124
+
125
+ logging.getLogger(__name__).info(
126
+ f"Log file relocated to: {new_log_path}"
127
+ )
128
+ return True
129
+
130
+ except Exception as e:
131
+ logging.getLogger(__name__).error(
132
+ f"Failed to relocate log file: {e}", exc_info=True
133
+ )
134
+
135
+ return False
136
+
137
+ def get_current_log_file(self) -> str:
138
+ """Get the current log file path."""
139
+ return self._current_log_file or ""
140
+
141
+ def _close_file_handlers(self):
142
+ """Close all file handlers to release file locks (important for Windows)."""
143
+ for logger_name in self._MANAGED_LOGGER_NAMES:
144
+ logger = logging.getLogger(logger_name)
145
+ for handler in logger.handlers[:]:
146
+ if isinstance(handler, logging.FileHandler):
147
+ handler.flush()
148
+ handler.close()
149
+
150
+ def _update_file_handler(self, new_log_path: str):
151
+ """Update file handlers to use the new log file path."""
152
+ for logger_name in self._MANAGED_LOGGER_NAMES:
153
+ logger = logging.getLogger(logger_name)
154
+ # Iterate over a shallow copy of the logger's handlers list.
155
+ # This allows us to safely modify (remove) handlers from the original list during iteration.
156
+ for handler in logger.handlers[:]:
157
+ if isinstance(handler, logging.FileHandler):
158
+ new_handler = logging.FileHandler(new_log_path, encoding="utf-8")
159
+ new_handler.setLevel(handler.level)
160
+ new_handler.setFormatter(handler.formatter)
161
+
162
+ # Replace old handler
163
+ logger.removeHandler(handler)
164
+ logger.addHandler(new_handler)
165
+
166
+
167
+ # Singleton instance
168
+ _logging_manager = LoggingManager()
169
+
170
+
171
+ def setup_logging(
172
+ log_level: str = "INFO",
173
+ console_level: str | None = None,
174
+ file_level: str | None = None,
175
+ ) -> str:
176
+ """Set up logging configuration.
177
+
178
+ Args:
179
+ log_level: Default logging level for both console and file
180
+ console_level: Console-specific logging level (overrides log_level)
181
+ file_level: File-specific logging level (overrides log_level)
182
+
183
+ Returns:
184
+ str: Path to the initial log file
185
+
186
+ """
187
+ return _logging_manager.setup_logging(log_level, console_level, file_level)
188
+
189
+
190
def relocate_log_file(output_directory_path: str) -> bool:
    """Move the active log file into ``output_directory_path``.

    Delegates to the module-level logging manager and returns its result.
    """
    was_relocated = _logging_manager.relocate_log_file(
        output_directory_path=output_directory_path
    )
    return was_relocated
193
+
194
+
195
def get_current_log_file() -> str:
    """Return the log file path tracked by the module-level logging manager."""
    current_path = _logging_manager.get_current_log_file()
    return current_path
@@ -0,0 +1,68 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+
18
+ from collections.abc import Callable
19
+ from functools import wraps
20
+ from typing import TypeVar
21
+
22
+ from typing_extensions import ParamSpec
23
+
24
+
25
# Type variables used by the ``log`` decorator's annotations:
# P stands for the decorated callable's parameters, R for its return type.
P = ParamSpec("P")
R = TypeVar("R")
27
+
28
+
29
def log(
    _func: Callable[P, R] | None = None,
    *,
    logger: logging.Logger | None = None,
    log_args: bool = True,
) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Log calls to the decorated function and any exception it raises.

    Works both bare (``@log``) and with keyword options
    (``@log(logger=..., log_args=False)``).

    Args:
        _func: The function to decorate when the decorator is used bare;
            ``None`` when the decorator is called with options.
        logger: The logger to use. If not provided, the logger named after
            the decorated function's module is used.
        log_args: Whether to log, at DEBUG level, the arguments passed to
            the function.

    Returns:
        The wrapped function when used bare; otherwise a decorator that
        wraps the function it is applied to. Exceptions raised by the
        wrapped function are logged with their traceback and re-raised.

    """

    def decorator(func: Callable[P, R]) -> Callable[P, R]:
        @wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            _logger = logging.getLogger(func.__module__) if logger is None else logger
            # Build the argument string only when it will actually be
            # emitted: repr() of large arguments can be costly, and a
            # failing __repr__ must not break calls while DEBUG is off.
            if log_args and _logger.isEnabledFor(logging.DEBUG):
                args_repr = [repr(a) for a in args]
                kwargs_repr = [f"{k}={v!r}" for k, v in kwargs.items()]
                formatted_args = ", ".join([*args_repr, *kwargs_repr])
                _logger.debug("%s called with args %s", func.__name__, formatted_args)
            try:
                return func(*args, **kwargs)
            except Exception:
                # Record the traceback, then let the caller handle the error.
                _logger.exception("An error occurred in %s", func.__name__)
                raise

        return wrapper

    # Called with options (``@log(...)``): hand back the decorator itself.
    if _func is None:
        return decorator
    # Used without parentheses (``@log``): wrap the function right away.
    return decorator(_func)
@@ -0,0 +1,16 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class Chunk:
    """A slice of records to process, described by its size and start.

    Attributes:
        fetch (int): How many records to fetch in this chunk.
        offset (int): The starting point from which the records are fetched.

    """

    # Number of records in the chunk.
    fetch: int
    # Starting point of the chunk.
    offset: int
@@ -0,0 +1,45 @@
1
from typing import Any


class ColumnMetadata:
    """Represents metadata for a column in a Snowflake table."""

    def __init__(
        self,
        name: str,
        data_type: str,
        nullable: bool,
        is_primary_key: bool,
        calculated_column_size_in_bytes: int,
        properties: dict[str, Any],
    ):
        """Initialize the ColumnMetadata object.

        Args:
            name (str): The name of the column.
            data_type (str): The data type of the column.
            nullable (bool): Indicates if the column can contain NULL values.
            is_primary_key (bool): Indicates if the column is part of the primary key.
            calculated_column_size_in_bytes (int): The calculated size of the column in bytes.
            properties (dict[str, Any]): Additional properties of the column.

        """
        self.name: str = name
        self.data_type: str = data_type
        self.nullable: bool = nullable
        self.is_primary_key: bool = is_primary_key
        self.calculated_column_size_in_bytes: int = calculated_column_size_in_bytes
        # Stored by reference: the caller's dict is not copied here.
        self.properties: dict[str, Any] = properties

    def copy(self) -> "ColumnMetadata":
        """Create a copy of the ColumnMetadata instance.

        The ``properties`` dict is shallow-copied: the copy gets its own
        dict, but the values inside it are shared with the original.
        """
        return ColumnMetadata(
            self.name,
            self.data_type,
            self.nullable,
            self.is_primary_key,
            self.calculated_column_size_in_bytes,
            self.properties.copy(),
        )

    def to_upper_name(self) -> None:
        """Convert the column name to uppercase in place."""
        self.name = self.name.upper()