xoverrr 1.1.5__tar.gz → 1.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {xoverrr-1.1.5/src/xoverrr.egg-info → xoverrr-1.1.6}/PKG-INFO +3 -3
  2. {xoverrr-1.1.5 → xoverrr-1.1.6}/pyproject.toml +10 -4
  3. xoverrr-1.1.6/src/xoverrr/__init__.py +13 -0
  4. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/__init__.py +7 -2
  5. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/base.py +61 -32
  6. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/clickhouse.py +62 -37
  7. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/oracle.py +65 -36
  8. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/postgres.py +67 -35
  9. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/constants.py +4 -4
  10. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/core.py +296 -198
  11. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/exceptions.py +8 -1
  12. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/logger.py +4 -2
  13. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/models.py +11 -5
  14. xoverrr-1.1.6/src/xoverrr/utils.py +740 -0
  15. {xoverrr-1.1.5 → xoverrr-1.1.6/src/xoverrr.egg-info}/PKG-INFO +3 -3
  16. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr.egg-info/requires.txt +2 -2
  17. xoverrr-1.1.5/src/xoverrr/__init__.py +0 -17
  18. xoverrr-1.1.5/src/xoverrr/utils.py +0 -664
  19. {xoverrr-1.1.5 → xoverrr-1.1.6}/LICENSE +0 -0
  20. {xoverrr-1.1.5 → xoverrr-1.1.6}/README.md +0 -0
  21. {xoverrr-1.1.5 → xoverrr-1.1.6}/setup.cfg +0 -0
  22. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr.egg-info/SOURCES.txt +0 -0
  23. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr.egg-info/dependency_links.txt +0 -0
  24. {xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr.egg-info/top_level.txt +0 -0
{xoverrr-1.1.5/src/xoverrr.egg-info → xoverrr-1.1.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xoverrr
-Version: 1.1.5
+Version: 1.1.6
 Summary: A tool for cross-database and intra-source data comparison with detailed discrepancy analysis and reporting.
 Author-email: Dmitry Ischenko <hotmori@gmail.com>
 License: MIT
@@ -21,7 +21,7 @@ Requires-Dist: clickhouse-sqlalchemy>=0.2.0
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
-Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: ruff>=0.15.0; extra == "dev"
 Requires-Dist: isort>=5.12.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
 Requires-Dist: pre-commit>=3.0.0; extra == "dev"
@@ -31,7 +31,7 @@ Requires-Dist: pytest>=7.0.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: tenacity>=8.2.0; extra == "test"
 Provides-Extra: lint
-Requires-Dist: black>=23.0.0; extra == "lint"
+Requires-Dist: ruff>=0.15.0; extra == "lint"
 Requires-Dist: isort>=5.12.0; extra == "lint"
 Requires-Dist: flake8>=6.0.0; extra == "lint"
 Dynamic: license-file

{xoverrr-1.1.5 → xoverrr-1.1.6}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "xoverrr"
-version = "1.1.5"
+version = "1.1.6"
 description = "A tool for cross-database and intra-source data comparison with detailed discrepancy analysis and reporting."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -36,7 +36,7 @@ Homepage = "https://github.com/dima-ischenko/xoverrr"
 dev = [
     "pytest>=7.0.0",
     "pytest-cov>=4.0.0",
-    "black>=23.0.0",
+    "ruff>=0.15.0",
     "isort>=5.12.0",
     "mypy>=1.0.0",
     "pre-commit>=3.0.0",
@@ -48,7 +48,7 @@ test = [
     "tenacity>=8.2.0"
 ]
 lint = [
-    "black>=23.0.0",
+    "ruff>=0.15.0",
     "isort>=5.12.0",
     "flake8>=6.0.0",
 ]
@@ -59,4 +59,10 @@ where = ["src"]
 [tool.pytest.ini_options]
 pythonpath = ["src"]
 testpaths = ["tests"]
-addopts = "-v"
+addopts = "-v"
+
+[tool.ruff]
+target-version = "py39"
+
+[tool.ruff.format]
+quote-style = "single"

xoverrr-1.1.6/src/xoverrr/__init__.py
@@ -0,0 +1,13 @@
+from .constants import (COMPARISON_FAILED, COMPARISON_SKIPPED,
+                        COMPARISON_SUCCESS)
+from .core import DataQualityComparator, DataReference
+
+__all__ = [
+    'DataQualityComparator',
+    'DataReference',
+    'COMPARISON_SUCCESS',
+    'COMPARISON_FAILED',
+    'COMPARISON_SKIPPED',
+]
+
+__version__ = '1.1.6'
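Note: the new top-level __init__.py pins down the package's public surface. A minimal smoke test of what it re-exports, assuming only the names visible in __all__ above (the constructor signatures of DataQualityComparator and DataReference are not shown in this diff):

import xoverrr

# __version__ is set explicitly in the new module
assert xoverrr.__version__ == '1.1.6'

# Status constants come from xoverrr.constants via the re-export
print(xoverrr.COMPARISON_SUCCESS, xoverrr.COMPARISON_FAILED, xoverrr.COMPARISON_SKIPPED)

# Core entry points are importable from the package root
from xoverrr import DataQualityComparator, DataReference  # noqa: F401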
{xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/__init__.py
@@ -1,6 +1,11 @@
 from .base import BaseDatabaseAdapter
+from .clickhouse import ClickHouseAdapter
 from .oracle import OracleAdapter
 from .postgres import PostgresAdapter
-from .clickhouse import ClickHouseAdapter
 
-__all__ = ['BaseDatabaseAdapter', 'OracleAdapter', 'PostgresAdapter', 'ClickHouseAdapter']
+__all__ = [
+    'BaseDatabaseAdapter',
+    'OracleAdapter',
+    'PostgresAdapter',
+    'ClickHouseAdapter',
+]

{xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/base.py
@@ -1,18 +1,23 @@
-from abc import ABC, abstractmethod
-import pandas as pd
-from typing import Dict, Callable, List, Tuple, Optional, Union
 import re
+from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from ..models import DataReference, ObjectType
-from ..constants import RESERVED_WORDS
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
+import pandas as pd
 from sqlalchemy.engine import Engine
+
+from ..constants import RESERVED_WORDS
 from ..logger import app_logger
-from ..logger import app_logger
+from ..models import DataReference, ObjectType
+
 
 class BaseDatabaseAdapter(ABC):
     """Abstract base class with updated method signatures for parameterized queries"""
+
     @abstractmethod
-    def _execute_query(self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone:str) -> pd.DataFrame:
+    def _execute_query(
+        self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str
+    ) -> pd.DataFrame:
         """Execute query with DBMS-specific optimizations"""
         pass
 
@@ -30,42 +35,66 @@ class BaseDatabaseAdapter(ABC):
         pass
 
     @abstractmethod
-    def build_count_query(self, data_ref: DataReference, date_column: str,
-                          start_date: Optional[str], end_date: Optional[str]
-                          ) -> Tuple[str, Dict]:
+    def build_count_query(
+        self,
+        data_ref: DataReference,
+        date_column: str,
+        start_date: Optional[str],
+        end_date: Optional[str],
+    ) -> Tuple[str, Dict]:
         """Returns tuple of (query, params) with recent data exclusion"""
         pass
 
-    def build_data_query_common(self, data_ref: DataReference, columns: List[str],
-                                date_column: Optional[str], update_column: Optional[str],
-                                start_date: Optional[str], end_date: Optional[str],
-                                exclude_recent_hours: Optional[int] = None) -> Tuple[str, Dict]:
+    def build_data_query_common(
+        self,
+        data_ref: DataReference,
+        columns: List[str],
+        date_column: Optional[str],
+        update_column: Optional[str],
+        start_date: Optional[str],
+        end_date: Optional[str],
+        exclude_recent_hours: Optional[int] = None,
+    ) -> Tuple[str, Dict]:
         """Build data query for the DBMS with recent data exclusion"""
        # Handle reserved words
         cols_select = [
-            f'"{col}"' if col.lower() in RESERVED_WORDS
-            else col
-            for col in columns
+            f'"{col}"' if col.lower() in RESERVED_WORDS else col for col in columns
         ]
 
-        result = self.build_data_query(data_ref, cols_select, date_column, update_column,
-                                       start_date, end_date, exclude_recent_hours)
+        result = self.build_data_query(
+            data_ref,
+            cols_select,
+            date_column,
+            update_column,
+            start_date,
+            end_date,
+            exclude_recent_hours,
+        )
         return result
 
     @abstractmethod
-    def build_data_query(self, data_ref: DataReference, columns: List[str],
-                         date_column: Optional[str], update_column: Optional[str],
-                         start_date: Optional[str], end_date: Optional[str],
-                         exclude_recent_hours: Optional[int] = None) -> Tuple[str, Dict]:
+    def build_data_query(
+        self,
+        data_ref: DataReference,
+        columns: List[str],
+        date_column: Optional[str],
+        update_column: Optional[str],
+        start_date: Optional[str],
+        end_date: Optional[str],
+        exclude_recent_hours: Optional[int] = None,
+    ) -> Tuple[str, Dict]:
        pass
 
     @abstractmethod
-    def _build_exclusion_condition(self, update_column: str,
-                                   exclude_recent_hours: int) -> Tuple[str, Dict]:
+    def _build_exclusion_condition(
+        self, update_column: str, exclude_recent_hours: int
+    ) -> Tuple[str, Dict]:
         """DBMS-specific implementation for recent data exclusion"""
         pass
 
-    def convert_types(self, df: pd.DataFrame, metadata: pd.DataFrame, timezone: str) -> pd.DataFrame:
+    def convert_types(
+        self, df: pd.DataFrame, metadata: pd.DataFrame, timezone: str
+    ) -> pd.DataFrame:
         """Convert DBMS-specific types to standardized formats"""
         # there is need to specify timezone for covnersion as
         # pandas implicitly converts to UTC tz aware cols
@@ -78,8 +107,9 @@
         """Get type conversion rules for specific DBMS"""
         pass
 
-    def _apply_type_conversion(self, df: pd.DataFrame, metadata: pd.DataFrame,
-                               type_rules: Dict[str, Callable]) -> pd.DataFrame:
+    def _apply_type_conversion(
+        self, df: pd.DataFrame, metadata: pd.DataFrame, type_rules: Dict[str, Callable]
+    ) -> pd.DataFrame:
         """Apply type conversion rules to DataFrame"""
         if df.empty:
             return df
@@ -94,7 +124,6 @@
             if col_name not in df.columns:
                 continue
 
-
             col_type = col_info['data_type'].lower()
             # Find matching conversion rule
             converter = None
@@ -105,15 +134,15 @@
                     break
 
             if converter is None:
-                continue # Skip columns without converters
+                continue  # Skip columns without converters
 
             try:
                 df[col_name] = converter(df[col_name])
             except Exception as e:
-                app_logger.warning(f"Type conversion failed for {col_name}: {str(e)}")
+                app_logger.warning(f'Type conversion failed for {col_name}: {str(e)}')
                 df[col_name] = df[col_name].astype(str)
 
             new_type = df[col_name].dtype
             app_logger.debug(f'old: {col_type}, new: {new_type}')
 
-        return df
+        return df
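Note: the double-to-single quote churn in this file (and below) follows from the new [tool.ruff.format] quote-style = "single" setting in pyproject.toml. More substantively, _apply_type_conversion dispatches on regex-keyed rules: given the raw-string pattern keys and the re import, the first pattern that matches a column's declared type evidently wins, and a failed conversion falls back to str. A self-contained sketch of that dispatch, with an illustrative rule set and made-up column metadata:

import re

import pandas as pd

# Illustrative rules in the same shape as _get_type_conversion_rules returns
type_rules = {
    r'datetime64|datetime': lambda x: pd.to_datetime(x, errors='coerce'),
    r'uint64|uint8|float|decimal|int32': lambda x: x.astype(str).str.replace(
        r'\.0+$', '', regex=True
    ),
}

df = pd.DataFrame({'amount': [1.0, 2.5]})
col_type = 'float64'  # hypothetical value from the metadata frame

# First matching pattern wins, mirroring the loop-and-break above
converter = next(
    (fn for pattern, fn in type_rules.items() if re.search(pattern, col_type)),
    None,
)
if converter is not None:
    df['amount'] = converter(df['amount'])

print(df['amount'].tolist())  # ['1', '2.5']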
{xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/clickhouse.py
@@ -1,15 +1,21 @@
+import time
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
 import pandas as pd
-from typing import Optional, Dict, Callable, List, Tuple, Union
+
 from ..constants import DATE_FORMAT, DATETIME_FORMAT
-from .base import BaseDatabaseAdapter, Engine
-from ..models import DataReference, ObjectType
 from ..exceptions import QueryExecutionError
-import time
 from ..logger import app_logger
+from ..models import DataReference, ObjectType
+from .base import BaseDatabaseAdapter, Engine
+
 
 class ClickHouseAdapter(BaseDatabaseAdapter):
     """ClickHouse adapter with parameterized queries"""
-    def _execute_query(self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str) -> pd.DataFrame:
+
+    def _execute_query(
+        self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str
+    ) -> pd.DataFrame:
         df = None
         tz_set = None
         start_time = time.time()
@@ -32,14 +38,16 @@ class ClickHouseAdapter(BaseDatabaseAdapter):
             df = pd.read_sql(query, engine)
 
             execution_time = time.time() - start_time
-            app_logger.info(f"Query executed in {execution_time:.2f}s")
+            app_logger.info(f'Query executed in {execution_time:.2f}s')
             return df
 
         except Exception as e:
             execution_time = time.time() - start_time
-            app_logger.error(f"Query execution failed after {execution_time:.2f}s: {str(e)}")
+            app_logger.error(
+                f'Query execution failed after {execution_time:.2f}s: {str(e)}'
+            )
 
-            raise QueryExecutionError(f"Query failed: {str(e)}")
+            raise QueryExecutionError(f'Query failed: {str(e)}')
 
     def get_object_type(self, data_ref: DataReference, engine: Engine) -> ObjectType:
         """Determine if object is table or view in ClickHouse"""
@@ -67,7 +75,9 @@ class ClickHouseAdapter(BaseDatabaseAdapter):
             else:
                 return ObjectType.TABLE
         except Exception as e:
-            app_logger.warning(f"Could not determine object type for {data_ref.full_name}: {str(e)}")
+            app_logger.warning(
+                f'Could not determine object type for {data_ref.full_name}: {str(e)}'
+            )
 
         return ObjectType.UNKNOWN
 
@@ -97,8 +107,13 @@ class ClickHouseAdapter(BaseDatabaseAdapter):
         params = {'schema': data_ref.schema, 'table': data_ref.name}
         return query, params
 
-    def build_count_query(self, data_ref: DataReference, date_column: str,
-                          start_date: Optional[str], end_date: Optional[str]) -> Tuple[str, Dict]:
+    def build_count_query(
+        self,
+        data_ref: DataReference,
+        date_column: str,
+        start_date: Optional[str],
+        end_date: Optional[str],
+    ) -> Tuple[str, Dict]:
         query = f"""
             SELECT
                 formatDateTime(toDate({date_column}), '%%Y-%%m-%%d') as dt,
@@ -108,24 +123,29 @@ class ClickHouseAdapter(BaseDatabaseAdapter):
         """
         params = {}
 
-
         if start_date:
-            query += f" AND {date_column} >= toDate(%(start_date)s)"
+            query += f' AND {date_column} >= toDate(%(start_date)s)'
             params['start_date'] = start_date
         if end_date:
-            query += f" AND {date_column} < toDate(%(end_date)s) + INTERVAL 1 day"
+            query += f' AND {date_column} < toDate(%(end_date)s) + INTERVAL 1 day'
             params['end_date'] = end_date
 
-        query += " GROUP BY dt ORDER BY dt DESC"
+        query += ' GROUP BY dt ORDER BY dt DESC'
         return query, params
 
-    def build_data_query(self, data_ref: DataReference, columns: List[str],
-                         date_column: Optional[str], update_column: str,
-                         start_date: Optional[str], end_date: Optional[str],
-                         exclude_recent_hours: Optional[int] = None) -> Tuple[str, Dict]:
+    def build_data_query(
+        self,
+        data_ref: DataReference,
+        columns: List[str],
+        date_column: Optional[str],
+        update_column: str,
+        start_date: Optional[str],
+        end_date: Optional[str],
+        exclude_recent_hours: Optional[int] = None,
+    ) -> Tuple[str, Dict]:
         params = {}
         # Add recent data exclusion flag
-        exclusion_condition, exclusion_params = self._build_exclusion_condition(
+        exclusion_condition, exclusion_params = self._build_exclusion_condition(
             update_column, exclude_recent_hours
         )
 
@@ -139,36 +159,41 @@ class ClickHouseAdapter(BaseDatabaseAdapter):
            WHERE 1=1\n"""
 
         if start_date and date_column:
-            query += f" AND {date_column} >= toDate(%(start_date)s)\n"
+            query += f' AND {date_column} >= toDate(%(start_date)s)\n'
             params['start_date'] = start_date
         if end_date and date_column:
-            query += f" AND {date_column} < toDate(%(end_date)s) + INTERVAL 1 day\n"
+            query += f' AND {date_column} < toDate(%(end_date)s) + INTERVAL 1 day\n'
            params['end_date'] = end_date
 
         return query, params
 
-    def _build_exclusion_condition(self, update_column: str,
-                                   exclude_recent_hours: int) -> Tuple[str, Dict]:
+    def _build_exclusion_condition(
+        self, update_column: str, exclude_recent_hours: int
+    ) -> Tuple[str, Dict]:
         """ClickHouse-specific implementation for recent data exclusion"""
-        if update_column and exclude_recent_hours:
-
-
+        if update_column and exclude_recent_hours:
            exclude_recent_hours = exclude_recent_hours
 
            condition = f"""case when {update_column} > (now() - INTERVAL %(exclude_recent_hours)s HOUR) then 'y' end as xrecently_changed"""
-            params = {'exclude_recent_hours': exclude_recent_hours}
+            params = {'exclude_recent_hours': exclude_recent_hours}
            return condition, params
 
        return None, None
 
     def _get_type_conversion_rules(self, timezone: str) -> Dict[str, Callable]:
         return {
-            r'datetime64|datetime': lambda x: pd.to_datetime(x, utc=True, errors='coerce')
-            .dt.tz_convert(timezone)
-            .dt.strftime(DATETIME_FORMAT)
-            .str.replace(r'\s00:00:00$', '', regex=True),
-            r'date': lambda x: pd.to_datetime(x, errors='coerce')
-            .dt.strftime(DATE_FORMAT)
-            .str.replace(r'\s00:00:00$', '', regex=True),
-            r'uint64|uint8|float|decimal|int32': lambda x: x.astype(str).str.replace(r'\.0+$', '', regex=True),
-        }
+            r'datetime64|datetime': lambda x: (
+                pd.to_datetime(x, utc=True, errors='coerce')
+                .dt.tz_convert(timezone)
+                .dt.strftime(DATETIME_FORMAT)
+                .str.replace(r'\s00:00:00$', '', regex=True)
+            ),
+            r'date': lambda x: (
+                pd.to_datetime(x, errors='coerce')
+                .dt.strftime(DATE_FORMAT)
+                .str.replace(r'\s00:00:00$', '', regex=True)
+            ),
+            r'uint64|uint8|float|decimal|int32': lambda x: x.astype(str).str.replace(
+                r'\.0+$', '', regex=True
+            ),
+        }
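Note: every build_* method now returns a (query, params) pair instead of a fully interpolated string. For orientation, a pair in the shape build_count_query above would produce, with a made-up schema, table, and date column; the doubled %% in formatDateTime appears to be required because the query still passes through pyformat parameter substitution:

# Hypothetical output of build_count_query for analytics.events / created_at
query = """
    SELECT
        formatDateTime(toDate(created_at), '%%Y-%%m-%%d') as dt,
        count(*) as cnt
    FROM analytics.events
    WHERE 1=1 AND created_at >= toDate(%(start_date)s)
    GROUP BY dt ORDER BY dt DESC
"""
params = {'start_date': '2024-01-01'}

# pandas can bind the parameters separately, e.g.:
# df = pd.read_sql(query, engine, params=params)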
{xoverrr-1.1.5 → xoverrr-1.1.6}/src/xoverrr/adapters/oracle.py
@@ -1,16 +1,19 @@
+import time
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
 import pandas as pd
-from typing import Optional, Dict, Callable, List, Tuple, Union
 
 from ..constants import DATETIME_FORMAT
-from .base import BaseDatabaseAdapter, Engine
-from ..models import DataReference, ObjectType
 from ..exceptions import QueryExecutionError
 from ..logger import app_logger
-import time
+from ..models import DataReference, ObjectType
+from .base import BaseDatabaseAdapter, Engine
 
-class OracleAdapter(BaseDatabaseAdapter):
 
-    def _execute_query(self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str) -> pd.DataFrame:
+class OracleAdapter(BaseDatabaseAdapter):
+    def _execute_query(
+        self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str
+    ) -> pd.DataFrame:
         tz_set = None
         raw_conn = None
         cursor = None
@@ -40,12 +43,11 @@ class OracleAdapter(BaseDatabaseAdapter):
            app_logger.info(f'query\n {query}')
            cursor.execute(query)
 
-
            columns = [col[0].lower() for col in cursor.description]
            data = cursor.fetchall()
 
            execution_time = time.time() - start_time
-            app_logger.info(f"Query executed in {execution_time:.2f}s")
+            app_logger.info(f'Query executed in {execution_time:.2f}s')
 
            app_logger.info('complete')
 
@@ -57,20 +59,22 @@ class OracleAdapter(BaseDatabaseAdapter):
 
        except Exception as e:
            execution_time = time.time() - start_time
-            app_logger.error(f"Query execution failed after {execution_time:.2f}s: {str(e)}")
+            app_logger.error(
+                f'Query execution failed after {execution_time:.2f}s: {str(e)}'
+            )
 
            if raw_conn:
                try:
                    raw_conn.rollback()
                except Exception as rollback_error:
-                    app_logger.warning(f"Rollback failed: {rollback_error}")
+                    app_logger.warning(f'Rollback failed: {rollback_error}')
            try:
                if cursor:
                    cursor.close()
            except Exception as close_error:
-                app_logger.warning(f"Cursor close failed: {close_error}")
+                app_logger.warning(f'Cursor close failed: {close_error}')
 
-            raise QueryExecutionError(f"Query failed: {str(e)}")
+            raise QueryExecutionError(f'Query failed: {str(e)}')
 
     def get_object_type(self, data_ref: DataReference, engine: Engine) -> ObjectType:
         """Determine if object is table or view in Oracle"""
@@ -95,10 +99,12 @@ class OracleAdapter(BaseDatabaseAdapter):
            return {
                'table': ObjectType.TABLE,
                'view': ObjectType.VIEW,
-                'materialized_view': ObjectType.MATERIALIZED_VIEW
+                'materialized_view': ObjectType.MATERIALIZED_VIEW,
            }.get(type_str, ObjectType.UNKNOWN)
        except Exception as e:
-            app_logger.warning(f"Could not determine object type for {data_ref.full_name}: {str(e)}")
+            app_logger.warning(
+                f'Could not determine object type for {data_ref.full_name}: {str(e)}'
+            )
 
        return ObjectType.UNKNOWN
 
@@ -121,7 +127,7 @@ class OracleAdapter(BaseDatabaseAdapter):
 
     def build_primary_key_query(self, data_ref: DataReference) -> pd.DataFrame:
 
-        #todo add suport of unique indexes when no pk?
+        # todo add suport of unique indexes when no pk?
         query = """
            SELECT lower(cols.column_name) as pk_column_name
            FROM all_constraints cons
@@ -139,9 +145,13 @@ class OracleAdapter(BaseDatabaseAdapter):
        params['table_name'] = data_ref.name
        return query, params
 
-
-    def build_count_query(self, data_ref: DataReference, date_column: str,
-                          start_date: Optional[str], end_date: Optional[str]) -> Tuple[str, Dict]:
+    def build_count_query(
+        self,
+        data_ref: DataReference,
+        date_column: str,
+        start_date: Optional[str],
+        end_date: Optional[str],
+    ) -> Tuple[str, Dict]:
        query = f"""
            SELECT
                to_char(trunc({date_column}, 'dd'),'YYYY-MM-DD') as dt,
@@ -150,7 +160,6 @@ class OracleAdapter(BaseDatabaseAdapter):
            WHERE 1=1\n"""
        params = {}
 
-
        if start_date:
            query += f" AND {date_column} >= trunc(to_date(:start_date, 'YYYY-MM-DD'), 'dd')\n"
            params['start_date'] = start_date
@@ -161,14 +170,20 @@ class OracleAdapter(BaseDatabaseAdapter):
        query += f" GROUP BY to_char(trunc({date_column}, 'dd'),'YYYY-MM-DD') ORDER BY dt DESC"
        return query, params
 
-    def build_data_query(self, data_ref: DataReference, columns: List[str],
-                         date_column: Optional[str], update_column: str,
-                         start_date: Optional[str], end_date: Optional[str],
-                         exclude_recent_hours: Optional[int] = None) -> Tuple[str, Dict]:
+    def build_data_query(
+        self,
+        data_ref: DataReference,
+        columns: List[str],
+        date_column: Optional[str],
+        update_column: str,
+        start_date: Optional[str],
+        end_date: Optional[str],
+        exclude_recent_hours: Optional[int] = None,
+    ) -> Tuple[str, Dict]:
 
        params = {}
        # Add recent data exclusion flag
-        exclusion_condition, exclusion_params = self._build_exclusion_condition(
+        exclusion_condition, exclusion_params = self._build_exclusion_condition(
            update_column, exclude_recent_hours
        )
 
@@ -191,25 +206,39 @@ class OracleAdapter(BaseDatabaseAdapter):
 
        return query, params
 
-    def _build_exclusion_condition(self, update_column: str,
-                                   exclude_recent_hours: int) -> Tuple[str, Dict]:
+    def _build_exclusion_condition(
+        self, update_column: str, exclude_recent_hours: int
+    ) -> Tuple[str, Dict]:
        """Oracle-specific implementation for recent data exclusion"""
-        if update_column and exclude_recent_hours:
-
-
-
+        if update_column and exclude_recent_hours:
            condition = f"""case when {update_column} > (sysdate - :exclude_recent_hours/24) then 'y' end as xrecently_changed"""
-            params = {'exclude_recent_hours': exclude_recent_hours}
+            params = {'exclude_recent_hours': exclude_recent_hours}
            return condition, params
 
        return None, None
 
     def _get_type_conversion_rules(self, timezone: str) -> Dict[str, Callable]:
        return {
-            #errors='coerce' is needed as workaround for >= 2262 year: Out of bounds nanosecond timestamp (3023-04-04 00:00:00)
+            # errors='coerce' is needed as workaround for >= 2262 year: Out of bounds nanosecond timestamp (3023-04-04 00:00:00)
            # todo need specify explicit dateformat (nls params) in sessions, for the correct string conversion to datetime
-            r'date': lambda x: pd.to_datetime(x, errors='coerce').dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
-            r'timestamp.*\bwith\b.*time\szone': lambda x: pd.to_datetime(x, utc=True, errors='coerce').dt.tz_convert(timezone).dt.tz_localize(None).dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
-            r'timestamp': lambda x: pd.to_datetime(x, errors='coerce').dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
-            r'number|float|double': lambda x: x.astype(str).str.replace(r'\.0+$', '', regex=True).str.lower(), #lower case for exponential form compare
+            r'date': lambda x: (
+                pd.to_datetime(x, errors='coerce')
+                .dt.strftime(DATETIME_FORMAT)
+                .str.replace(r'\s00:00:00$', '', regex=True)
+            ),
+            r'timestamp.*\bwith\b.*time\szone': lambda x: (
+                pd.to_datetime(x, utc=True, errors='coerce')
+                .dt.tz_convert(timezone)
+                .dt.tz_localize(None)
+                .dt.strftime(DATETIME_FORMAT)
+                .str.replace(r'\s00:00:00$', '', regex=True)
+            ),
+            r'timestamp': lambda x: (
+                pd.to_datetime(x, errors='coerce')
+                .dt.strftime(DATETIME_FORMAT)
+                .str.replace(r'\s00:00:00$', '', regex=True)
+            ),
+            r'number|float|double': lambda x: (
+                x.astype(str).str.replace(r'\.0+$', '', regex=True).str.lower()
+            ),  # lower case for exponential form compare
        }
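Note: each adapter keeps its driver's native bind style for the recent-data exclusion, as the two _build_exclusion_condition implementations show: Oracle uses colon-named binds, ClickHouse uses pyformat placeholders. Side by side, with a hypothetical updated_at column standing in for {update_column}:

# Oracle: colon-named bind
oracle_condition = (
    'case when updated_at > (sysdate - :exclude_recent_hours/24) '
    "then 'y' end as xrecently_changed"
)

# ClickHouse: pyformat placeholder
clickhouse_condition = (
    'case when updated_at > (now() - INTERVAL %(exclude_recent_hours)s HOUR) '
    "then 'y' end as xrecently_changed"
)

# Both pair the SQL fragment with the same params dict
params = {'exclude_recent_hours': 6}  # illustrative value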