xoverrr 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xoverrr/__init__.py +8 -12
- xoverrr/adapters/__init__.py +7 -2
- xoverrr/adapters/base.py +61 -32
- xoverrr/adapters/clickhouse.py +64 -35
- xoverrr/adapters/oracle.py +67 -38
- xoverrr/adapters/postgres.py +67 -35
- xoverrr/constants.py +4 -4
- xoverrr/core.py +299 -197
- xoverrr/exceptions.py +8 -1
- xoverrr/logger.py +4 -2
- xoverrr/models.py +11 -5
- xoverrr/utils.py +331 -259
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/METADATA +67 -71
- xoverrr-1.1.6.dist-info/RECORD +17 -0
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/WHEEL +1 -1
- xoverrr-1.1.4.dist-info/RECORD +0 -17
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/top_level.txt +0 -0
xoverrr/adapters/postgres.py
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from json import dumps
|
|
3
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
4
|
+
|
|
1
5
|
import pandas as pd
|
|
2
|
-
|
|
6
|
+
|
|
3
7
|
from ..constants import DATETIME_FORMAT
|
|
4
|
-
from .base import BaseDatabaseAdapter, Engine
|
|
5
|
-
from ..models import DataReference, ObjectType
|
|
6
8
|
from ..exceptions import QueryExecutionError
|
|
7
|
-
from json import dumps
|
|
8
|
-
|
|
9
9
|
from ..logger import app_logger
|
|
10
|
-
import
|
|
11
|
-
|
|
12
|
-
class PostgresAdapter(BaseDatabaseAdapter):
|
|
10
|
+
from ..models import DataReference, ObjectType
|
|
11
|
+
from .base import BaseDatabaseAdapter, Engine
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
class PostgresAdapter(BaseDatabaseAdapter):
|
|
15
|
+
def _execute_query(
|
|
16
|
+
self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str
|
|
17
|
+
) -> pd.DataFrame:
|
|
16
18
|
|
|
17
19
|
df = None
|
|
18
20
|
tz_set = None
|
|
@@ -36,14 +38,15 @@ class PostgresAdapter(BaseDatabaseAdapter):
|
|
|
36
38
|
app_logger.info(f'query\n {query}')
|
|
37
39
|
df = pd.read_sql(query, engine)
|
|
38
40
|
execution_time = time.time() - start_time
|
|
39
|
-
app_logger.info(f
|
|
41
|
+
app_logger.info(f'Query executed in {execution_time:.2f}s')
|
|
40
42
|
app_logger.info('complete')
|
|
41
43
|
return df
|
|
42
44
|
except Exception as e:
|
|
43
45
|
execution_time = time.time() - start_time
|
|
44
|
-
app_logger.error(
|
|
45
|
-
|
|
46
|
-
|
|
46
|
+
app_logger.error(
|
|
47
|
+
f'Query execution failed after {execution_time:.2f}s: {str(e)}'
|
|
48
|
+
)
|
|
49
|
+
raise QueryExecutionError(f'Query failed: {str(e)}')
|
|
47
50
|
|
|
48
51
|
def get_object_type(self, data_ref: DataReference, engine: Engine) -> ObjectType:
|
|
49
52
|
"""Determine if object is table, view, or materialized view"""
|
|
@@ -69,10 +72,12 @@ class PostgresAdapter(BaseDatabaseAdapter):
|
|
|
69
72
|
return {
|
|
70
73
|
'table': ObjectType.TABLE,
|
|
71
74
|
'view': ObjectType.VIEW,
|
|
72
|
-
'materialized_view': ObjectType.MATERIALIZED_VIEW
|
|
75
|
+
'materialized_view': ObjectType.MATERIALIZED_VIEW,
|
|
73
76
|
}.get(type_str, ObjectType.UNKNOWN)
|
|
74
77
|
except Exception as e:
|
|
75
|
-
app_logger.warning(
|
|
78
|
+
app_logger.warning(
|
|
79
|
+
f'Could not determine object type for {data_ref.full_name}: {str(e)}'
|
|
80
|
+
)
|
|
76
81
|
|
|
77
82
|
return ObjectType.UNKNOWN
|
|
78
83
|
|
|
@@ -110,9 +115,13 @@ class PostgresAdapter(BaseDatabaseAdapter):
|
|
|
110
115
|
params = {'schema': data_ref.schema, 'table': data_ref.name}
|
|
111
116
|
return query, params
|
|
112
117
|
|
|
113
|
-
def build_count_query(
|
|
114
|
-
|
|
115
|
-
|
|
118
|
+
def build_count_query(
|
|
119
|
+
self,
|
|
120
|
+
data_ref: DataReference,
|
|
121
|
+
date_column: str,
|
|
122
|
+
start_date: Optional[str],
|
|
123
|
+
end_date: Optional[str],
|
|
124
|
+
) -> Tuple[str, Dict]:
|
|
116
125
|
query = f"""
|
|
117
126
|
SELECT
|
|
118
127
|
to_char(date_trunc('day', {date_column}),'YYYY-MM-DD') as dt,
|
|
@@ -131,14 +140,20 @@ class PostgresAdapter(BaseDatabaseAdapter):
|
|
|
131
140
|
query += f" GROUP BY to_char(date_trunc('day', {date_column}),'YYYY-MM-DD') ORDER BY dt DESC"
|
|
132
141
|
return query, params
|
|
133
142
|
|
|
134
|
-
def build_data_query(
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
143
|
+
def build_data_query(
|
|
144
|
+
self,
|
|
145
|
+
data_ref: DataReference,
|
|
146
|
+
columns: List[str],
|
|
147
|
+
date_column: Optional[str],
|
|
148
|
+
update_column: str,
|
|
149
|
+
start_date: Optional[str],
|
|
150
|
+
end_date: Optional[str],
|
|
151
|
+
exclude_recent_hours: Optional[int] = None,
|
|
152
|
+
) -> Tuple[str, Dict]:
|
|
138
153
|
|
|
139
154
|
params = {}
|
|
140
155
|
# Add recent data exclusion flag
|
|
141
|
-
exclusion_condition,
|
|
156
|
+
exclusion_condition, exclusion_params = self._build_exclusion_condition(
|
|
142
157
|
update_column, exclude_recent_hours
|
|
143
158
|
)
|
|
144
159
|
|
|
@@ -160,26 +175,43 @@ class PostgresAdapter(BaseDatabaseAdapter):
|
|
|
160
175
|
|
|
161
176
|
return query, params
|
|
162
177
|
|
|
163
|
-
def _build_exclusion_condition(
|
|
164
|
-
|
|
178
|
+
def _build_exclusion_condition(
|
|
179
|
+
self, update_column: str, exclude_recent_hours: int
|
|
180
|
+
) -> Tuple[str, Dict]:
|
|
165
181
|
"""PostgreSQL-specific implementation for recent data exclusion"""
|
|
166
|
-
if
|
|
167
|
-
|
|
168
|
-
|
|
182
|
+
if update_column and exclude_recent_hours:
|
|
169
183
|
exclude_recent_hours = exclude_recent_hours
|
|
170
184
|
|
|
171
185
|
condition = f"""case when {update_column} > (now() - INTERVAL '%(exclude_recent_hours)s hours') then 'y' end as xrecently_changed"""
|
|
172
|
-
params = {'exclude_recent_hours':
|
|
186
|
+
params = {'exclude_recent_hours': exclude_recent_hours}
|
|
173
187
|
return condition, params
|
|
174
188
|
|
|
175
189
|
return None, None
|
|
176
190
|
|
|
177
191
|
def _get_type_conversion_rules(self, timezone) -> Dict[str, Callable]:
|
|
178
192
|
return {
|
|
179
|
-
r'date': lambda x:
|
|
193
|
+
r'date': lambda x: (
|
|
194
|
+
pd.to_datetime(x, errors='coerce')
|
|
195
|
+
.dt.strftime(DATETIME_FORMAT)
|
|
196
|
+
.str.replace(r'\s00:00:00$', '', regex=True)
|
|
197
|
+
),
|
|
180
198
|
r'boolean': lambda x: x.map({True: '1', False: '0', None: ''}),
|
|
181
|
-
r'timestamptz|timestamp.*\bwith\b.*time\szone': lambda x:
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
199
|
+
r'timestamptz|timestamp.*\bwith\b.*time\szone': lambda x: (
|
|
200
|
+
pd.to_datetime(x, utc=True, errors='coerce')
|
|
201
|
+
.dt.tz_convert(timezone)
|
|
202
|
+
.dt.tz_localize(None)
|
|
203
|
+
.dt.strftime(DATETIME_FORMAT)
|
|
204
|
+
.str.replace(r'\s00:00:00$', '', regex=True)
|
|
205
|
+
),
|
|
206
|
+
r'timestamp': lambda x: (
|
|
207
|
+
pd.to_datetime(x, errors='coerce')
|
|
208
|
+
.dt.strftime(DATETIME_FORMAT)
|
|
209
|
+
.str.replace(r'\s00:00:00$', '', regex=True)
|
|
210
|
+
),
|
|
211
|
+
r'integer|numeric|double|float|double precision|real': lambda x: x.astype(
|
|
212
|
+
str
|
|
213
|
+
).str.replace(r'\.0+$', '', regex=True),
|
|
214
|
+
r'json': lambda x: (
|
|
215
|
+
'"' + x.astype(str).str.replace(r'"', '\\"', regex=True) + '"'
|
|
216
|
+
),
|
|
217
|
+
}
|
xoverrr/constants.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# Date and time formats
|
|
2
|
-
DATE_FORMAT =
|
|
3
|
-
DATETIME_FORMAT = f
|
|
2
|
+
DATE_FORMAT = '%Y-%m-%d'
|
|
3
|
+
DATETIME_FORMAT = f'{DATE_FORMAT} %H:%M:%S'
|
|
4
4
|
|
|
5
5
|
# Default values
|
|
6
|
-
NULL_REPLACEMENT =
|
|
6
|
+
NULL_REPLACEMENT = 'N/A'
|
|
7
7
|
DEFAULT_MAX_EXAMPLES = 3
|
|
8
8
|
DEFAULT_MAX_SAMPLE_SIZE_GB = 3 # Max size of dataframe to compare
|
|
9
9
|
|
|
@@ -15,4 +15,4 @@ DEFAULT_TZ = 'UTC'
|
|
|
15
15
|
# Comparison result statuses
|
|
16
16
|
COMPARISON_SUCCESS = 'success'
|
|
17
17
|
COMPARISON_FAILED = 'failed'
|
|
18
|
-
COMPARISON_SKIPPED = 'skipped'
|
|
18
|
+
COMPARISON_SKIPPED = 'skipped'
|