xoverrr 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,20 @@
1
+ import time
2
+ from json import dumps
3
+ from typing import Callable, Dict, List, Optional, Tuple, Union
4
+
1
5
  import pandas as pd
2
- from typing import Optional, Dict, Callable, List, Tuple, Union
6
+
3
7
  from ..constants import DATETIME_FORMAT
4
- from .base import BaseDatabaseAdapter, Engine
5
- from ..models import DataReference, ObjectType
6
8
  from ..exceptions import QueryExecutionError
7
- from json import dumps
8
-
9
9
  from ..logger import app_logger
10
- import time
11
-
12
- class PostgresAdapter(BaseDatabaseAdapter):
10
+ from ..models import DataReference, ObjectType
11
+ from .base import BaseDatabaseAdapter, Engine
13
12
 
14
13
 
15
- def _execute_query(self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str) -> pd.DataFrame:
14
+ class PostgresAdapter(BaseDatabaseAdapter):
15
+ def _execute_query(
16
+ self, query: Union[str, Tuple[str, Dict]], engine: Engine, timezone: str
17
+ ) -> pd.DataFrame:
16
18
 
17
19
  df = None
18
20
  tz_set = None
@@ -36,14 +38,15 @@ class PostgresAdapter(BaseDatabaseAdapter):
36
38
  app_logger.info(f'query\n {query}')
37
39
  df = pd.read_sql(query, engine)
38
40
  execution_time = time.time() - start_time
39
- app_logger.info(f"Query executed in {execution_time:.2f}s")
41
+ app_logger.info(f'Query executed in {execution_time:.2f}s')
40
42
  app_logger.info('complete')
41
43
  return df
42
44
  except Exception as e:
43
45
  execution_time = time.time() - start_time
44
- app_logger.error(f"Query execution failed after {execution_time:.2f}s: {str(e)}")
45
- raise QueryExecutionError(f"Query failed: {str(e)}")
46
-
46
+ app_logger.error(
47
+ f'Query execution failed after {execution_time:.2f}s: {str(e)}'
48
+ )
49
+ raise QueryExecutionError(f'Query failed: {str(e)}')
47
50
 
48
51
  def get_object_type(self, data_ref: DataReference, engine: Engine) -> ObjectType:
49
52
  """Determine if object is table, view, or materialized view"""
@@ -69,10 +72,12 @@ class PostgresAdapter(BaseDatabaseAdapter):
69
72
  return {
70
73
  'table': ObjectType.TABLE,
71
74
  'view': ObjectType.VIEW,
72
- 'materialized_view': ObjectType.MATERIALIZED_VIEW
75
+ 'materialized_view': ObjectType.MATERIALIZED_VIEW,
73
76
  }.get(type_str, ObjectType.UNKNOWN)
74
77
  except Exception as e:
75
- app_logger.warning(f"Could not determine object type for {data_ref.full_name}: {str(e)}")
78
+ app_logger.warning(
79
+ f'Could not determine object type for {data_ref.full_name}: {str(e)}'
80
+ )
76
81
 
77
82
  return ObjectType.UNKNOWN
78
83
 
@@ -110,9 +115,13 @@ class PostgresAdapter(BaseDatabaseAdapter):
110
115
  params = {'schema': data_ref.schema, 'table': data_ref.name}
111
116
  return query, params
112
117
 
113
- def build_count_query(self, data_ref: DataReference, date_column: str,
114
- start_date: Optional[str], end_date: Optional[str]
115
- ) -> Tuple[str, Dict]:
118
+ def build_count_query(
119
+ self,
120
+ data_ref: DataReference,
121
+ date_column: str,
122
+ start_date: Optional[str],
123
+ end_date: Optional[str],
124
+ ) -> Tuple[str, Dict]:
116
125
  query = f"""
117
126
  SELECT
118
127
  to_char(date_trunc('day', {date_column}),'YYYY-MM-DD') as dt,
@@ -131,14 +140,20 @@ class PostgresAdapter(BaseDatabaseAdapter):
131
140
  query += f" GROUP BY to_char(date_trunc('day', {date_column}),'YYYY-MM-DD') ORDER BY dt DESC"
132
141
  return query, params
133
142
 
134
- def build_data_query(self, data_ref: DataReference, columns: List[str],
135
- date_column: Optional[str], update_column: str,
136
- start_date: Optional[str], end_date: Optional[str],
137
- exclude_recent_hours: Optional[int] = None) -> Tuple[str, Dict]:
143
+ def build_data_query(
144
+ self,
145
+ data_ref: DataReference,
146
+ columns: List[str],
147
+ date_column: Optional[str],
148
+ update_column: str,
149
+ start_date: Optional[str],
150
+ end_date: Optional[str],
151
+ exclude_recent_hours: Optional[int] = None,
152
+ ) -> Tuple[str, Dict]:
138
153
 
139
154
  params = {}
140
155
  # Add recent data exclusion flag
141
- exclusion_condition, exclusion_params = self._build_exclusion_condition(
156
+ exclusion_condition, exclusion_params = self._build_exclusion_condition(
142
157
  update_column, exclude_recent_hours
143
158
  )
144
159
 
@@ -160,26 +175,43 @@ class PostgresAdapter(BaseDatabaseAdapter):
160
175
 
161
176
  return query, params
162
177
 
163
- def _build_exclusion_condition(self, update_column: str,
164
- exclude_recent_hours: int) -> Tuple[str, Dict]:
178
+ def _build_exclusion_condition(
179
+ self, update_column: str, exclude_recent_hours: int
180
+ ) -> Tuple[str, Dict]:
165
181
  """PostgreSQL-specific implementation for recent data exclusion"""
166
- if update_column and exclude_recent_hours:
167
-
168
-
182
+ if update_column and exclude_recent_hours:
169
183
  exclude_recent_hours = exclude_recent_hours
170
184
 
171
185
  condition = f"""case when {update_column} > (now() - INTERVAL '%(exclude_recent_hours)s hours') then 'y' end as xrecently_changed"""
172
- params = {'exclude_recent_hours': exclude_recent_hours}
186
+ params = {'exclude_recent_hours': exclude_recent_hours}
173
187
  return condition, params
174
188
 
175
189
  return None, None
176
190
 
177
191
  def _get_type_conversion_rules(self, timezone) -> Dict[str, Callable]:
178
192
  return {
179
- r'date': lambda x: pd.to_datetime(x, errors='coerce').dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
193
+ r'date': lambda x: (
194
+ pd.to_datetime(x, errors='coerce')
195
+ .dt.strftime(DATETIME_FORMAT)
196
+ .str.replace(r'\s00:00:00$', '', regex=True)
197
+ ),
180
198
  r'boolean': lambda x: x.map({True: '1', False: '0', None: ''}),
181
- r'timestamptz|timestamp.*\bwith\b.*time\szone': lambda x: pd.to_datetime(x, utc=True, errors='coerce').dt.tz_convert(timezone).dt.tz_localize(None).dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
182
- r'timestamp': lambda x: pd.to_datetime(x, errors='coerce').dt.strftime(DATETIME_FORMAT).str.replace(r'\s00:00:00$', '', regex=True),
183
- r'integer|numeric|double|float|double precision|real': lambda x: x.astype(str).str.replace(r'\.0+$', '', regex=True),
184
- r'json': lambda x: '"' + x.astype(str).str.replace(r'"', '\\"', regex=True) + '"',
185
- }
199
+ r'timestamptz|timestamp.*\bwith\b.*time\szone': lambda x: (
200
+ pd.to_datetime(x, utc=True, errors='coerce')
201
+ .dt.tz_convert(timezone)
202
+ .dt.tz_localize(None)
203
+ .dt.strftime(DATETIME_FORMAT)
204
+ .str.replace(r'\s00:00:00$', '', regex=True)
205
+ ),
206
+ r'timestamp': lambda x: (
207
+ pd.to_datetime(x, errors='coerce')
208
+ .dt.strftime(DATETIME_FORMAT)
209
+ .str.replace(r'\s00:00:00$', '', regex=True)
210
+ ),
211
+ r'integer|numeric|double|float|double precision|real': lambda x: x.astype(
212
+ str
213
+ ).str.replace(r'\.0+$', '', regex=True),
214
+ r'json': lambda x: (
215
+ '"' + x.astype(str).str.replace(r'"', '\\"', regex=True) + '"'
216
+ ),
217
+ }
xoverrr/constants.py CHANGED
@@ -1,9 +1,9 @@
1
1
  # Date and time formats
2
- DATE_FORMAT = "%Y-%m-%d"
3
- DATETIME_FORMAT = f"{DATE_FORMAT} %H:%M:%S"
2
+ DATE_FORMAT = '%Y-%m-%d'
3
+ DATETIME_FORMAT = f'{DATE_FORMAT} %H:%M:%S'
4
4
 
5
5
  # Default values
6
- NULL_REPLACEMENT = "N/A"
6
+ NULL_REPLACEMENT = 'N/A'
7
7
  DEFAULT_MAX_EXAMPLES = 3
8
8
  DEFAULT_MAX_SAMPLE_SIZE_GB = 3 # Max size of dataframe to compare
9
9
 
@@ -15,4 +15,4 @@ DEFAULT_TZ = 'UTC'
15
15
  # Comparison result statuses
16
16
  COMPARISON_SUCCESS = 'success'
17
17
  COMPARISON_FAILED = 'failed'
18
- COMPARISON_SKIPPED = 'skipped'
18
+ COMPARISON_SKIPPED = 'skipped'