xoverrr 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xoverrr/__init__.py +8 -12
- xoverrr/adapters/__init__.py +7 -2
- xoverrr/adapters/base.py +61 -32
- xoverrr/adapters/clickhouse.py +64 -35
- xoverrr/adapters/oracle.py +67 -38
- xoverrr/adapters/postgres.py +67 -35
- xoverrr/constants.py +4 -4
- xoverrr/core.py +299 -197
- xoverrr/exceptions.py +8 -1
- xoverrr/logger.py +4 -2
- xoverrr/models.py +11 -5
- xoverrr/utils.py +331 -259
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/METADATA +67 -71
- xoverrr-1.1.6.dist-info/RECORD +17 -0
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/WHEEL +1 -1
- xoverrr-1.1.4.dist-info/RECORD +0 -17
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {xoverrr-1.1.4.dist-info → xoverrr-1.1.6.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xoverrr
|
|
3
|
-
Version: 1.1.4
|
|
3
|
+
Version: 1.1.6
|
|
4
4
|
Summary: A tool for cross-database and intra-source data comparison with detailed discrepancy analysis and reporting.
|
|
5
5
|
Author-email: Dmitry Ischenko <hotmori@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -21,7 +21,7 @@ Requires-Dist: clickhouse-sqlalchemy>=0.2.0
|
|
|
21
21
|
Provides-Extra: dev
|
|
22
22
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
23
23
|
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
24
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: ruff>=0.15.0; extra == "dev"
|
|
25
25
|
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
26
26
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
27
27
|
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
@@ -31,7 +31,7 @@ Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
|
31
31
|
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
32
32
|
Requires-Dist: tenacity>=8.2.0; extra == "test"
|
|
33
33
|
Provides-Extra: lint
|
|
34
|
-
Requires-Dist:
|
|
34
|
+
Requires-Dist: ruff>=0.15.0; extra == "lint"
|
|
35
35
|
Requires-Dist: isort>=5.12.0; extra == "lint"
|
|
36
36
|
Requires-Dist: flake8>=6.0.0; extra == "lint"
|
|
37
37
|
Dynamic: license-file
|
|
@@ -40,6 +40,70 @@ Dynamic: license-file
|
|
|
40
40
|
|
|
41
41
|
A tool for cross-database and intra-source data comparison with detailed discrepancy analysis and reporting.
|
|
42
42
|
|
|
43
|
+
## Usage Example
|
|
44
|
+
**Sample comparison** (Greenplum vs Oracle):
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from xoverrr import DataQualityComparator, DataReference, COMPARISON_SUCCESS, COMPARISON_FAILED, COMPARISON_SKIPPED
|
|
48
|
+
import os
|
|
49
|
+
from datetime import date, timedelta
|
|
50
|
+
|
|
51
|
+
USER_ORA = os.getenv('USER_ORA', '')
|
|
52
|
+
PASSWORD_ORA = os.getenv('PASSWORD_ORA', '')
|
|
53
|
+
|
|
54
|
+
USER_GP = os.getenv('USER_GP', '')
|
|
55
|
+
PASSWORD_GP = os.getenv('PASSWORD_GP', '')
|
|
56
|
+
|
|
57
|
+
HOST_ORA = os.getenv('HOST_ORA', '')
|
|
58
|
+
HOST_GP = os.getenv('HOST_GP', '')
|
|
59
|
+
|
|
60
|
+
def create_src_engine(user, password, host):
|
|
61
|
+
"""Source engine (Oracle)"""
|
|
62
|
+
os.environ['NLS_LANG'] = '.AL32UTF8'
|
|
63
|
+
return create_engine(f'oracle+oracledb://{user}:{password}@{host}:1521/?service_name=dwh')
|
|
64
|
+
|
|
65
|
+
def create_trg_engine(user, password, host):
|
|
66
|
+
"""Target engine (Postgres/Greenplum)"""
|
|
67
|
+
connection_string = f'postgresql+psycopg2://{user}:{password}@{host}:5432/adb'
|
|
68
|
+
engine = create_engine(connection_string)
|
|
69
|
+
return engine
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
src_engine = create_src_engine(USER_ORA, PASSWORD_ORA, HOST_ORA)
|
|
73
|
+
trg_engine = create_trg_engine(USER_GP, PASSWORD_GP, HOST_GP)
|
|
74
|
+
|
|
75
|
+
comparator = DataQualityComparator(
|
|
76
|
+
source_engine=src_engine,
|
|
77
|
+
target_engine=trg_engine,
|
|
78
|
+
timezone='Europe/Athens'
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
source = DataReference("users", "schema1")
|
|
82
|
+
target = DataReference("users", "schema2")
|
|
83
|
+
|
|
84
|
+
FORMAT = '%Y-%m-%d'
|
|
85
|
+
recent_range_end = date.today()
|
|
86
|
+
recent_range_begin = recent_range_end - timedelta(days=1)
|
|
87
|
+
|
|
88
|
+
status, report, stats, details = comparator.compare_sample(
|
|
89
|
+
source,
|
|
90
|
+
target,
|
|
91
|
+
date_column="created_at",
|
|
92
|
+
update_column="modified_date",
|
|
93
|
+
exclude_columns=["audit_timestamp", "internal_id"],
|
|
94
|
+
exclude_recent_hours=3,
|
|
95
|
+
date_range=(
|
|
96
|
+
recent_range_begin.strftime(FORMAT),
|
|
97
|
+
recent_range_end.strftime(FORMAT)
|
|
98
|
+
),
|
|
99
|
+
tolerance_percentage=0
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
print(report)
|
|
103
|
+
if status == COMPARISON_FAILED:
|
|
104
|
+
raise Exception("Sample check failed")
|
|
105
|
+
```
|
|
106
|
+
|
|
43
107
|
## Key Features
|
|
44
108
|
- **Multi‑DBMS support**: Oracle, PostgreSQL (+ Greenplum), ClickHouse (extensible via adapter layer) — tables and views.
|
|
45
109
|
- **Universal connections**: Provide SQLAlchemy Engine objects for source and target databases.
|
|
@@ -273,71 +337,3 @@ Logs include timing information and structured context:
|
|
|
273
337
|
- If `final_diff_score ≤ tolerance`: status = `COMPARISON_SUCCESS`
|
|
274
338
|
- Enables configuration of acceptable discrepancy levels.
|
|
275
339
|
|
|
276
|
-
---
|
|
277
|
-
|
|
278
|
-
## Usage Example
|
|
279
|
-
**Sample comparison** (Greenplum vs Oracle):
|
|
280
|
-
|
|
281
|
-
```python
|
|
282
|
-
from xoverrr import DataQualityComparator, DataReference, COMPARISON_SUCCESS, COMPARISON_FAILED, COMPARISON_SKIPPED
|
|
283
|
-
import os
|
|
284
|
-
from datetime import date, timedelta
|
|
285
|
-
|
|
286
|
-
USER_ORA = os.getenv('USER_ORA', '')
|
|
287
|
-
PASSWORD_ORA = os.getenv('PASSWORD_ORA', '')
|
|
288
|
-
|
|
289
|
-
USER_GP = os.getenv('USER_GP', '')
|
|
290
|
-
PASSWORD_GP = os.getenv('PASSWORD_GP', '')
|
|
291
|
-
|
|
292
|
-
HOST = os.getenv('HOST', '')
|
|
293
|
-
|
|
294
|
-
def create_src_engine(user, password, host):
|
|
295
|
-
"""Source engine (Oracle)"""
|
|
296
|
-
os.environ['NLS_LANG'] = '.AL32UTF8'
|
|
297
|
-
return create_engine(f'oracle+oracledb://{user}:{password}@{host}:1521/?service_name=dwh')
|
|
298
|
-
|
|
299
|
-
def create_trg_engine(user, password, host):
|
|
300
|
-
"""Target engine (Postgres/Greenplum)"""
|
|
301
|
-
connection_string = f'postgresql+psycopg2://{user}:{password}@{host}:5432/adb'
|
|
302
|
-
engine = create_engine(connection_string)
|
|
303
|
-
return engine
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
src_engine = create_src_engine(USER_ORA, PASSWORD_ORA, HOST)
|
|
308
|
-
trg_engine = create_trg_engine(USER_GP, PASSWORD_GP, HOST)
|
|
309
|
-
|
|
310
|
-
comparator = DataQualityComparator(
|
|
311
|
-
source_engine=src_engine,
|
|
312
|
-
target_engine=trg_engine,
|
|
313
|
-
timezone='Asia/Yekaterinburg'
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
source = DataReference("users", "schema1")
|
|
317
|
-
target = DataReference("users", "schema2")
|
|
318
|
-
|
|
319
|
-
FORMAT = '%Y-%m-%d'
|
|
320
|
-
recent_range_end = date.today()
|
|
321
|
-
recent_range_begin = recent_range_end - timedelta(days=1)
|
|
322
|
-
|
|
323
|
-
status, report, stats, details = comparator.compare_sample(
|
|
324
|
-
source,
|
|
325
|
-
target,
|
|
326
|
-
date_column="created_at",
|
|
327
|
-
update_column="modified_date",
|
|
328
|
-
exclude_columns=["audit_timestamp", "internal_id"],
|
|
329
|
-
exclude_recent_hours=24,
|
|
330
|
-
date_range=(
|
|
331
|
-
recent_range_begin.strftime(FORMAT),
|
|
332
|
-
recent_range_end.strftime(FORMAT)
|
|
333
|
-
),
|
|
334
|
-
tolerance_percentage=0
|
|
335
|
-
)
|
|
336
|
-
|
|
337
|
-
print(report)
|
|
338
|
-
if status == COMPARISON_FAILED:
|
|
339
|
-
raise Exception("Sample check failed")
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
---
|
|
343
|
-
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
xoverrr/__init__.py,sha256=Qf-FSLLF6gTMZmY67VOdU9SbQ6xI_Q-udzlbe3joEcU,327
|
|
2
|
+
xoverrr/constants.py,sha256=1YHnBcVC9FSKrPUtM7qpviR1B8MZBbXcMOnekFFqX8E,462
|
|
3
|
+
xoverrr/core.py,sha256=f001o5AANHzEYsc9cGS531mrgaopMzYElisOUWBjQGQ,26974
|
|
4
|
+
xoverrr/exceptions.py,sha256=KZ0MDXJ0A2pUebhac_olpYhR3257VCMPlxLTua9WU_Q,445
|
|
5
|
+
xoverrr/logger.py,sha256=c-D-RQFqjagY2Hq39pp9WhNCtmwwpES9O6wuqr-o-oc,368
|
|
6
|
+
xoverrr/models.py,sha256=44KHAkuKiip55uaEEQz14w_yaiFo3Rg5qe-IN17Bf3g,1598
|
|
7
|
+
xoverrr/utils.py,sha256=6laDs4GCaAxfO88BbKtwxIpWLvqT2OMt85zAWahfWc4,25552
|
|
8
|
+
xoverrr/adapters/__init__.py,sha256=dXNvTuvLHhZdUKZMYmuLdkzg6F6VQJage8d2FJvLyHQ,263
|
|
9
|
+
xoverrr/adapters/base.py,sha256=djyulx6TCvODE2vzQkX_9gvBwUGe9nt9bourklSEJJU,4930
|
|
10
|
+
xoverrr/adapters/clickhouse.py,sha256=KqnKmxCiLBaQO5JOl-J9NjTcTvHdHmZlqCKgfRnYGfk,7116
|
|
11
|
+
xoverrr/adapters/oracle.py,sha256=xlbtWQudBItfPtFtQZ233k0xNaDI6cCjNcMJjRbWRQo,8936
|
|
12
|
+
xoverrr/adapters/postgres.py,sha256=hB6iCZblb06Q0uEbSf6gk1vdk47UYpD2Eteom_pDhlQ,8224
|
|
13
|
+
xoverrr-1.1.6.dist-info/licenses/LICENSE,sha256=ez2uHRKRleWvcvGHEm4ei4qwyfMc1Tf857fZ9zuXsuo,1074
|
|
14
|
+
xoverrr-1.1.6.dist-info/METADATA,sha256=PEXVChdMmB7Ic0Mu605jBXDIZQxwyVk3zGNSk3J63Xw,13896
|
|
15
|
+
xoverrr-1.1.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
+
xoverrr-1.1.6.dist-info/top_level.txt,sha256=vDgDuhkwThKcnQ_CN76iR4tMljtryLGZiftc0Y6-MYI,8
|
|
17
|
+
xoverrr-1.1.6.dist-info/RECORD,,
|
xoverrr-1.1.4.dist-info/RECORD
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
xoverrr/__init__.py,sha256=x3OQ1psbjmcYpS1u1MZG9fkRAQ92CqipILQDxJJs8f0,319
|
|
2
|
-
xoverrr/constants.py,sha256=jiZ3PnI3lkkmwucqqs56kBKIBk1xWnU86_d9c-CfiAU,461
|
|
3
|
-
xoverrr/core.py,sha256=wgREeXprpqGnmuzmnckp1Q6t2OagL58G9cinfPQ1Kc0,26242
|
|
4
|
-
xoverrr/exceptions.py,sha256=wLnhWb5blFHeNh7l63tohHlVT7WVhLYSF6Guu79RFRM,437
|
|
5
|
-
xoverrr/logger.py,sha256=qROmezBVEmWT2XhWrhK0HtEMwbVsp-_w2QroeekuuOQ,361
|
|
6
|
-
xoverrr/models.py,sha256=MDeTYQEz4QnZZwJXet16hvkoAUWVayIdgn77g-ZuZ10,1591
|
|
7
|
-
xoverrr/utils.py,sha256=rai75punqHIHWVr4quRwvI9feyzxy11GdtSKEGD90JU,25414
|
|
8
|
-
xoverrr/adapters/__init__.py,sha256=83nYToYrZW_ddyP1VEPZefB-0GrrWXYaVz_0955us5Y,243
|
|
9
|
-
xoverrr/adapters/base.py,sha256=5vCDXhQ2hRXxikDu6ZoVvI_mCC6CRxi9Z_bI_jBbPIA,4934
|
|
10
|
-
xoverrr/adapters/clickhouse.py,sha256=Cn4mXbZ5Fl0ADXW_6nW50LEjcjlf-dLo7zqDPjNCefE,7041
|
|
11
|
-
xoverrr/adapters/oracle.py,sha256=1Jaw15cPmG7odJe-BJ_NRs1oFI1VVOAQBTR6JDwrSdw,8657
|
|
12
|
-
xoverrr/adapters/postgres.py,sha256=TQThA3Np3wo-WdWoUOveQfCUcIDxmuz_RCJDUo5Yph4,7888
|
|
13
|
-
xoverrr-1.1.4.dist-info/licenses/LICENSE,sha256=ez2uHRKRleWvcvGHEm4ei4qwyfMc1Tf857fZ9zuXsuo,1074
|
|
14
|
-
xoverrr-1.1.4.dist-info/METADATA,sha256=m1mBdUEoo0d3xJGWpm9vQpgUrJfCUfTVsDZNia0RMJs,13865
|
|
15
|
-
xoverrr-1.1.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
16
|
-
xoverrr-1.1.4.dist-info/top_level.txt,sha256=vDgDuhkwThKcnQ_CN76iR4tMljtryLGZiftc0Y6-MYI,8
|
|
17
|
-
xoverrr-1.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|