soda-postgres 4.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soda_postgres/common/data_sources/postgres_data_source.py +389 -0
- soda_postgres/common/data_sources/postgres_data_source_connection.py +116 -0
- soda_postgres/statements/postgres_metadata_tables_query.py +98 -0
- soda_postgres/test_helpers/postgres_data_source_test_helper.py +26 -0
- soda_postgres-4.0.5.dist-info/METADATA +6 -0
- soda_postgres-4.0.5.dist-info/RECORD +9 -0
- soda_postgres-4.0.5.dist-info/WHEEL +5 -0
- soda_postgres-4.0.5.dist-info/entry_points.txt +2 -0
- soda_postgres-4.0.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from soda_core.common.data_source_connection import DataSourceConnection
|
|
5
|
+
from soda_core.common.data_source_impl import DataSourceImpl
|
|
6
|
+
from soda_core.common.logging_constants import soda_logger
|
|
7
|
+
from soda_core.common.metadata_types import (
|
|
8
|
+
DataSourceNamespace,
|
|
9
|
+
SodaDataTypeName,
|
|
10
|
+
SqlDataType,
|
|
11
|
+
)
|
|
12
|
+
from soda_core.common.sql_ast import (
|
|
13
|
+
AND,
|
|
14
|
+
CAST,
|
|
15
|
+
COLUMN,
|
|
16
|
+
CREATE_TABLE_COLUMN,
|
|
17
|
+
EQ,
|
|
18
|
+
FROM,
|
|
19
|
+
GT,
|
|
20
|
+
IN,
|
|
21
|
+
JOIN,
|
|
22
|
+
LEFT_INNER_JOIN,
|
|
23
|
+
LITERAL,
|
|
24
|
+
LOWER,
|
|
25
|
+
ORDER_BY_ASC,
|
|
26
|
+
RAW_SQL,
|
|
27
|
+
REGEX_LIKE,
|
|
28
|
+
SELECT,
|
|
29
|
+
WHERE,
|
|
30
|
+
)
|
|
31
|
+
from soda_core.common.sql_dialect import SqlDialect
|
|
32
|
+
from soda_core.common.statements.metadata_tables_query import MetadataTablesQuery
|
|
33
|
+
from soda_postgres.common.data_sources.postgres_data_source_connection import (
|
|
34
|
+
PostgresDataSource as PostgresDataSourceModel,
|
|
35
|
+
)
|
|
36
|
+
from soda_postgres.common.data_sources.postgres_data_source_connection import (
|
|
37
|
+
PostgresDataSourceConnection,
|
|
38
|
+
)
|
|
39
|
+
from soda_postgres.statements.postgres_metadata_tables_query import (
|
|
40
|
+
PostgresMetadataTablesQuery,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# Module-wide logger; this is the shared soda_logger object, not a per-module logger.
logger: Logger = soda_logger


# Postgres type names that contain spaces. Kept as constants so that the
# dialect's mapping tables and PostgresSqlDataType compare against the same strings.
PG_TIMESTAMP_WITH_TIME_ZONE = "timestamp with time zone"
PG_TIMESTAMP_WITHOUT_TIME_ZONE = "timestamp without time zone"
PG_DOUBLE_PRECISION = "double precision"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class PostgresDataSourceImpl(DataSourceImpl, model_class=PostgresDataSourceModel):
    """Postgres data source implementation.

    Wires together the Postgres SQL dialect, the Postgres connection and the
    Postgres metadata-tables query for a ``PostgresDataSourceModel``.
    """

    def __init__(self, data_source_model: PostgresDataSourceModel, connection: Optional[DataSourceConnection] = None):
        """Forward the model and the optional pre-built connection to the base class."""
        super().__init__(data_source_model=data_source_model, connection=connection)

    def _create_sql_dialect(self) -> SqlDialect:
        """Return a new ``PostgresSqlDialect`` bound to this data source."""
        dialect = PostgresSqlDialect(data_source_impl=self)
        return dialect

    def _create_data_source_connection(self) -> DataSourceConnection:
        """Return a new ``PostgresDataSourceConnection`` built from the model's name and connection properties."""
        model = self.data_source_model
        return PostgresDataSourceConnection(
            name=model.name,
            connection_properties=model.connection_properties,
        )

    def create_metadata_tables_query(self) -> MetadataTablesQuery:
        """Return a ``PostgresMetadataTablesQuery`` using this data source's dialect and connection."""
        query = PostgresMetadataTablesQuery(
            sql_dialect=self.sql_dialect,
            data_source_connection=self.data_source_connection,
        )
        return query
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class PostgresSqlDataType(SqlDataType):
    """``SqlDataType`` that renders timestamp precision the Postgres way."""

    def get_sql_data_type_str_with_parameters(self) -> str:
        """Return the type as SQL text, including its parameters.

        For the two long-form timestamp names the precision goes directly
        after the word ``timestamp`` (``timestamp(3) with time zone``), so
        those are rendered here when an integer precision is set. Everything
        else is delegated to the base class.
        """
        precision = self.datetime_precision
        if isinstance(precision, int):
            if self.name == PG_TIMESTAMP_WITH_TIME_ZONE:
                return f"timestamp({precision}) with time zone"
            if self.name == PG_TIMESTAMP_WITHOUT_TIME_ZONE:
                return f"timestamp({precision}) without time zone"
        return super().get_sql_data_type_str_with_parameters()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class PostgresSqlDialect(SqlDialect):
    """Postgres flavour of ``SqlDialect``.

    Provides the Postgres spellings for casts and regex matching, the
    mappings between Soda data type names and Postgres type names, and the
    ``pg_catalog`` based SQL used to read column metadata.
    """

    # Pairs of Soda data type names declared as synonyms for this dialect.
    SODA_DATA_TYPE_SYNONYMS = (
        (SodaDataTypeName.NUMERIC, SodaDataTypeName.DECIMAL),
        (SodaDataTypeName.DOUBLE, SodaDataTypeName.FLOAT),
    )

    def supports_materialized_views(self) -> bool:
        """Report that this dialect supports materialized views."""
        return True

    def _build_regex_like_sql(self, matches: REGEX_LIKE) -> str:
        """Render a regex match using the ``~`` operator.

        NOTE(review): the pattern is placed between single quotes without any
        escaping, so a pattern containing ``'`` would end the SQL literal
        early. Confirm whether callers are expected to escape it.
        """
        expression: str = self.build_expression_sql(matches.expression)
        return f"{expression} ~ '{matches.regex_pattern}'"

    def create_schema_if_not_exists_sql(self, prefixes: list[str], add_semicolon: bool = True) -> str:
        """Return the base class's create-schema statement with ``AUTHORIZATION CURRENT_USER`` appended.

        The base statement is requested without its semicolon so the clause
        can be added first; the semicolon is appended here when
        ``add_semicolon`` is true.
        """
        return (
            f"{super().create_schema_if_not_exists_sql(prefixes, add_semicolon=False)} AUTHORIZATION CURRENT_USER"
            + (";" if add_semicolon else "")
        )

    def get_data_source_data_type_name_by_soda_data_type_names(self) -> dict[SodaDataTypeName, str]:
        """Map each Soda data type name to the Postgres type name used for it."""
        return {
            SodaDataTypeName.CHAR: "char",
            SodaDataTypeName.VARCHAR: "varchar",
            SodaDataTypeName.TEXT: "text",
            SodaDataTypeName.SMALLINT: "smallint",
            SodaDataTypeName.INTEGER: "integer",
            SodaDataTypeName.DECIMAL: "decimal",
            SodaDataTypeName.BIGINT: "bigint",
            SodaDataTypeName.NUMERIC: "numeric",
            SodaDataTypeName.FLOAT: "float",
            SodaDataTypeName.DOUBLE: PG_DOUBLE_PRECISION,
            SodaDataTypeName.TIMESTAMP: "timestamp",
            SodaDataTypeName.TIMESTAMP_TZ: "timestamptz",
            SodaDataTypeName.DATE: "date",
            SodaDataTypeName.TIME: "time",
            SodaDataTypeName.BOOLEAN: "boolean",
        }

    def get_soda_data_type_name_by_data_source_data_type_names(self) -> dict[str, SodaDataTypeName]:
        """Map Postgres type names (long and short spellings) to Soda data type names."""
        return {
            "character varying": SodaDataTypeName.VARCHAR,
            "varchar": SodaDataTypeName.VARCHAR,
            "character": SodaDataTypeName.CHAR,
            "char": SodaDataTypeName.CHAR,
            "text": SodaDataTypeName.TEXT,
            # bpchar is mapped to TEXT; is_same_soda_data_type_with_synonyms
            # compensates for this when CHAR was expected.
            "bpchar": SodaDataTypeName.TEXT,
            "smallint": SodaDataTypeName.SMALLINT,
            "int2": SodaDataTypeName.SMALLINT,
            "integer": SodaDataTypeName.INTEGER,
            "int": SodaDataTypeName.INTEGER,
            "int4": SodaDataTypeName.INTEGER,
            "bigint": SodaDataTypeName.BIGINT,
            "int8": SodaDataTypeName.BIGINT,
            "decimal": SodaDataTypeName.DECIMAL,
            "numeric": SodaDataTypeName.NUMERIC,
            "float": SodaDataTypeName.FLOAT,
            "real": SodaDataTypeName.FLOAT,
            "float4": SodaDataTypeName.FLOAT,
            PG_DOUBLE_PRECISION: SodaDataTypeName.DOUBLE,
            "float8": SodaDataTypeName.DOUBLE,
            "timestamp": SodaDataTypeName.TIMESTAMP,
            PG_TIMESTAMP_WITHOUT_TIME_ZONE: SodaDataTypeName.TIMESTAMP,
            "timestamptz": SodaDataTypeName.TIMESTAMP_TZ,
            PG_TIMESTAMP_WITH_TIME_ZONE: SodaDataTypeName.TIMESTAMP_TZ,
            "date": SodaDataTypeName.DATE,
            "time": SodaDataTypeName.TIME,
            "time without time zone": SodaDataTypeName.TIME,
            "boolean": SodaDataTypeName.BOOLEAN,
            "bool": SodaDataTypeName.BOOLEAN,
        }

    def _build_cast_sql(self, cast: CAST) -> str:
        """Render a cast with the ``expression::type`` syntax.

        A ``SodaDataTypeName`` target is translated to its data source type
        name; any other target is interpolated as given.
        """
        to_type_text: str = (
            self.get_data_source_data_type_name_for_soda_data_type_name(cast.to_type)
            if isinstance(cast.to_type, SodaDataTypeName)
            else cast.to_type
        )
        return f"{self.build_expression_sql(cast.expression)}::{to_type_text}"

    def _get_data_type_name_synonyms(self) -> list[list[str]]:
        """Return groups of Postgres type names that denote the same type."""
        return [
            ["varchar", "character varying"],
            ["char", "character"],
            ["integer", "int", "int4"],
            ["bigint", "int8"],
            ["smallint", "int2"],
            ["real", "float4"],
            [PG_DOUBLE_PRECISION, "float8"],
            ["timestamp", PG_TIMESTAMP_WITHOUT_TIME_ZONE],
            ["decimal", "numeric"],
        ]

    def get_sql_data_type_class(self) -> type:
        """Return the ``SqlDataType`` subclass to use with this dialect."""
        return PostgresSqlDataType

    def _build_create_table_column_type(self, create_table_column: CREATE_TABLE_COLUMN) -> str:
        """Render the column type for CREATE TABLE.

        ``text`` is always emitted without parameters; a configured maximum
        length is dropped with a warning. Other types are rendered by the
        base class.
        """
        if create_table_column.type.name == "text":  # Do not output text with parameters!
            if create_table_column.type.character_maximum_length is not None:
                logger.warning(
                    f"Text column {create_table_column.name} has a character maximum length, but text does not support parameters! Ignoring in postgres."
                )
            return "text"
        return super()._build_create_table_column_type(create_table_column=create_table_column)

    @classmethod
    def is_same_soda_data_type_with_synonyms(cls, expected: SodaDataTypeName, actual: SodaDataTypeName) -> bool:
        """Compare two Soda data type names, additionally accepting expected CHAR against actual TEXT."""
        # Postgres cursor can return bpchar, which is an unbounded synonym for char. We convert that to TEXT
        # as that is the best fit. So we could expect CHAR, but actual is TEXT.
        if expected == SodaDataTypeName.CHAR and actual == SodaDataTypeName.TEXT:
            logger.debug(
                f"In is_same_soda_data_type_with_synonyms, expected {expected} and actual {actual} are treated as the same because of postgres cursor returning BPCHAR (best matching with TEXT)"
            )
            return True

        return super().is_same_soda_data_type_with_synonyms(expected, actual)

    ###
    # Tables and columns metadata queries
    ###
    def _pg_catalog(self) -> str:
        """Name of the schema that holds the system catalogs."""
        return "pg_catalog"

    def _pg_class(self) -> str:
        """Name of the catalog table used as the FROM table of the metadata queries."""
        return "pg_class"

    def _pg_namespace(self) -> str:
        """Name of the catalog table joined to obtain the schema name (``nspname``)."""
        return "pg_namespace"

    def _current_database(self) -> str:
        """SQL expression that yields the current database name."""
        return "current_database()"

    def relkind_table_type_sql_expression(self, table_alias: str = "c", column_alias: str = "table_type") -> str:
        """Return a SQL CASE expression translating ``relkind`` into a readable table type.

        Args:
            table_alias: Alias of the ``pg_class`` table in the surrounding query.
            column_alias: Alias given to the resulting column.

        For relkind ``'r'`` the label is refined by ``relpersistence``; a
        value not listed in a CASE yields NULL because there is no ELSE branch.
        """
        return f"""CASE {table_alias}.relkind
            WHEN 'r' THEN
                CASE {table_alias}.relpersistence
                    WHEN 't' THEN 'TEMPORARY TABLE'
                    WHEN 'p' THEN 'BASE TABLE'
                    WHEN 'u' THEN 'UNLOGGED TABLE'
                END
            WHEN 'v' THEN 'VIEW'
            WHEN 'm' THEN 'MATERIALIZED VIEW'
            WHEN 'i' THEN 'INDEX'
            WHEN 'S' THEN 'SEQUENCE'
            WHEN 't' THEN 'TOAST TABLE'
            WHEN 'f' THEN 'FOREIGN TABLE'
            WHEN 'p' THEN 'PARTITIONED TABLE'
            WHEN 'I' THEN 'PARTITIONED INDEX'
        END as {column_alias}"""

    def build_columns_metadata_query_str(self, table_namespace: DataSourceNamespace, table_name: str) -> str:
        """
        Builds the full SQL query to read the column metadata of one table from the pg_catalog tables.

        The query returns one row per column, ordered by ``attnum``, with the
        fields column_name, data_type, character_maximum_length,
        numeric_precision, numeric_scale, datetime_precision, table_catalog,
        table_schema, table_name and table_type.

        Args:
            table_namespace: Supplies the database and schema used as filters;
                each filter is only added when the value is truthy.
            table_name: Table to describe; passed through ``metadata_casify``
                before being compared with ``relname``.
        """

        database_name: str | None = table_namespace.get_database_for_metadata_query()
        schema_name: str = table_namespace.get_schema_for_metadata_query()

        ######
        current_database_expression = RAW_SQL(self._current_database())
        select: list = [
            SELECT(
                [
                    COLUMN("attname", table_alias="a", field_alias="column_name"),
                    # Normalize data type into information_schema.columns style. Consider doing this in python
                    # instead, but this is lightweight and simple enough.
                    # NOTE(review): `t.typelem <> 0` is also true for non-array types that have an element
                    # type; confirm that reporting those as 'ARRAY' is intended.
                    RAW_SQL(
                        """CASE
                            -- arrays
                            WHEN t.typcategory = 'A' OR t.typelem <> 0 THEN 'ARRAY'

                            -- choose base type for domains, otherwise the type itself
                            ELSE CASE COALESCE(bt.typname, t.typname)
                                WHEN 'varchar' THEN 'character varying'
                                WHEN 'bpchar' THEN 'character'
                                WHEN 'bool' THEN 'boolean'
                                WHEN 'int2' THEN 'smallint'
                                WHEN 'int4' THEN 'integer'
                                WHEN 'int8' THEN 'bigint'
                                WHEN 'float4' THEN 'real'
                                WHEN 'float8' THEN 'double precision'
                                WHEN 'timestamptz' THEN 'timestamp with time zone'
                                WHEN 'timestamp' THEN 'timestamp without time zone'
                                WHEN 'timetz' THEN 'time with time zone'
                                WHEN 'time' THEN 'time without time zone'
                                WHEN 'bit' THEN 'bit'
                                WHEN 'varbit' THEN 'bit varying'
                                ELSE COALESCE(bt.typname, t.typname)
                            END
                        END AS \"data_type\"
                        """
                    ),
                    # Extract type parameters. No abstract level api for this, we have to replicate Postgres logic here.
                    # For varchar/bpchar and numeric, 4 is subtracted from a.atttypmod before decoding;
                    # for the time/timestamp types a.atttypmod is used unmodified.
                    # varchar/char length (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname IN ('varchar','bpchar') THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN a.atttypmod - 4
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "character_maximum_length"
                        """
                    ),
                    # numeric precision (NULL otherwise): taken from the bits above the low 16
                    RAW_SQL(
                        """CASE
                            WHEN t.typname = 'numeric' THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN ((a.atttypmod - 4) >> 16)
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "numeric_precision"
                        """
                    ),
                    # numeric scale (NULL otherwise): taken from the low 16 bits
                    RAW_SQL(
                        """CASE
                            WHEN t.typname = 'numeric' THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN ((a.atttypmod - 4) & 65535)
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "numeric_scale"
                        """
                    ),
                    # datetime precision (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname IN ('time','timetz','timestamp','timestamptz') THEN
                                CASE
                                    WHEN a.atttypmod >= 0 THEN a.atttypmod
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "datetime_precision"
                        """
                    ),
                    COLUMN(current_database_expression, field_alias="table_catalog"),
                    COLUMN("nspname", table_alias="n", field_alias="table_schema"),
                    COLUMN("relname", table_alias="c", field_alias="table_name"),
                    RAW_SQL(self.relkind_table_type_sql_expression()),
                ]
            ),
            FROM(
                self._pg_class(),
                table_prefix=[self._pg_catalog()],
                alias="c",
            ),
            JOIN(
                table_name=self._pg_namespace(),
                table_prefix=[self._pg_catalog()],
                alias="n",
                on_condition=EQ(
                    COLUMN("relnamespace", "c"),
                    COLUMN("oid", "n"),
                ),
            ),
            JOIN(
                table_name="pg_attribute",
                table_prefix=[self._pg_catalog()],
                alias="a",
                on_condition=EQ(
                    COLUMN("attrelid", "a"),
                    COLUMN("oid", "c"),
                ),
            ),
            JOIN(
                table_name="pg_type",
                table_prefix=[self._pg_catalog()],
                alias="t",
                on_condition=EQ(
                    COLUMN("atttypid", "a"),
                    COLUMN("oid", "t"),
                ),
            ),
            # Second pg_type join ("bt") resolves the base type; NULLIF turns a zero
            # typbasetype into NULL so that no row matches in that case.
            LEFT_INNER_JOIN(
                table_name="pg_type",
                table_prefix=[self._pg_catalog()],
                alias="bt",
                on_condition=EQ(
                    COLUMN("oid", "bt"),
                    RAW_SQL("NULLIF(t.typbasetype, 0)"),
                ),
            ),
            WHERE(
                AND(
                    [
                        # Only get object types that correspond to tables/views in information_schema.tables
                        IN(
                            COLUMN("relkind", "c"),
                            [LITERAL("r"), LITERAL("p"), LITERAL("v"), LITERAL("m"), LITERAL("f")],
                        ),
                        # Only get columns that are not dropped
                        GT(COLUMN("attnum", "a"), LITERAL(0)),
                        EQ(COLUMN("relname", "c"), LITERAL(self.metadata_casify(table_name))),
                    ]
                )
            ),
            ORDER_BY_ASC(COLUMN("attnum", "a")),
        ]

        # Database and schema are compared case-insensitively (both sides lower-cased).
        if database_name:
            database_name_lower: str = database_name.lower()
            select.append(WHERE(EQ(LOWER(current_database_expression), LITERAL(database_name_lower))))

        if schema_name:
            select.append(WHERE(EQ(LOWER(COLUMN("nspname", "n")), LITERAL(schema_name.lower()))))

        return self.build_select_sql(select)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from abc import ABC
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Callable, Literal, Optional, Union
|
|
7
|
+
|
|
8
|
+
import psycopg2
|
|
9
|
+
from pydantic import Field, IPvAnyAddress, SecretStr, field_validator
|
|
10
|
+
from soda_core.common.data_source_connection import DataSourceConnection
|
|
11
|
+
from soda_core.common.data_source_results import QueryResult, UpdateResult
|
|
12
|
+
from soda_core.common.logging_constants import soda_logger
|
|
13
|
+
from soda_core.model.data_source.data_source import DataSourceBase
|
|
14
|
+
from soda_core.model.data_source.data_source_connection_properties import (
|
|
15
|
+
DataSourceConnectionProperties,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Module-wide logger; this is the shared soda_logger object, not a per-module logger.
logger: logging.Logger = soda_logger
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PostgresConnectionProperties(DataSourceConnectionProperties, ABC):
    # Common base type for every Postgres connection-properties variant in
    # this module; it declares no fields of its own.
    pass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PostgresConnectionString(PostgresConnectionProperties):
    # Variant that carries the whole connection definition in one required string.
    connection_string: str = Field(
        default=...,
        description="Complete connection string (alternative to individual parameters)",
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PostgresConnectionPropertiesBase(PostgresConnectionProperties, ABC):
    # Fields shared by the parameter-based connection variants (everything
    # except the credential itself, which the subclasses add).

    # --- Target server -----------------------------------------------------
    host: Union[str, IPvAnyAddress] = Field(default=..., description="Database host (hostname or IP address)")
    port: int = Field(default=5432, ge=1, le=65535, description="Database port (1-65535)")
    database: str = Field(default=..., min_length=1, max_length=63, description="Database name")
    user: str = Field(default=..., min_length=1, max_length=63, description="Database user (1-63 characters)")

    # --- SSL configuration -------------------------------------------------
    sslmode: Literal["disable", "allow", "prefer", "require", "verify-ca", "verify-full"] = Field(
        default="prefer",
        description="SSL mode for the connection",
    )
    sslcert: Optional[str] = Field(default=None, description="Path to SSL client certificate")
    sslkey: Optional[str] = Field(default=None, description="Path to SSL client key")
    sslrootcert: Optional[str] = Field(default=None, description="Path to SSL root certificate")

    # --- Connection options ------------------------------------------------
    connection_timeout: Optional[int] = Field(default=None, description="Connection timeout in seconds")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class PostgresConnectionPassword(PostgresConnectionPropertiesBase):
    # Parameter-based variant with the password given inline (held as a SecretStr).
    password: SecretStr = Field(default=..., description="Database password")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class PostgresConnectionPasswordFile(PostgresConnectionPropertiesBase):
    # Parameter-based variant where the password is read from a file when the
    # connection is created.
    password_file: Path = Field(default=..., description="Path to file containing database password")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PostgresDataSource(DataSourceBase, ABC):
    # Data source model for type "postgres". The connection details are read
    # from the "connection" key (alias) and stored as connection_properties.
    type: Literal["postgres"] = Field("postgres")
    connection_properties: PostgresConnectionProperties = Field(
        ..., alias="connection", description="Data source connection details"
    )

    @field_validator("connection_properties", mode="before")
    @classmethod
    def infer_connection_type(cls, value):
        """Pick the concrete connection-properties class for the raw input.

        An already constructed ``PostgresConnectionProperties`` instance is
        returned unchanged. Otherwise the presence of a ``password`` key
        selects ``PostgresConnectionPassword`` and, failing that, a
        ``password_file`` key selects ``PostgresConnectionPasswordFile``.

        Raises:
            ValueError: If neither key is present.
        """
        # Accept ready-made properties objects instead of rejecting them as an
        # "unknown structure".
        if isinstance(value, PostgresConnectionProperties):
            return value
        if "password" in value:
            return PostgresConnectionPassword(**value)
        if "password_file" in value:
            return PostgresConnectionPasswordFile(**value)
        # NOTE(review): a mapping with only "connection_string" ends up here even
        # though PostgresConnectionString exists; confirm whether that is intended.
        raise ValueError("Unknown connection structure")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class PostgresDataSourceConnection(DataSourceConnection):
    """psycopg2-backed connection that rolls back the transaction when a statement fails."""

    def __init__(self, name: str, connection_properties: DataSourceConnectionProperties):
        """Forward the name and connection properties to the base class."""
        super().__init__(name, connection_properties)

    def _create_connection(
        self,
        config: PostgresConnectionProperties,
    ):
        """Open a psycopg2 connection for ``config``.

        A ``PostgresConnectionPasswordFile`` is first converted into a
        ``PostgresConnectionPassword``: the file content, stripped of
        surrounding whitespace, becomes the password. The keyword arguments
        for ``psycopg2.connect`` come from ``config.to_connection_kwargs()``.
        """
        if isinstance(config, PostgresConnectionPasswordFile):
            with open(config.password_file, "r") as f:
                password = f.read().strip()
            # `exclude` takes a set (or mapping) of field names, not a bare string.
            config_dict = config.model_dump(exclude={"password_file"})
            config_dict["password"] = password
            config = PostgresConnectionPassword(**config_dict)
        return psycopg2.connect(**config.to_connection_kwargs())

    def _log_failure_and_rollback(self, failure_prefix: str, sql: str, error: Exception) -> None:
        """Log the failed statement as a warning, then roll back the transaction."""
        logger.warning(f"{failure_prefix}: \n{sql}\n{error}")
        logger.debug("Rolling back transaction")
        self.rollback()

    def execute_query(self, sql: str, log_query: bool = True) -> QueryResult:
        """Run the base ``execute_query``; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_query(sql, log_query=log_query)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL query failed", sql, e)
            raise

    def execute_update(self, sql: str, log_query: bool = True) -> UpdateResult:
        """Run the base ``execute_update``; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_update(sql, log_query=log_query)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL update failed", sql, e)
            raise

    def execute_query_one_by_one(
        self,
        sql: str,
        row_callback: Callable[[tuple, tuple[tuple]], None],
        log_query: bool = True,
        row_limit: Optional[int] = None,
    ) -> tuple[tuple]:
        """Run the base ``execute_query_one_by_one``; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_query_one_by_one(sql, row_callback, log_query=log_query, row_limit=row_limit)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL query one-by-one failed", sql, e)
            raise
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from soda_core.common.sql_ast import (
|
|
6
|
+
COLUMN,
|
|
7
|
+
EQ,
|
|
8
|
+
FROM,
|
|
9
|
+
IN,
|
|
10
|
+
JOIN,
|
|
11
|
+
LIKE,
|
|
12
|
+
LITERAL,
|
|
13
|
+
LOWER,
|
|
14
|
+
NOT_LIKE,
|
|
15
|
+
OR,
|
|
16
|
+
RAW_SQL,
|
|
17
|
+
SELECT,
|
|
18
|
+
WHERE,
|
|
19
|
+
)
|
|
20
|
+
from soda_core.common.statements.metadata_tables_query import MetadataTablesQuery
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PostgresMetadataTablesQuery(MetadataTablesQuery):
    """Metadata-tables query that reads ``pg_class`` joined with ``pg_namespace``."""

    def build_sql_statement(
        self,
        database_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        include_table_name_like_filters: Optional[list[str]] = None,
        exclude_table_name_like_filters: Optional[list[str]] = None,
    ) -> list:
        """
        Assemble the SQL AST elements of the query that lists table names from the data source metadata.

        Each optional argument adds a WHERE element only when it is truthy.
        Database, schema and LIKE patterns are lower-cased and compared with
        the lower-cased catalog value. The include patterns are combined with
        OR in one WHERE element; each exclude pattern gets its own WHERE element.
        """
        dialect = self.sql_dialect
        current_db = RAW_SQL(dialect._current_database())

        def lowered_relname():
            # A fresh expression per use, so no AST node is shared between filters.
            return LOWER(COLUMN("relname", "c"))

        projection = SELECT(
            [
                COLUMN(current_db, field_alias="table_catalog"),
                COLUMN("nspname", table_alias="n", field_alias="table_schema"),
                COLUMN("relname", table_alias="c", field_alias="table_name"),
                RAW_SQL(dialect.relkind_table_type_sql_expression()),
            ]
        )
        class_table = FROM(
            dialect._pg_class(),
            table_prefix=[dialect._pg_catalog()],
            alias="c",
        )
        namespace_join = JOIN(
            table_name=dialect._pg_namespace(),
            table_prefix=[dialect._pg_catalog()],
            alias="n",
            on_condition=EQ(COLUMN("relnamespace", "c"), COLUMN("oid", "n")),
        )
        # Only get object types that correspond to tables/views in information_schema.tables:
        # r = ordinary table, p = partitioned table, v = view, m = materialized view, f = foreign table
        relkind_filter = WHERE(
            IN(
                COLUMN("relkind", "c"),
                [LITERAL(relkind) for relkind in ("r", "p", "v", "m", "f")],
            )
        )

        statement: list = [projection, class_table, namespace_join, relkind_filter]

        if database_name:
            statement.append(WHERE(EQ(LOWER(current_db), LITERAL(database_name.lower()))))

        if schema_name:
            statement.append(WHERE(EQ(LOWER(COLUMN("nspname", "n")), LITERAL(schema_name.lower()))))

        if include_table_name_like_filters:
            include_conditions = [
                LIKE(lowered_relname(), LITERAL(pattern.lower())) for pattern in include_table_name_like_filters
            ]
            statement.append(WHERE(OR(include_conditions)))

        if exclude_table_name_like_filters:
            statement.extend(
                WHERE(NOT_LIKE(lowered_relname(), LITERAL(pattern.lower())))
                for pattern in exclude_table_name_like_filters
            )

        return statement
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from helpers.data_source_test_helper import DataSourceTestHelper
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PostgresDataSourceTestHelper(DataSourceTestHelper):
    """Test helper that configures a Postgres data source from ``POSTGRES_*`` environment variables."""

    def _create_database_name(self) -> str | None:
        """Return ``POSTGRES_DATABASE`` from the environment, defaulting to ``soda_test``."""
        return os.environ.get("POSTGRES_DATABASE", "soda_test")

    def _create_data_source_yaml_str(self) -> str:
        """
        Build the data source YAML used to initialize self.data_source_impl.

        Host, user, password and port are read from the environment; the
        database is the first element of self.dataset_prefix.
        """
        host = os.getenv("POSTGRES_HOST", "localhost")
        user = os.getenv("POSTGRES_USERNAME", "soda_test")
        password = os.getenv("POSTGRES_PASSWORD")
        port = int(os.getenv("POSTGRES_PORT", "5432"))
        return f"""
            type: postgres
            name: {self.name}
            connection:
                host: {host}
                user: {user}
                password: {password}
                port: {port}
                database: {self.dataset_prefix[0]}
        """
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
soda_postgres/common/data_sources/postgres_data_source.py,sha256=cJjBfZIMr-Ps0rC9gbPzY4XeG-x24CsmxP3f-figh34,16512
|
|
2
|
+
soda_postgres/common/data_sources/postgres_data_source_connection.py,sha256=faMkFaKkZZTdj1qojzRX7QZ55474Z5xCW9xn11HJJPo,4986
|
|
3
|
+
soda_postgres/statements/postgres_metadata_tables_query.py,sha256=8_2_tZDh4I-jw1FFpUtP8N56jw4guSfohf7r276uvqY,3408
|
|
4
|
+
soda_postgres/test_helpers/postgres_data_source_test_helper.py,sha256=Dmf_DF7jOvV0Z0U7qf9SYfga2SJqyubDFxh37tkyqLY,959
|
|
5
|
+
soda_postgres-4.0.5.dist-info/METADATA,sha256=Fg_DRPTMKEnTc2CDH-6EP6z9-ep00b4Ornn2FBtAy7k,155
|
|
6
|
+
soda_postgres-4.0.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
+
soda_postgres-4.0.5.dist-info/entry_points.txt,sha256=dRkRwpHNgnKteGULW3ztKCOs7DIvoYM0ZRiS1KYRy4s,139
|
|
8
|
+
soda_postgres-4.0.5.dist-info/top_level.txt,sha256=MwpCelT9xmFrou_LcF_IoKYvNimCYQnHtWr0J5Lhy3w,14
|
|
9
|
+
soda_postgres-4.0.5.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
soda_postgres
|