soda-postgres 4.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ from logging import Logger
2
+ from typing import Optional
3
+
4
+ from soda_core.common.data_source_connection import DataSourceConnection
5
+ from soda_core.common.data_source_impl import DataSourceImpl
6
+ from soda_core.common.logging_constants import soda_logger
7
+ from soda_core.common.metadata_types import (
8
+ DataSourceNamespace,
9
+ SodaDataTypeName,
10
+ SqlDataType,
11
+ )
12
+ from soda_core.common.sql_ast import (
13
+ AND,
14
+ CAST,
15
+ COLUMN,
16
+ CREATE_TABLE_COLUMN,
17
+ EQ,
18
+ FROM,
19
+ GT,
20
+ IN,
21
+ JOIN,
22
+ LEFT_INNER_JOIN,
23
+ LITERAL,
24
+ LOWER,
25
+ ORDER_BY_ASC,
26
+ RAW_SQL,
27
+ REGEX_LIKE,
28
+ SELECT,
29
+ WHERE,
30
+ )
31
+ from soda_core.common.sql_dialect import SqlDialect
32
+ from soda_core.common.statements.metadata_tables_query import MetadataTablesQuery
33
+ from soda_postgres.common.data_sources.postgres_data_source_connection import (
34
+ PostgresDataSource as PostgresDataSourceModel,
35
+ )
36
+ from soda_postgres.common.data_sources.postgres_data_source_connection import (
37
+ PostgresDataSourceConnection,
38
+ )
39
+ from soda_postgres.statements.postgres_metadata_tables_query import (
40
+ PostgresMetadataTablesQuery,
41
+ )
42
+
43
+ logger: Logger = soda_logger
44
+
45
+
46
# Long-form PostgreSQL type names, kept as constants because the type mappings
# and the timestamp-precision rendering in this module both compare against them.
PG_DOUBLE_PRECISION = "double precision"
PG_TIMESTAMP_WITHOUT_TIME_ZONE = "timestamp without time zone"
PG_TIMESTAMP_WITH_TIME_ZONE = "timestamp with time zone"
49
+
50
+
51
class PostgresDataSourceImpl(DataSourceImpl, model_class=PostgresDataSourceModel):
    """Postgres data source implementation.

    Wires the Postgres-specific SQL dialect, connection and table-metadata
    query into the generic data source machinery.
    """

    def __init__(self, data_source_model: PostgresDataSourceModel, connection: Optional[DataSourceConnection] = None):
        # Pure delegation; the override only narrows the model type in the signature.
        super().__init__(data_source_model=data_source_model, connection=connection)

    def _create_sql_dialect(self) -> SqlDialect:
        """Return a PostgresSqlDialect bound to this data source."""
        dialect = PostgresSqlDialect(data_source_impl=self)
        return dialect

    def _create_data_source_connection(self) -> DataSourceConnection:
        """Build the Postgres connection from the model's name and connection properties."""
        model = self.data_source_model
        return PostgresDataSourceConnection(
            name=model.name,
            connection_properties=model.connection_properties,
        )

    def create_metadata_tables_query(self) -> MetadataTablesQuery:
        """Return the Postgres-specific query object used to list tables."""
        query = PostgresMetadataTablesQuery(
            sql_dialect=self.sql_dialect,
            data_source_connection=self.data_source_connection,
        )
        return query
67
+
68
+
69
class PostgresSqlDataType(SqlDataType):
    """SqlDataType that renders timestamp precision the way Postgres spells it."""

    def get_sql_data_type_str_with_parameters(self) -> str:
        """Return the type name including its parameters.

        For the two long-form timestamp types the precision goes in the middle
        of the name (``timestamp(3) with time zone``), so they are formatted
        here when an integer precision is set. Every other case is delegated
        to the base class.
        """
        precision = self.datetime_precision
        if isinstance(precision, int):
            if self.name == PG_TIMESTAMP_WITH_TIME_ZONE:
                return f"timestamp({precision}) with time zone"
            if self.name == PG_TIMESTAMP_WITHOUT_TIME_ZONE:
                return f"timestamp({precision}) without time zone"
        return super().get_sql_data_type_str_with_parameters()
76
+
77
+
78
class PostgresSqlDialect(SqlDialect):
    """SQL dialect for PostgreSQL.

    Provides the Soda <-> Postgres data type mappings, the Postgres-specific
    SQL syntax (``~`` regex match, ``::`` cast, schema creation) and the
    ``pg_catalog`` building blocks used by the metadata queries.
    """

    # Soda data type pairs declared as synonyms for this dialect.
    SODA_DATA_TYPE_SYNONYMS = (
        (SodaDataTypeName.NUMERIC, SodaDataTypeName.DECIMAL),
        (SodaDataTypeName.DOUBLE, SodaDataTypeName.FLOAT),
    )

    def supports_materialized_views(self) -> bool:
        """Report that this dialect supports materialized views."""
        return True

    def _build_regex_like_sql(self, matches: REGEX_LIKE) -> str:
        """Render a regex match using the Postgres ``~`` operator.

        The pattern is embedded in a single-quoted SQL string literal, so any
        single quote inside it is doubled. Without that, a pattern containing
        ``'`` would terminate the literal early and produce invalid (or
        injectable) SQL.
        """
        expression: str = self.build_expression_sql(matches.expression)
        # NOTE(review): assumes callers pass the raw, un-escaped pattern — confirm.
        escaped_pattern: str = str(matches.regex_pattern).replace("'", "''")
        return f"{expression} ~ '{escaped_pattern}'"

    def create_schema_if_not_exists_sql(self, prefixes: list[str], add_semicolon: bool = True) -> str:
        """Return the base CREATE SCHEMA statement extended with ``AUTHORIZATION CURRENT_USER``.

        The base statement is requested without its semicolon so the clause can
        be appended before the optional terminator.
        """
        return (
            f"{super().create_schema_if_not_exists_sql(prefixes, add_semicolon=False)} AUTHORIZATION CURRENT_USER"
            + (";" if add_semicolon else "")
        )

    def get_data_source_data_type_name_by_soda_data_type_names(self) -> dict[SodaDataTypeName, str]:
        """Map each Soda data type to the Postgres type name used when generating SQL."""
        return {
            SodaDataTypeName.CHAR: "char",
            SodaDataTypeName.VARCHAR: "varchar",
            SodaDataTypeName.TEXT: "text",
            SodaDataTypeName.SMALLINT: "smallint",
            SodaDataTypeName.INTEGER: "integer",
            SodaDataTypeName.DECIMAL: "decimal",
            SodaDataTypeName.BIGINT: "bigint",
            SodaDataTypeName.NUMERIC: "numeric",
            SodaDataTypeName.FLOAT: "float",
            SodaDataTypeName.DOUBLE: PG_DOUBLE_PRECISION,
            SodaDataTypeName.TIMESTAMP: "timestamp",
            SodaDataTypeName.TIMESTAMP_TZ: "timestamptz",
            SodaDataTypeName.DATE: "date",
            SodaDataTypeName.TIME: "time",
            SodaDataTypeName.BOOLEAN: "boolean",
        }

    def get_soda_data_type_name_by_data_source_data_type_names(self) -> dict[str, SodaDataTypeName]:
        """Map Postgres type names (long and short spellings) to Soda data types."""
        return {
            "character varying": SodaDataTypeName.VARCHAR,
            "varchar": SodaDataTypeName.VARCHAR,
            "character": SodaDataTypeName.CHAR,
            "char": SodaDataTypeName.CHAR,
            "text": SodaDataTypeName.TEXT,
            # bpchar is mapped to TEXT; see is_same_soda_data_type_with_synonyms.
            "bpchar": SodaDataTypeName.TEXT,
            "smallint": SodaDataTypeName.SMALLINT,
            "int2": SodaDataTypeName.SMALLINT,
            "integer": SodaDataTypeName.INTEGER,
            "int": SodaDataTypeName.INTEGER,
            "int4": SodaDataTypeName.INTEGER,
            "bigint": SodaDataTypeName.BIGINT,
            "int8": SodaDataTypeName.BIGINT,
            "decimal": SodaDataTypeName.DECIMAL,
            "numeric": SodaDataTypeName.NUMERIC,
            "float": SodaDataTypeName.FLOAT,
            "real": SodaDataTypeName.FLOAT,
            "float4": SodaDataTypeName.FLOAT,
            PG_DOUBLE_PRECISION: SodaDataTypeName.DOUBLE,
            "float8": SodaDataTypeName.DOUBLE,
            "timestamp": SodaDataTypeName.TIMESTAMP,
            PG_TIMESTAMP_WITHOUT_TIME_ZONE: SodaDataTypeName.TIMESTAMP,
            "timestamptz": SodaDataTypeName.TIMESTAMP_TZ,
            PG_TIMESTAMP_WITH_TIME_ZONE: SodaDataTypeName.TIMESTAMP_TZ,
            "date": SodaDataTypeName.DATE,
            "time": SodaDataTypeName.TIME,
            "time without time zone": SodaDataTypeName.TIME,
            "boolean": SodaDataTypeName.BOOLEAN,
            "bool": SodaDataTypeName.BOOLEAN,
        }

    def _build_cast_sql(self, cast: CAST) -> str:
        """Render a cast with the Postgres ``expression::type`` syntax.

        A SodaDataTypeName target is translated to its Postgres type name; any
        other target is used verbatim.
        """
        to_type_text: str = (
            self.get_data_source_data_type_name_for_soda_data_type_name(cast.to_type)
            if isinstance(cast.to_type, SodaDataTypeName)
            else cast.to_type
        )
        return f"{self.build_expression_sql(cast.expression)}::{to_type_text}"

    def _get_data_type_name_synonyms(self) -> list[list[str]]:
        """Return groups of Postgres type names that denote the same type."""
        return [
            ["varchar", "character varying"],
            ["char", "character"],
            ["integer", "int", "int4"],
            ["bigint", "int8"],
            ["smallint", "int2"],
            ["real", "float4"],
            [PG_DOUBLE_PRECISION, "float8"],
            ["timestamp", PG_TIMESTAMP_WITHOUT_TIME_ZONE],
            ["decimal", "numeric"],
        ]

    def get_sql_data_type_class(self) -> type:
        """Return the SqlDataType subclass used for this dialect."""
        return PostgresSqlDataType

    def _build_create_table_column_type(self, create_table_column: CREATE_TABLE_COLUMN) -> str:
        """Render the column type for CREATE TABLE.

        ``text`` takes no parameters in Postgres, so a maximum length on a text
        column is dropped (with a warning) instead of being rendered.
        """
        if create_table_column.type.name == "text":  # Do not output text with parameters!
            if create_table_column.type.character_maximum_length is not None:
                logger.warning(
                    f"Text column {create_table_column.name} has a character maximum length, but text does not support parameters! Ignoring in postgres."
                )
            return "text"
        return super()._build_create_table_column_type(create_table_column=create_table_column)

    @classmethod
    def is_same_soda_data_type_with_synonyms(cls, expected: SodaDataTypeName, actual: SodaDataTypeName) -> bool:
        """Compare two Soda data types, additionally accepting TEXT where CHAR is expected."""
        # Postgres cursor can return bpchar, which is an unbounded synonym for char. We convert that to TEXT as that is the best fit. So we could expect CHAR, but actual is TEXT.
        if expected == SodaDataTypeName.CHAR and actual == SodaDataTypeName.TEXT:
            logger.debug(
                f"In is_same_soda_data_type_with_synonyms, expected {expected} and actual {actual} are treated as the same because of postgres cursor returning BPCHAR (best matching with TEXT)"
            )
            return True

        return super().is_same_soda_data_type_with_synonyms(expected, actual)

    ###
    # Tables and columns metadata queries
    ###
    def _pg_catalog(self) -> str:
        """Return the name of the system catalog schema."""
        return "pg_catalog"

    def _pg_class(self) -> str:
        """Return the name of the catalog table that lists relations."""
        return "pg_class"

    def _pg_namespace(self) -> str:
        """Return the name of the catalog table that lists schemas."""
        return "pg_namespace"

    def _current_database(self) -> str:
        """Return the SQL expression that yields the current database name."""
        return "current_database()"

    def relkind_table_type_sql_expression(self, table_alias: str = "c", column_alias: str = "table_type") -> str:
        """Return a CASE expression translating ``relkind`` into a readable table type.

        Ordinary tables (``'r'``) are further split on ``relpersistence`` into
        temporary, base and unlogged tables.

        :param table_alias: alias of the pg_class table in the surrounding query
        :param column_alias: alias given to the resulting column
        """
        return f"""CASE {table_alias}.relkind
            WHEN 'r' THEN
                CASE {table_alias}.relpersistence
                    WHEN 't' THEN 'TEMPORARY TABLE'
                    WHEN 'p' THEN 'BASE TABLE'
                    WHEN 'u' THEN 'UNLOGGED TABLE'
                END
            WHEN 'v' THEN 'VIEW'
            WHEN 'm' THEN 'MATERIALIZED VIEW'
            WHEN 'i' THEN 'INDEX'
            WHEN 'S' THEN 'SEQUENCE'
            WHEN 't' THEN 'TOAST TABLE'
            WHEN 'f' THEN 'FOREIGN TABLE'
            WHEN 'p' THEN 'PARTITIONED TABLE'
            WHEN 'I' THEN 'PARTITIONED INDEX'
        END as {column_alias}"""

    def build_columns_metadata_query_str(self, table_namespace: DataSourceNamespace, table_name: str) -> str:
        """
        Builds the full SQL query that lists the columns of one table from the pg_catalog metadata.

        Each result row describes one column: its name, normalized data type,
        type parameters (length, numeric precision/scale, datetime precision)
        and the catalog, schema, name and type of the owning table. Rows are
        ordered by attribute number.

        :param table_namespace: supplies the optional database and schema names to filter on
        :param table_name: name of the table whose columns are queried
        :return: the SQL query string
        """

        database_name: Optional[str] = table_namespace.get_database_for_metadata_query()
        schema_name: str = table_namespace.get_schema_for_metadata_query()

        ######
        current_database_expression = RAW_SQL(self._current_database())
        select: list = [
            SELECT(
                [
                    COLUMN("attname", table_alias="a", field_alias="column_name"),
                    # Normalize data type into information_schema.columns style. Consider doing this in python instead, but this is lightweight and simple enough.
                    RAW_SQL(
                        """CASE
                            -- arrays
                            WHEN t.typcategory = 'A' OR t.typelem <> 0 THEN 'ARRAY'

                            -- choose base type for domains, otherwise the type itself
                            ELSE CASE COALESCE(bt.typname, t.typname)
                                WHEN 'varchar' THEN 'character varying'
                                WHEN 'bpchar' THEN 'character'
                                WHEN 'bool' THEN 'boolean'
                                WHEN 'int2' THEN 'smallint'
                                WHEN 'int4' THEN 'integer'
                                WHEN 'int8' THEN 'bigint'
                                WHEN 'float4' THEN 'real'
                                WHEN 'float8' THEN 'double precision'
                                WHEN 'timestamptz' THEN 'timestamp with time zone'
                                WHEN 'timestamp' THEN 'timestamp without time zone'
                                WHEN 'timetz' THEN 'time with time zone'
                                WHEN 'time' THEN 'time without time zone'
                                WHEN 'bit' THEN 'bit'
                                WHEN 'varbit' THEN 'bit varying'
                                ELSE COALESCE(bt.typname, t.typname)
                            END
                        END AS "data_type"
                        """
                    ),
                    # Extract type parameters. No abstract level api for this, we have to replicate Postgres logic here.
                    # The expressions below decode a.atttypmod per type family:
                    #   - varchar/bpchar: the length is atttypmod - 4
                    #   - numeric: precision and scale are packed in atttypmod - 4 (high / low 16 bits)
                    #   - time/timestamp types: atttypmod is read as the precision directly (no offset)
                    # varchar/char length (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname IN ('varchar','bpchar') THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN a.atttypmod - 4
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "character_maximum_length"
                        """
                    ),
                    # numeric precision (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname = 'numeric' THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN ((a.atttypmod - 4) >> 16)
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "numeric_precision"
                        """
                    ),
                    # numeric scale (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname = 'numeric' THEN
                                CASE
                                    WHEN a.atttypmod > 4 THEN ((a.atttypmod - 4) & 65535)
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "numeric_scale"
                        """
                    ),
                    # datetime precision (NULL otherwise)
                    RAW_SQL(
                        """CASE
                            WHEN t.typname IN ('time','timetz','timestamp','timestamptz') THEN
                                CASE
                                    WHEN a.atttypmod >= 0 THEN a.atttypmod
                                    ELSE NULL
                                END
                            ELSE NULL
                        END AS "datetime_precision"
                        """
                    ),
                    COLUMN(current_database_expression, field_alias="table_catalog"),
                    COLUMN("nspname", table_alias="n", field_alias="table_schema"),
                    COLUMN("relname", table_alias="c", field_alias="table_name"),
                    RAW_SQL(self.relkind_table_type_sql_expression()),
                ]
            ),
            FROM(
                self._pg_class(),
                table_prefix=[self._pg_catalog()],
                alias="c",
            ),
            JOIN(
                table_name=self._pg_namespace(),
                table_prefix=[self._pg_catalog()],
                alias="n",
                on_condition=EQ(
                    COLUMN("relnamespace", "c"),
                    COLUMN("oid", "n"),
                ),
            ),
            JOIN(
                table_name="pg_attribute",
                table_prefix=[self._pg_catalog()],
                alias="a",
                on_condition=EQ(
                    COLUMN("attrelid", "a"),
                    COLUMN("oid", "c"),
                ),
            ),
            JOIN(
                table_name="pg_type",
                table_prefix=[self._pg_catalog()],
                alias="t",
                on_condition=EQ(
                    COLUMN("atttypid", "a"),
                    COLUMN("oid", "t"),
                ),
            ),
            # Base type of a domain; typbasetype = 0 is turned into NULL so non-domain types do not match.
            LEFT_INNER_JOIN(
                table_name="pg_type",
                table_prefix=[self._pg_catalog()],
                alias="bt",
                on_condition=EQ(
                    COLUMN("oid", "bt"),
                    RAW_SQL("NULLIF(t.typbasetype, 0)"),
                ),
            ),
            WHERE(
                AND(
                    [
                        # Only get object types that correspond to tables/views in information_schema.tables
                        IN(
                            COLUMN("relkind", "c"),
                            [LITERAL("r"), LITERAL("p"), LITERAL("v"), LITERAL("m"), LITERAL("f")],
                        ),
                        # Only get columns that are not dropped
                        GT(COLUMN("attnum", "a"), LITERAL(0)),
                        EQ(COLUMN("relname", "c"), LITERAL(self.metadata_casify(table_name))),
                    ]
                )
            ),
            ORDER_BY_ASC(COLUMN("attnum", "a")),
        ]

        # Database and schema filters are compared case-insensitively.
        if database_name:
            database_name_lower: str = database_name.lower()
            select.append(WHERE(EQ(LOWER(current_database_expression), LITERAL(database_name_lower))))

        if schema_name:
            select.append(WHERE(EQ(LOWER(COLUMN("nspname", "n")), LITERAL(schema_name.lower()))))

        return self.build_select_sql(select)
@@ -0,0 +1,116 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC
5
+ from pathlib import Path
6
+ from typing import Callable, Literal, Optional, Union
7
+
8
+ import psycopg2
9
+ from pydantic import Field, IPvAnyAddress, SecretStr, field_validator
10
+ from soda_core.common.data_source_connection import DataSourceConnection
11
+ from soda_core.common.data_source_results import QueryResult, UpdateResult
12
+ from soda_core.common.logging_constants import soda_logger
13
+ from soda_core.model.data_source.data_source import DataSourceBase
14
+ from soda_core.model.data_source.data_source_connection_properties import (
15
+ DataSourceConnectionProperties,
16
+ )
17
+
18
+ logger: logging.Logger = soda_logger
19
+
20
+
21
class PostgresConnectionProperties(DataSourceConnectionProperties, ABC):
    # Common base type of every Postgres connection-properties variant; declares no fields itself.
    pass
23
+
24
+
25
class PostgresConnectionString(PostgresConnectionProperties):
    # Variant that carries one complete connection string instead of individual settings.
    connection_string: str = Field(
        default=...,
        description="Complete connection string (alternative to individual parameters)",
    )
27
+
28
+
29
class PostgresConnectionPropertiesBase(PostgresConnectionProperties, ABC):
    # Settings shared by the credential variants: target server, SSL and connection options.

    # Target server and login
    host: Union[str, IPvAnyAddress] = Field(default=..., description="Database host (hostname or IP address)")
    port: int = Field(default=5432, ge=1, le=65535, description="Database port (1-65535)")
    database: str = Field(default=..., min_length=1, max_length=63, description="Database name")
    user: str = Field(default=..., min_length=1, max_length=63, description="Database user (1-63 characters)")

    # SSL configuration
    sslmode: Literal["disable", "allow", "prefer", "require", "verify-ca", "verify-full"] = Field(
        default="prefer",
        description="SSL mode for the connection",
    )
    sslcert: Optional[str] = Field(default=None, description="Path to SSL client certificate")
    sslkey: Optional[str] = Field(default=None, description="Path to SSL client key")
    sslrootcert: Optional[str] = Field(default=None, description="Path to SSL root certificate")

    # Connection options
    connection_timeout: Optional[int] = Field(default=None, description="Connection timeout in seconds")
45
+
46
+
47
class PostgresConnectionPassword(PostgresConnectionPropertiesBase):
    # Credential variant: the password is given inline and held as a SecretStr.
    password: SecretStr = Field(default=..., description="Database password")
49
+
50
+
51
class PostgresConnectionPasswordFile(PostgresConnectionPropertiesBase):
    # Credential variant: the password is read from the file at this path when connecting.
    password_file: Path = Field(default=..., description="Path to file containing database password")
53
+
54
+
55
class PostgresDataSource(DataSourceBase, ABC):
    # Data source model for ``type: postgres``; the connection details arrive under the ``connection`` key.
    type: Literal["postgres"] = Field("postgres")
    connection_properties: PostgresConnectionProperties = Field(
        ..., alias="connection", description="Data source connection details"
    )

    @field_validator("connection_properties", mode="before")
    @classmethod
    def infer_connection_type(cls, value):
        """Turn the raw ``connection`` mapping into the matching properties model.

        The variant is chosen by which credential key is present: ``password``
        selects PostgresConnectionPassword, ``password_file`` selects
        PostgresConnectionPasswordFile. An already-built properties object is
        passed through unchanged.

        Raises:
            ValueError: if the value is not a mapping (for example an empty
                ``connection:`` key, which arrives as None) or contains neither
                credential key. Raising ValueError lets pydantic report a
                validation error instead of an unrelated TypeError.
        """
        from collections.abc import Mapping

        if isinstance(value, PostgresConnectionProperties):
            return value
        if not isinstance(value, Mapping):
            raise ValueError(f"Connection must be a mapping of properties, got {type(value).__name__}")
        if "password" in value:
            return PostgresConnectionPassword(**value)
        elif "password_file" in value:
            return PostgresConnectionPasswordFile(**value)
        raise ValueError("Unknown connection structure")
68
+
69
+
70
class PostgresDataSourceConnection(DataSourceConnection):
    """psycopg2-backed connection that rolls back the transaction whenever a statement fails."""

    def __init__(self, name: str, connection_properties: DataSourceConnectionProperties):
        super().__init__(name, connection_properties)

    def _create_connection(
        self,
        config: PostgresConnectionProperties,
    ):
        """Open a psycopg2 connection for the given connection properties.

        A password-file configuration is first converted to a password
        configuration by reading the file (surrounding whitespace stripped).

        :param config: the validated connection properties
        :return: the object returned by ``psycopg2.connect``
        """
        if isinstance(config, PostgresConnectionPasswordFile):
            with open(config.password_file, "r") as f:
                # ``exclude`` takes a set (or dict) of field names, not a bare string.
                config_dict = config.model_dump(exclude={"password_file"})
                config_dict["password"] = f.read().strip()
                config = PostgresConnectionPassword(**config_dict)
        return psycopg2.connect(**config.to_connection_kwargs())

    def _log_failure_and_rollback(self, operation: str, sql: str, error: Exception) -> None:
        """Log a failed statement and roll back the current transaction."""
        logger.warning(f"{operation} failed: \n{sql}\n{error}")
        logger.debug("Rolling back transaction")
        self.rollback()

    def execute_query(self, sql: str, log_query: bool = True) -> QueryResult:
        """Run a query; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_query(sql, log_query=log_query)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL query", sql, e)
            raise

    def execute_update(self, sql: str, log_query: bool = True) -> UpdateResult:
        """Run an update statement; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_update(sql, log_query=log_query)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL update", sql, e)
            raise

    def execute_query_one_by_one(
        self,
        sql: str,
        row_callback: Callable[[tuple, tuple[tuple]], None],
        log_query: bool = True,
        row_limit: Optional[int] = None,
    ) -> tuple[tuple]:
        """Run a query that feeds rows to ``row_callback``; on a psycopg2 error, roll back and re-raise."""
        try:
            return super().execute_query_one_by_one(sql, row_callback, log_query=log_query, row_limit=row_limit)
        except psycopg2.errors.Error as e:  # Catch the error and roll back the transaction
            self._log_failure_and_rollback("SQL query one-by-one", sql, e)
            raise
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from soda_core.common.sql_ast import (
6
+ COLUMN,
7
+ EQ,
8
+ FROM,
9
+ IN,
10
+ JOIN,
11
+ LIKE,
12
+ LITERAL,
13
+ LOWER,
14
+ NOT_LIKE,
15
+ OR,
16
+ RAW_SQL,
17
+ SELECT,
18
+ WHERE,
19
+ )
20
+ from soda_core.common.statements.metadata_tables_query import MetadataTablesQuery
21
+
22
+
23
class PostgresMetadataTablesQuery(MetadataTablesQuery):
    """Table-listing metadata query built on the pg_class and pg_namespace catalog tables."""

    def build_sql_statement(
        self,
        database_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        include_table_name_like_filters: Optional[list[str]] = None,
        exclude_table_name_like_filters: Optional[list[str]] = None,
    ) -> list:
        """
        Assemble the SQL AST elements of the query that lists tables and views.

        Database, schema and table-name filters are all matched
        case-insensitively by lowercasing both sides. Include filters are
        OR-ed together in one WHERE element; every exclude filter becomes its
        own WHERE element.

        :return: the list of SQL AST elements making up the statement
        """
        dialect = self.sql_dialect
        current_database_expression = RAW_SQL(dialect._current_database())

        selected_columns = [
            COLUMN(current_database_expression, field_alias="table_catalog"),
            COLUMN("nspname", table_alias="n", field_alias="table_schema"),
            COLUMN("relname", table_alias="c", field_alias="table_name"),
            RAW_SQL(dialect.relkind_table_type_sql_expression()),
        ]
        from_pg_class = FROM(
            dialect._pg_class(),
            table_prefix=[dialect._pg_catalog()],
            alias="c",
        )
        join_pg_namespace = JOIN(
            table_name=dialect._pg_namespace(),
            table_prefix=[dialect._pg_catalog()],
            alias="n",
            on_condition=EQ(COLUMN("relnamespace", "c"), COLUMN("oid", "n")),
        )
        # Only get object types that correspond to tables/views in information_schema.tables:
        # r = ordinary table, p = partitioned table, v = view, m = materialized view, f = foreign table
        table_like_relkinds = [LITERAL(relkind) for relkind in ("r", "p", "v", "m", "f")]

        select: list = [
            SELECT(selected_columns),
            from_pg_class,
            join_pg_namespace,
            WHERE(IN(COLUMN("relkind", "c"), table_like_relkinds)),
        ]

        if database_name:
            lowered_database: str = database_name.lower()
            select.append(WHERE(EQ(LOWER(current_database_expression), LITERAL(lowered_database))))

        if schema_name:
            lowered_schema: str = schema_name.lower()
            select.append(WHERE(EQ(LOWER(COLUMN("nspname", "n")), LITERAL(lowered_schema))))

        if include_table_name_like_filters:
            include_conditions = []
            for like_pattern in include_table_name_like_filters:
                include_conditions.append(LIKE(LOWER(COLUMN("relname", "c")), LITERAL(like_pattern.lower())))
            select.append(WHERE(OR(include_conditions)))

        if exclude_table_name_like_filters:
            select.extend(
                WHERE(NOT_LIKE(LOWER(COLUMN("relname", "c")), LITERAL(like_pattern.lower())))
                for like_pattern in exclude_table_name_like_filters
            )

        return select
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from helpers.data_source_test_helper import DataSourceTestHelper
6
+
7
+
8
class PostgresDataSourceTestHelper(DataSourceTestHelper):
    """Test helper that configures a Postgres data source from POSTGRES_* environment variables."""

    def _create_database_name(self) -> str | None:
        """Return the test database name: POSTGRES_DATABASE, or "soda_test" when unset."""
        return os.getenv("POSTGRES_DATABASE", "soda_test")

    def _create_data_source_yaml_str(self) -> str:
        """
        Return the data source YAML used to initialize self.data_source_impl.

        Host, user, password and port are read from the environment; the
        database is the first element of self.dataset_prefix.
        """
        host = os.getenv("POSTGRES_HOST", "localhost")
        user = os.getenv("POSTGRES_USERNAME", "soda_test")
        password = os.getenv("POSTGRES_PASSWORD")
        port = int(os.getenv("POSTGRES_PORT", "5432"))
        return f"""
            type: postgres
            name: {self.name}
            connection:
                host: {host}
                user: {user}
                password: {password}
                port: {port}
                database: {self.dataset_prefix[0]}
        """
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: soda-postgres
3
+ Version: 4.0.5
4
+ Requires-Dist: soda-core==4.0.5
5
+ Requires-Dist: psycopg2-binary<3.0,>=2.8.5
6
+ Dynamic: requires-dist
@@ -0,0 +1,9 @@
1
+ soda_postgres/common/data_sources/postgres_data_source.py,sha256=cJjBfZIMr-Ps0rC9gbPzY4XeG-x24CsmxP3f-figh34,16512
2
+ soda_postgres/common/data_sources/postgres_data_source_connection.py,sha256=faMkFaKkZZTdj1qojzRX7QZ55474Z5xCW9xn11HJJPo,4986
3
+ soda_postgres/statements/postgres_metadata_tables_query.py,sha256=8_2_tZDh4I-jw1FFpUtP8N56jw4guSfohf7r276uvqY,3408
4
+ soda_postgres/test_helpers/postgres_data_source_test_helper.py,sha256=Dmf_DF7jOvV0Z0U7qf9SYfga2SJqyubDFxh37tkyqLY,959
5
+ soda_postgres-4.0.5.dist-info/METADATA,sha256=Fg_DRPTMKEnTc2CDH-6EP6z9-ep00b4Ornn2FBtAy7k,155
6
+ soda_postgres-4.0.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
7
+ soda_postgres-4.0.5.dist-info/entry_points.txt,sha256=dRkRwpHNgnKteGULW3ztKCOs7DIvoYM0ZRiS1KYRy4s,139
8
+ soda_postgres-4.0.5.dist-info/top_level.txt,sha256=MwpCelT9xmFrou_LcF_IoKYvNimCYQnHtWr0J5Lhy3w,14
9
+ soda_postgres-4.0.5.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [soda.plugins.data_source.postgres]
2
+ PostgresDataSourceImpl = soda_postgres.common.data_sources.postgres_data_source:PostgresDataSourceImpl
@@ -0,0 +1 @@
1
+ soda_postgres