tdfs4ds 0.2.4.25__py3-none-any.whl → 0.2.4.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +586 -564
- tdfs4ds/feature_store/feature_data_processing.py +367 -299
- tdfs4ds/feature_store/feature_query_retrieval.py +105 -52
- tdfs4ds/feature_store/feature_store_management.py +268 -285
- tdfs4ds/process_store/process_followup.py +113 -2
- tdfs4ds/process_store/process_query_administration.py +1 -1
- tdfs4ds/process_store/process_registration_management.py +67 -55
- tdfs4ds/process_store/process_store_catalog_management.py +2 -2
- tdfs4ds/utils/filter_management.py +521 -138
- tdfs4ds/utils/query_management.py +18 -40
- tdfs4ds/utils/time_management.py +547 -97
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/RECORD +15 -15
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.41.dist-info}/top_level.txt +0 -0
|
@@ -1,239 +1,622 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import numpy as np # Needed for np.datetime64 handling in get_date_in_the_past
|
|
1
3
|
import teradataml as tdml
|
|
2
4
|
import tdfs4ds
|
|
3
|
-
import
|
|
5
|
+
from tdfs4ds import logger, logger_safe
|
|
6
|
+
import json
|
|
4
7
|
|
|
5
8
|
|
|
6
|
-
def get_hidden_table_name(
|
|
7
|
-
|
|
9
|
+
def get_hidden_table_name(schema_name, view_name):
|
|
10
|
+
"""
|
|
11
|
+
Return the backing 'hidden' table name for a public view/table.
|
|
8
12
|
|
|
13
|
+
Args:
|
|
14
|
+
schema_name (str): Schema containing the public view.
view_name (str): Public-facing table/view name.
|
|
9
15
|
|
|
10
|
-
|
|
16
|
+
Returns:
|
|
17
|
+
str: Hidden table name parsed from the view's DDL; falls back to view_name + '_HIDDEN' if the DDL lookup or parsing fails.
|
|
11
18
|
"""
|
|
12
|
-
Manages dynamic filtering on a database table by creating and maintaining a view based on specified filter criteria.
|
|
13
19
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
try:
|
|
21
|
+
return tdfs4ds.utils.lineage.get_ddl(schema_name=schema_name, view_name=view_name).split('\n')[-2].split('.')[1]
|
|
22
|
+
except Exception as e:
|
|
23
|
+
logger_safe("warning", "Failed to extract hidden table name from DDL; defaulting to suffix method.", e)
|
|
24
|
+
return view_name + "_HIDDEN"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FilterManager:
|
|
28
|
+
"""
|
|
29
|
+
A utility for managing dynamic, versioned filter sets as database-backed views.
|
|
30
|
+
|
|
31
|
+
The FilterManager enables lightweight scenario management by storing multiple
|
|
32
|
+
filter definitions in a hidden Teradata table and exposing a public view that
|
|
33
|
+
dynamically switches between them by `filter_id`. Each row in the hidden table
|
|
34
|
+
represents a complete filter configuration. The active configuration is
|
|
35
|
+
controlled by updating the view definition rather than rewriting table data.
|
|
36
|
+
|
|
37
|
+
Key Features:
|
|
38
|
+
- Store multiple filter states (scenarios) indexed by `filter_id`
|
|
39
|
+
- Switch filter states instantly by updating a view
|
|
40
|
+
- Optionally include time-based slicing using a `BUSINESS_DATE` column
|
|
41
|
+
- Clone filters between managers (soft or hard clone modes)
|
|
42
|
+
- Prune obsolete filters to control table size
|
|
43
|
+
- Retrieve current and historical filter definitions
|
|
44
|
+
|
|
45
|
+
Workflow Overview:
|
|
46
|
+
1. Create a `FilterManager` pointing to a target view name.
|
|
47
|
+
2. Load one or more filter definitions using `load_filter()`.
|
|
48
|
+
3. Switch active filters using `update(filter_id)`.
|
|
49
|
+
4. Inspect the active filter via `display()` or view DDL.
|
|
50
|
+
5. Optionally prune or clone filters as needed.
|
|
51
|
+
|
|
52
|
+
How It Works Internally:
|
|
53
|
+
- A hidden table named `<view_name>_HIDDEN` stores filter definitions.
|
|
54
|
+
- A Teradata view named `<view_name>` exposes only the *active* filter row.
|
|
55
|
+
- Each filter automatically receives a sequential `filter_id`
|
|
56
|
+
(`ROW_NUMBER()` ordering ensures deterministic assignment).
|
|
57
|
+
- If time-based filtering is used via `time_column`, a `BUSINESS_DATE`
|
|
58
|
+
column is added and projected in all operations.
|
|
59
|
+
|
|
60
|
+
Parameters:
|
|
61
|
+
table_name (str): Public view name to manage or create.
|
|
62
|
+
schema_name (str): Teradata schema where artifacts will be created.
|
|
63
|
+
filter_id_name (str, optional): Name of the filter ID column. Defaults to `'filter_id'`.
|
|
64
|
+
time_column (str, optional): Optional name of a timestamp column from input DataFrames
|
|
65
|
+
that maps to a `BUSINESS_DATE` column for time-aware filters.
|
|
17
66
|
|
|
18
67
|
Attributes:
|
|
19
|
-
schema_name (str):
|
|
20
|
-
table_name (str):
|
|
21
|
-
view_name (str):
|
|
22
|
-
filter_id_name (str):
|
|
23
|
-
nb_filters (int):
|
|
24
|
-
col_names (list):
|
|
25
|
-
time_filtering (bool):
|
|
68
|
+
schema_name (str): Target schema for view and hidden table.
|
|
69
|
+
table_name (str): Name of the hidden table storing filters (read from the existing view's DDL, otherwise `<view_name>_HIDDEN`).
|
|
70
|
+
view_name (str): Name of public view pointing to current filter.
|
|
71
|
+
filter_id_name (str): Column containing filter ID.
|
|
72
|
+
nb_filters (int | None): Number of stored filters (None until initialized).
|
|
73
|
+
col_names (list[str] | None): Columns projected by the view (data columns only).
|
|
74
|
+
time_filtering (bool | None): True if time-based filtering enabled.
|
|
75
|
+
|
|
76
|
+
Notes:
|
|
77
|
+
- Database objects are only created when `load_filter()` (or `clone_filter()`) is called, not by the constructor.
|
|
78
|
+
- Safe for iterative pipeline runs—auto-detects existing artifacts.
|
|
79
|
+
- Designed for large production tables and Teradata-native workflows.
|
|
26
80
|
"""
|
|
27
81
|
|
|
28
|
-
def __init__(self, table_name, schema_name, filter_id_name='filter_id', time_column = None):
|
|
29
|
-
"""
|
|
30
|
-
Initializes the FilterManager for managing filtered views.
|
|
31
82
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
83
|
+
def __init__(self, table_name, schema_name, filter_id_name="filter_id", time_column=None):
|
|
84
|
+
"""
|
|
85
|
+
Initialize the FilterManager.
|
|
35
86
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
self.schema_name
|
|
43
|
-
self.
|
|
44
|
-
self.view_name = table_name
|
|
87
|
+
If the hidden table/view already exist, metadata (column names, maximum
|
|
88
|
+
filter id, and time filtering status) are detected and cached. If they do
|
|
89
|
+
not exist yet, attributes are initialized but no objects are created until
|
|
90
|
+
`load_filter()` is called.
|
|
91
|
+
"""
|
|
92
|
+
self.schema_name = schema_name
|
|
93
|
+
self.table_name = get_hidden_table_name(schema_name=schema_name, view_name=table_name)
|
|
94
|
+
self.view_name = table_name
|
|
45
95
|
self.filter_id_name = filter_id_name
|
|
46
|
-
self.nb_filters
|
|
47
|
-
self.col_names
|
|
96
|
+
self.nb_filters = None
|
|
97
|
+
self.col_names = None
|
|
48
98
|
self.time_filtering = None
|
|
99
|
+
self._init_time_column = time_column # Remember user hint for later
|
|
100
|
+
|
|
101
|
+
logger_safe(
|
|
102
|
+
"debug",
|
|
103
|
+
"Initializing FilterManager | schema_name=%s | view_name=%s | table_name=%s | filter_id_name=%s",
|
|
104
|
+
self.schema_name, self.view_name, self.table_name, self.filter_id_name
|
|
105
|
+
)
|
|
49
106
|
|
|
50
107
|
if self._exists():
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
108
|
+
logger_safe(
|
|
109
|
+
"info",
|
|
110
|
+
"Existing filter artifacts detected | schema_name=%s | view_name=%s | table_name=%s",
|
|
111
|
+
self.schema_name, self.view_name, self.table_name
|
|
112
|
+
)
|
|
113
|
+
|
|
56
114
|
df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
57
|
-
self.filter_id_name = df.columns[0]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
115
|
+
self.filter_id_name = df.columns[0] # First column is assumed to be filter id
|
|
116
|
+
|
|
117
|
+
self.nb_filters = tdml.execute_sql(
|
|
118
|
+
f"SEL MAX({self.filter_id_name}) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
119
|
+
).fetchall()[0][0]
|
|
120
|
+
|
|
61
121
|
self.time_filtering = self._istimefiltering()
|
|
62
|
-
if self.time_filtering:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
122
|
+
self.col_names = df.columns[2:] if self.time_filtering else df.columns[1:]
|
|
123
|
+
|
|
124
|
+
logger_safe(
|
|
125
|
+
"debug",
|
|
126
|
+
"Detected existing configuration | filter_id_name=%s | nb_filters=%s | time_filtering=%s | col_names=%s",
|
|
127
|
+
self.filter_id_name, self.nb_filters, self.time_filtering, list(self.col_names)
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
else:
|
|
131
|
+
logger_safe(
|
|
132
|
+
"info",
|
|
133
|
+
"No existing filter artifacts found; will be created by load_filter() | schema_name=%s | view_name=%s",
|
|
134
|
+
self.schema_name, self.view_name
|
|
135
|
+
)
|
|
136
|
+
|
|
66
137
|
|
|
67
138
|
def _istimefiltering(self):
|
|
68
|
-
"""
|
|
139
|
+
"""
|
|
140
|
+
Determine if the hidden table includes a `BUSINESS_DATE` column.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
bool: True if the hidden table contains `BUSINESS_DATE`, else False.
|
|
144
|
+
"""
|
|
69
145
|
df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
70
|
-
|
|
146
|
+
has_time = "BUSINESS_DATE" in df.columns
|
|
147
|
+
logger.debug("Time filtering detected: %s", has_time)
|
|
148
|
+
return has_time
|
|
71
149
|
|
|
72
150
|
def _exists(self):
|
|
73
|
-
"""Check if both table and view exist."""
|
|
74
|
-
existing_tables = [x.lower().replace('"', '') for x in
|
|
75
|
-
tdml.db_list_tables(schema_name=self.schema_name).TableName.values]
|
|
76
|
-
return self.view_name.lower() in existing_tables or self.table_name.lower() in existing_tables
|
|
77
|
-
def load_filter(self, df, primary_index=None, time_column = None):
|
|
78
151
|
"""
|
|
79
|
-
|
|
152
|
+
Check if either the public view or hidden table already exist in the schema.
|
|
80
153
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
154
|
+
Returns:
|
|
155
|
+
bool: True if the hidden table or view exists, else False.
|
|
156
|
+
"""
|
|
157
|
+
existing_tables = [
|
|
158
|
+
x.lower().replace('"', "") for x in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
|
|
159
|
+
]
|
|
160
|
+
exists = self.view_name.lower() in existing_tables or self.table_name.lower() in existing_tables
|
|
161
|
+
logger.debug("Existence check", extra={"exists": exists, "objects": existing_tables})
|
|
162
|
+
return exists
|
|
163
|
+
|
|
164
|
+
def load_filter(self, df, primary_index=None, time_column=None):
|
|
165
|
+
"""
|
|
166
|
+
Load a new filter set into the hidden table and (re)point the public view at filter_id=1.
|
|
167
|
+
|
|
168
|
+
Each row in `df` is assigned a deterministic `filter_id` based on ROW_NUMBER() over the
|
|
169
|
+
ordered set of its columns (plus `BUSINESS_DATE` when time filtering is enabled). If
|
|
170
|
+
`time_column` is provided, values from that column are copied into `BUSINESS_DATE` and the
|
|
171
|
+
view will include that time dimension.
|
|
84
172
|
|
|
85
173
|
Args:
|
|
86
|
-
df (DataFrame):
|
|
87
|
-
primary_index (list, optional):
|
|
88
|
-
|
|
174
|
+
df (DataFrame): Incoming filter definitions (one row per filter).
|
|
175
|
+
primary_index (list[str], optional): Primary index columns for the hidden table.
|
|
176
|
+
Defaults to [filter_id_name] (the manager's filter id column) when omitted.
|
|
177
|
+
time_column (str, optional): Name of the time column in `df` to map into `BUSINESS_DATE`.
|
|
178
|
+
If provided, time-based filtering is enabled.
|
|
179
|
+
|
|
180
|
+
Raises:
|
|
181
|
+
ValueError: If `time_column` is provided but not present in `df`.
|
|
89
182
|
"""
|
|
183
|
+
logger.info("Loading filters", extra={"rows": df.shape[0], "time_column": time_column})
|
|
90
184
|
|
|
91
185
|
if time_column and time_column not in df.columns:
|
|
186
|
+
logger.error("Specified time_column not found in DataFrame.", extra={"time_column": time_column})
|
|
92
187
|
raise ValueError(f"Specified time_column '{time_column}' not found in DataFrame columns.")
|
|
93
188
|
|
|
189
|
+
# Determine projection and ordering columns
|
|
94
190
|
if time_column is None:
|
|
191
|
+
self.time_filtering = False
|
|
95
192
|
self.col_names = df.columns
|
|
96
|
-
all_columns
|
|
97
|
-
collect_stats
|
|
193
|
+
all_columns = ",".join(df.columns)
|
|
194
|
+
collect_stats = ",".join([f"COLUMN ({c})" for c in df.columns])
|
|
98
195
|
else:
|
|
99
196
|
self.time_filtering = True
|
|
100
|
-
# check if time_colum is part of the column
|
|
101
197
|
self.col_names = [c for c in df.columns if c != time_column]
|
|
102
|
-
all_columns
|
|
103
|
-
collect_stats
|
|
104
|
-
|
|
105
|
-
|
|
198
|
+
all_columns = ",".join(["BUSINESS_DATE"] + self.col_names)
|
|
199
|
+
collect_stats = ",".join([f"COLUMN ({c})" for c in ["BUSINESS_DATE"] + self.col_names])
|
|
106
200
|
|
|
201
|
+
logger.debug(
|
|
202
|
+
"Computed load_filter columns",
|
|
203
|
+
extra={"time_filtering": self.time_filtering, "col_names": list(self.col_names), "all_columns": all_columns},
|
|
204
|
+
)
|
|
107
205
|
|
|
206
|
+
# Build the filter rows with an ordered ROW_NUMBER()
|
|
108
207
|
if time_column is None:
|
|
109
|
-
df_filter = df.assign(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
208
|
+
df_filter = df.assign(
|
|
209
|
+
**{
|
|
210
|
+
self.filter_id_name: tdml.sqlalchemy.literal_column(
|
|
211
|
+
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {all_columns})", tdml.BIGINT()
|
|
212
|
+
)
|
|
213
|
+
}
|
|
214
|
+
)[[self.filter_id_name] + list(df.columns)]
|
|
114
215
|
else:
|
|
115
|
-
df_filter = df.assign(
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
216
|
+
df_filter = df.assign(
|
|
217
|
+
**{
|
|
218
|
+
self.filter_id_name: tdml.sqlalchemy.literal_column(
|
|
219
|
+
f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {all_columns})", tdml.BIGINT()
|
|
220
|
+
),
|
|
221
|
+
"BUSINESS_DATE": df[time_column],
|
|
222
|
+
}
|
|
223
|
+
)[[self.filter_id_name, "BUSINESS_DATE"] + self.col_names]
|
|
224
|
+
|
|
225
|
+
# Persist to hidden table
|
|
122
226
|
if primary_index is None:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
227
|
+
primary_index = [self.filter_id_name]
|
|
228
|
+
|
|
229
|
+
logger.debug("Writing hidden table", extra={"primary_index": primary_index})
|
|
230
|
+
df_filter.to_sql(
|
|
231
|
+
table_name=self.table_name,
|
|
232
|
+
schema_name=self.schema_name,
|
|
233
|
+
if_exists="replace",
|
|
234
|
+
primary_index=primary_index,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
# Create/replace public view with filter_id = 1
|
|
238
|
+
view_sql = f"""
|
|
133
239
|
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
134
240
|
SEL {all_columns}
|
|
135
241
|
FROM {self.schema_name}.{self.table_name}
|
|
136
242
|
WHERE {self.filter_id_name} = 1
|
|
137
243
|
"""
|
|
244
|
+
logger.debug("Replacing view for filter_id=1")
|
|
245
|
+
tdml.execute_sql(view_sql)
|
|
138
246
|
|
|
139
|
-
# Collect stats
|
|
140
|
-
|
|
141
|
-
query_collect_stats = f"""
|
|
247
|
+
# Collect stats to help the optimizer
|
|
248
|
+
stats_sql = f"""
|
|
142
249
|
COLLECT STATISTICS USING NO SAMPLE AND NO THRESHOLD
|
|
143
|
-
COLUMN (
|
|
250
|
+
COLUMN ({self.filter_id_name})
|
|
144
251
|
, {collect_stats}
|
|
145
252
|
ON {self.schema_name}.{self.table_name}
|
|
146
253
|
"""
|
|
147
|
-
|
|
148
|
-
tdml.execute_sql(
|
|
254
|
+
logger.debug("Collecting statistics on hidden table")
|
|
255
|
+
tdml.execute_sql(stats_sql)
|
|
149
256
|
|
|
150
257
|
self.nb_filters = tdml.execute_sql(
|
|
151
|
-
f"SEL MAX({self.filter_id_name}) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
258
|
+
f"SEL MAX({self.filter_id_name}) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
259
|
+
).fetchall()[0][0]
|
|
260
|
+
logger.info("Filters loaded", extra={"nb_filters": self.nb_filters})
|
|
152
261
|
|
|
153
262
|
def _drop(self):
|
|
154
263
|
"""
|
|
155
|
-
|
|
264
|
+
Drop the public view and (optionally) the hidden table.
|
|
156
265
|
|
|
157
|
-
|
|
266
|
+
If this manager does not own the hidden table (default), only the view is dropped.
|
|
158
267
|
"""
|
|
159
|
-
# Drop the
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
268
|
+
# Drop the view (in our schema)
|
|
269
|
+
existing = [x.lower().replace('"', "") for x in tdml.db_list_tables(schema_name=self.schema_name).TableName.values]
|
|
270
|
+
if self.view_name.lower() in existing:
|
|
271
|
+
logger.warning("Dropping view.", extra={"schema_name": self.schema_name, "view_name": self.view_name})
|
|
272
|
+
tdml.db_drop_view(schema_name=self.schema_name, table_name=self.view_name)
|
|
273
|
+
else:
|
|
274
|
+
logger.info("View not found; nothing to drop.", extra={"schema_name": self.schema_name, "view_name": self.view_name})
|
|
275
|
+
|
|
276
|
+
# Drop the hidden table only if we own it
|
|
277
|
+
if getattr(self, "_owns_hidden", False):
|
|
278
|
+
schema_tbl = getattr(self, "schema_name_for_table", self.schema_name)
|
|
279
|
+
logger.warning(
|
|
280
|
+
"Dropping hidden table (ownership acknowledged).",
|
|
281
|
+
extra={"schema_name": schema_tbl, "table_name": self.table_name},
|
|
282
|
+
)
|
|
283
|
+
tdml.db_drop_table(schema_name=schema_tbl, table_name=self.table_name)
|
|
284
|
+
else:
|
|
285
|
+
logger.info("Hidden table not dropped (not owned).")
|
|
286
|
+
|
|
163
287
|
|
|
164
288
|
def update(self, filter_id):
|
|
165
289
|
"""
|
|
166
|
-
|
|
290
|
+
Repoint the public view to a different filter id.
|
|
167
291
|
|
|
168
292
|
Args:
|
|
169
|
-
filter_id (int):
|
|
293
|
+
filter_id (int): Target filter id to apply.
|
|
294
|
+
|
|
295
|
+
Raises:
|
|
296
|
+
ValueError: If filter artifacts do not exist yet.
|
|
170
297
|
"""
|
|
298
|
+
|
|
299
|
+
|
|
171
300
|
if not self._exists():
|
|
172
|
-
|
|
301
|
+
logger_safe("error", "Filter artifacts not initialized.")
|
|
302
|
+
raise ValueError("The filter has not been initialized with load_filter() or has been deleted.")
|
|
173
303
|
|
|
174
304
|
if self.time_filtering:
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
SEL {','.join(['BUSINESS_DATE']+self.col_names)}
|
|
178
|
-
FROM {self.schema_name}.{self.table_name}
|
|
179
|
-
WHERE {self.filter_id_name} = {filter_id}
|
|
180
|
-
"""
|
|
181
|
-
|
|
305
|
+
select_cols_str = ["BUSINESS_DATE"] + list(self.col_names)
|
|
306
|
+
select_cols = ",".join(["BUSINESS_DATE"] + list(self.col_names))
|
|
182
307
|
else:
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
308
|
+
select_cols_str = list(self.col_names)
|
|
309
|
+
select_cols = ",".join(self.col_names)
|
|
310
|
+
|
|
311
|
+
query = f"""
|
|
312
|
+
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
313
|
+
SEL {select_cols}
|
|
314
|
+
FROM {self.schema_name}.{self.table_name}
|
|
315
|
+
WHERE {self.filter_id_name} = {filter_id}
|
|
316
|
+
"""
|
|
317
|
+
logger_safe("info", "Updating active filter | %s", ','.join([f"{c}:{v}" for c,v in zip(select_cols_str, tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()[0])]))
|
|
318
|
+
|
|
319
|
+
if getattr(tdfs4ds, "DEBUG_MODE", False):
|
|
320
|
+
logger_safe("debug", "Replacing view with new filter:\n%s", query)
|
|
321
|
+
|
|
192
322
|
tdml.execute_sql(query)
|
|
323
|
+
logger_safe("debug", "View %s.%s updated to filter_id=%s", self.schema_name, self.view_name, filter_id)
|
|
324
|
+
|
|
193
325
|
|
|
194
326
|
def display(self):
|
|
195
327
|
"""
|
|
196
|
-
|
|
328
|
+
Retrieve the current view contents as a `teradataml.DataFrame`.
|
|
197
329
|
|
|
198
330
|
Returns:
|
|
199
|
-
DataFrame:
|
|
331
|
+
teradataml.DataFrame: Rows projected by the public view (current filter).
|
|
200
332
|
"""
|
|
333
|
+
logger.debug("Fetching current view contents")
|
|
201
334
|
return tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name))
|
|
202
335
|
|
|
203
336
|
def get_all_filters(self):
|
|
337
|
+
"""
|
|
338
|
+
Retrieve all filter rows from the hidden table.
|
|
339
|
+
|
|
340
|
+
Returns:
|
|
341
|
+
teradataml.DataFrame: Full set of stored filters.
|
|
342
|
+
"""
|
|
343
|
+
logger.debug("Fetching all filters from hidden table")
|
|
204
344
|
return tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
205
345
|
|
|
206
346
|
def get_date_in_the_past(self):
|
|
207
347
|
"""
|
|
208
|
-
|
|
348
|
+
Return the first business date/time found in the *current view*.
|
|
349
|
+
|
|
350
|
+
The method reads the first `BUSINESS_DATE` value from the current view
|
|
351
|
+
and normalizes it to a `%Y-%m-%d %H:%M:%S` string. Requires that time
|
|
352
|
+
filtering is enabled.
|
|
209
353
|
|
|
210
354
|
Returns:
|
|
211
|
-
str:
|
|
355
|
+
str: First BUSINESS_DATE value of the current view, formatted as 'YYYY-MM-DD HH:MM:SS'.
|
|
356
|
+
|
|
357
|
+
Raises:
|
|
358
|
+
ValueError: If time-based filtering is not enabled.
|
|
212
359
|
"""
|
|
360
|
+
logger.debug("Computing earliest BUSINESS_DATE from current view")
|
|
213
361
|
|
|
214
|
-
if self._istimefiltering()
|
|
215
|
-
|
|
362
|
+
if not self._istimefiltering():
|
|
363
|
+
logger.error("Time filtering requested but not enabled.")
|
|
364
|
+
raise ValueError("The filter manager is not filtering on time.")
|
|
216
365
|
|
|
217
|
-
# '9999-01-01 00:00:00'
|
|
218
366
|
date_obj = self.display().to_pandas().reset_index().BUSINESS_DATE.values[0]
|
|
219
367
|
|
|
220
368
|
if isinstance(date_obj, datetime.datetime):
|
|
221
|
-
# print("temp is a datetime.datetime object")
|
|
222
369
|
datetime_obj = date_obj
|
|
223
370
|
elif isinstance(date_obj, datetime.date):
|
|
224
|
-
# print("temp is a datetime.date object")
|
|
225
|
-
# Convert date object to a datetime object at midnight (00:00:00)
|
|
226
371
|
datetime_obj = datetime.datetime.combine(date_obj, datetime.time.min)
|
|
227
372
|
elif isinstance(date_obj, np.datetime64):
|
|
228
|
-
#
|
|
229
|
-
datetime_obj = date_obj.astype(
|
|
373
|
+
# normalize to datetime (cast to ms first: a ns-precision datetime64 converts to an int, not a datetime)
|
|
374
|
+
datetime_obj = date_obj.astype("datetime64[ms]").astype(datetime.datetime)
|
|
230
375
|
else:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
376
|
+
logger.error(
|
|
377
|
+
"Unsupported BUSINESS_DATE type.",
|
|
378
|
+
extra={"value": str(date_obj), "type": str(type(date_obj))},
|
|
379
|
+
)
|
|
380
|
+
raise TypeError(f"Unsupported BUSINESS_DATE type: {type(date_obj)}")
|
|
235
381
|
|
|
236
|
-
# Convert datetime object to string
|
|
237
382
|
output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
|
|
383
|
+
logger.debug("Earliest date computed", extra={"earliest": output_string})
|
|
384
|
+
return output_string
|
|
385
|
+
|
|
386
|
+
def get_current_filterid(self):
|
|
387
|
+
"""
|
|
388
|
+
Extract the currently active filter id from the view DDL.
|
|
389
|
+
|
|
390
|
+
Returns:
|
|
391
|
+
int: Filter id parsed from the view's definition.
|
|
392
|
+
|
|
393
|
+
Raises:
|
|
394
|
+
ValueError: If the filter id cannot be parsed from the DDL.
|
|
395
|
+
"""
|
|
396
|
+
logger.debug("Reading view DDL to extract current filter id")
|
|
397
|
+
txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)
|
|
398
|
+
try:
|
|
399
|
+
current = int(txt.split("\n")[-1].split("=")[1])
|
|
400
|
+
logger.info("Current filter id extracted", extra={"filter_id": current})
|
|
401
|
+
return current
|
|
402
|
+
except Exception as exc:
|
|
403
|
+
logger.exception("Failed to parse filter id from view DDL")
|
|
404
|
+
raise ValueError("Unable to parse current filter id from view DDL.") from exc
|
|
405
|
+
|
|
406
|
+
def print_view_ddl(self):
|
|
407
|
+
"""
|
|
408
|
+
Log the view definition (DDL) for troubleshooting/traceability.
|
|
409
|
+
"""
|
|
410
|
+
ddl = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)
|
|
411
|
+
logger.info("View DDL:\n%s", ddl)
|
|
412
|
+
|
|
413
|
+
def prune_filter(self, filter_id=None):
|
|
414
|
+
"""
|
|
415
|
+
Remove all filters with ids lower than `filter_id` and renumber remaining ones.
|
|
416
|
+
|
|
417
|
+
If `filter_id` is omitted, the method uses the current filter id from the view.
|
|
418
|
+
After pruning, filter ids are normalized so the smallest remaining id becomes 1,
|
|
419
|
+
and the public view is repointed to filter_id=1.
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
filter_id (int, optional): Threshold id; rows with `{filter_id_name} < filter_id` are deleted.
|
|
423
|
+
|
|
424
|
+
Returns:
|
|
425
|
+
FilterManager: Self, to allow method chaining.
|
|
426
|
+
"""
|
|
427
|
+
if filter_id is None:
|
|
428
|
+
filter_id = self.get_current_filterid()
|
|
429
|
+
|
|
430
|
+
logger.info("Pruning filters", extra={"threshold_filter_id": filter_id})
|
|
431
|
+
|
|
432
|
+
delete_sql = f"DELETE {self.schema_name}.{self.table_name} WHERE {self.filter_id_name} < {filter_id}"
|
|
433
|
+
update_sql = f"UPDATE {self.schema_name}.{self.table_name} SET {self.filter_id_name} = {self.filter_id_name} - {filter_id} + 1"
|
|
434
|
+
|
|
435
|
+
logger.debug("Executing prune delete", extra={"sql": delete_sql})
|
|
436
|
+
tdml.execute_sql(delete_sql)
|
|
437
|
+
|
|
438
|
+
logger.debug("Executing prune renumber", extra={"sql": update_sql})
|
|
439
|
+
tdml.execute_sql(update_sql)
|
|
440
|
+
|
|
441
|
+
self.update(1)
|
|
442
|
+
logger.info("Prune complete; active filter set to 1.")
|
|
443
|
+
return self
|
|
444
|
+
|
|
445
|
+
def clone_filter(self, source_filtermanager, filter_id_to_apply=1, take_ownership=False, clone_mode="soft", if_exists="error"):
|
|
446
|
+
"""
|
|
447
|
+
Clone filter definitions from another FilterManager.
|
|
448
|
+
|
|
449
|
+
Supports:
|
|
450
|
+
- soft clone (default): just point to source _HIDDEN table
|
|
451
|
+
- hard clone: copy the source _HIDDEN table and own the copy
|
|
452
|
+
|
|
453
|
+
Args:
|
|
454
|
+
source_filtermanager (FilterManager): Source FilterManager to clone.
|
|
455
|
+
filter_id_to_apply (int, optional): Filter ID to activate. Default: 1.
|
|
456
|
+
take_ownership (bool, optional): Whether this manager owns the cloned table (soft mode only).
|
|
457
|
+
clone_mode (str, optional): "soft" or "hard". Default: "soft".
|
|
458
|
+
if_exists (str, optional): Behavior if target hidden table already exists
|
|
459
|
+
- "error" (default): raise an exception
|
|
460
|
+
- "replace": drop and recreate
|
|
461
|
+
- "skip": reuse existing table
|
|
462
|
+
|
|
463
|
+
Returns:
|
|
464
|
+
FilterManager
|
|
465
|
+
|
|
466
|
+
Raises:
|
|
467
|
+
ValueError: On invalid clone_mode or if_exists, or missing source table.
RuntimeError: If the target hidden table already exists and if_exists="error".
|
|
468
|
+
"""
|
|
469
|
+
if clone_mode not in ("soft", "hard"):
|
|
470
|
+
raise ValueError("clone_mode must be 'soft' or 'hard'")
|
|
471
|
+
if if_exists not in ("error", "replace", "skip"):
|
|
472
|
+
raise ValueError("if_exists must be 'error', 'replace', or 'skip'")
|
|
473
|
+
|
|
474
|
+
src_schema = source_filtermanager.schema_name
|
|
475
|
+
src_hidden = source_filtermanager.table_name
|
|
476
|
+
|
|
477
|
+
logger.info(
|
|
478
|
+
"Cloning filter",
|
|
479
|
+
extra={
|
|
480
|
+
"mode": clone_mode,
|
|
481
|
+
"source": f"{src_schema}.{src_hidden}",
|
|
482
|
+
"target_view": f"{self.schema_name}.{self.view_name}"
|
|
483
|
+
},
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
# Validate source exists
|
|
487
|
+
existing_src = [t.lower() for t in tdml.db_list_tables(schema_name=src_schema).TableName.values]
|
|
488
|
+
if src_hidden.lower() not in existing_src:
|
|
489
|
+
raise ValueError(f"Source hidden filter table {src_schema}.{src_hidden} does not exist.")
|
|
490
|
+
|
|
491
|
+
if clone_mode == "hard":
|
|
492
|
+
# Hard clone requires a NEW hidden table in this schema
|
|
493
|
+
self.table_name = get_hidden_table_name(schema_name=self.schema_name, view_name=self.view_name)
|
|
494
|
+
existing_dest = [t.lower() for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values]
|
|
495
|
+
|
|
496
|
+
# Handle table existence
|
|
497
|
+
if self.table_name.lower() in existing_dest:
|
|
498
|
+
if if_exists == "error":
|
|
499
|
+
raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
|
|
500
|
+
elif if_exists == "replace":
|
|
501
|
+
logger.warning(f"Replacing existing table {self.schema_name}.{self.table_name}")
|
|
502
|
+
tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
|
|
503
|
+
elif if_exists == "skip":
|
|
504
|
+
logger.info(f"Skipping clone, using existing {self.schema_name}.{self.table_name}")
|
|
505
|
+
if self.table_name.lower() not in existing_dest or if_exists == "replace":
|
|
506
|
+
# Create cloned table
|
|
507
|
+
logger.info(f"Creating cloned table {self.schema_name}.{self.table_name}")
|
|
508
|
+
create_sql = f"""
|
|
509
|
+
CREATE TABLE {self.schema_name}.{self.table_name} AS
|
|
510
|
+
(SELECT * FROM {src_schema}.{src_hidden})
|
|
511
|
+
WITH DATA
|
|
512
|
+
"""
|
|
513
|
+
tdml.execute_sql(create_sql)
|
|
514
|
+
|
|
515
|
+
self._owns_hidden = True # Hard clones always own their copy
|
|
516
|
+
target_schema = self.schema_name
|
|
517
|
+
|
|
518
|
+
else:
|
|
519
|
+
# Soft clone: link to source
|
|
520
|
+
logger.info("Soft clone: linking to source table")
|
|
521
|
+
self.table_name = src_hidden
|
|
522
|
+
self._owns_hidden = bool(take_ownership)
|
|
523
|
+
target_schema = src_schema # view selects from source schema
|
|
524
|
+
|
|
525
|
+
# Load metadata
|
|
526
|
+
df = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
|
|
527
|
+
self.filter_id_name = df.columns[0]
|
|
528
|
+
self.time_filtering = "BUSINESS_DATE" in df.columns
|
|
529
|
+
self.col_names = df.columns[2:] if self.time_filtering else df.columns[1:]
|
|
530
|
+
self.nb_filters = df.shape[0]
|
|
531
|
+
|
|
532
|
+
# Rebuild view
|
|
533
|
+
select_cols = ",".join((["BUSINESS_DATE"] if self.time_filtering else []) + list(self.col_names))
|
|
534
|
+
view_sql = f"""
|
|
535
|
+
REPLACE VIEW {self.schema_name}.{self.view_name} AS
|
|
536
|
+
SELECT {select_cols}
|
|
537
|
+
FROM {target_schema}.{self.table_name}
|
|
538
|
+
WHERE {self.filter_id_name} = {int(filter_id_to_apply)}
|
|
539
|
+
"""
|
|
540
|
+
tdml.execute_sql(view_sql)
|
|
541
|
+
|
|
542
|
+
logger.info(f"Clone complete → Active filter_id={filter_id_to_apply}")
|
|
543
|
+
return self
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def take_ownership(self):
|
|
547
|
+
"""
|
|
548
|
+
Take ownership of the currently linked hidden filter table.
|
|
549
|
+
|
|
550
|
+
This enables this FilterManager instance to manage (and potentially drop)
|
|
551
|
+
the hidden table via `_drop()` or future maintenance methods.
|
|
552
|
+
|
|
553
|
+
Returns:
|
|
554
|
+
FilterManager: self (for chaining)
|
|
555
|
+
"""
|
|
556
|
+
logger.warning(
|
|
557
|
+
"Ownership taken for hidden table. This manager may now drop or modify it.",
|
|
558
|
+
extra={
|
|
559
|
+
"schema_name": getattr(self, "schema_name_for_table", self.schema_name),
|
|
560
|
+
"table_name": self.table_name
|
|
561
|
+
}
|
|
562
|
+
)
|
|
563
|
+
self._owns_hidden = True
|
|
564
|
+
return self
|
|
565
|
+
|
|
566
|
+
def get_filter_condition(self, output_type: str = "str"):
|
|
567
|
+
"""
|
|
568
|
+
Retrieve the currently applied filter condition from the database.
|
|
569
|
+
|
|
570
|
+
This method executes a SQL query that aggregates the filter columns from a
|
|
571
|
+
filter manager view and returns the result either as a Python dictionary
|
|
572
|
+
or a raw JSON string.
|
|
573
|
+
|
|
574
|
+
Args:
|
|
575
|
+
output_type (str, optional): Format of the returned filter condition.
|
|
576
|
+
- "dict": Returns a Python dictionary.
|
|
577
|
+
- "str": Returns a raw JSON string (default).
|
|
578
|
+
|
|
579
|
+
Returns:
|
|
580
|
+
dict | str | None: The current filter condition in the requested format.
|
|
581
|
+
Returns None if no filter is present.
|
|
582
|
+
|
|
583
|
+
Raises:
|
|
584
|
+
ValueError: If `output_type` is not "dict" or "str".
|
|
585
|
+
RuntimeError: If the SQL execution fails.
|
|
586
|
+
|
|
587
|
+
Example:
|
|
588
|
+
>>> filters = self.get_filter_condition(output_type="dict")
|
|
589
|
+
>>> print(filters)
|
|
590
|
+
{'country': 'US', 'status': 'active'}
|
|
591
|
+
"""
|
|
592
|
+
logger_safe("debug", "Fetching current filter condition from the database")
|
|
593
|
+
|
|
594
|
+
# Build JSON_AGG expression safely
|
|
595
|
+
json_columns = ",".join(self.col_names)
|
|
596
|
+
|
|
597
|
+
query = f"""
|
|
598
|
+
SELECT JSON_AGG({json_columns}) AS applied_filter
|
|
599
|
+
FROM {self.schema_name}.{self.view_name} FILTER_MANAGER
|
|
600
|
+
"""
|
|
601
|
+
|
|
602
|
+
try:
|
|
603
|
+
result = tdml.execute_sql(query).fetchall()
|
|
604
|
+
except Exception as e:
|
|
605
|
+
logger_safe("error", "Failed to execute SQL for filter condition: %s", e)
|
|
606
|
+
raise RuntimeError("Database query failed while fetching filter condition") from e
|
|
607
|
+
|
|
608
|
+
# Handle no result
|
|
609
|
+
if not result or result[0][0] is None:
|
|
610
|
+
logger_safe("info", "No filter conditions found")
|
|
611
|
+
return None
|
|
612
|
+
|
|
613
|
+
json_result = result[0][0]
|
|
614
|
+
|
|
615
|
+
if output_type == "dict":
|
|
616
|
+
logger_safe("debug", "Returning filter as Python dictionary")
|
|
617
|
+
return json.loads(json_result)
|
|
618
|
+
elif output_type == "str":
|
|
619
|
+
logger_safe("debug", "Returning filter as JSON string")
|
|
620
|
+
return json_result
|
|
238
621
|
|
|
239
|
-
|
|
622
|
+
raise ValueError("Invalid output_type. Expected 'dict' or 'str'.")
|