tdfs4ds 0.2.4.26__py3-none-any.whl → 0.2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,239 +1,649 @@
1
+ import datetime
2
+ import numpy as np # Needed for np.datetime64 handling in get_date_in_the_past
1
3
  import teradataml as tdml
2
4
  import tdfs4ds
3
- import datetime
5
+ from tdfs4ds import logger, logger_safe
6
+ import json
7
+
8
+
9
def get_hidden_table_name(schema_name, view_name):
    """
    Resolve the name of the hidden table backing a filter view.

    The view DDL is fetched with `tdfs4ds.utils.lineage.get_ddl` and the table
    name is read from its ``FROM <schema>.<table>`` line. When the DDL cannot
    be fetched or parsed (typically because the view does not exist yet), the
    conventional name ``<view_name>_HIDDEN`` is returned instead.

    Args:
        schema_name (str): Schema that contains the public view.
        view_name (str): Name of the public view.

    Returns:
        str: Name of the backing table (without schema qualifier).
    """
    try:
        ddl = tdfs4ds.utils.lineage.get_ddl(schema_name=schema_name, view_name=view_name)

        # Prefer locating the FROM line explicitly: a fixed line position breaks
        # as soon as the DDL carries a trailing blank line.
        for line in reversed(ddl.splitlines()):
            candidate = line.strip()
            if candidate.upper().startswith("FROM ") and "." in candidate:
                return candidate.split(".")[1].strip()

        # Fallback: historical positional parsing (second-to-last line).
        return ddl.split("\n")[-2].split(".")[1].strip()

    except Exception as e:
        # Deliberate best effort: any failure falls back to the suffix convention.
        msg = str(e)

        # Teradata "object does not exist" -> normal for first-time setup
        if "Error 3807" in msg or "does not exist" in msg.lower():
            logger_safe(
                "info",
                "View %s.%s not found; using default hidden table name %s_HIDDEN.",
                schema_name, view_name, view_name
            )
        else:
            # An exception may carry an empty message; never index into an
            # empty list here, or the fallback itself would crash.
            first_line = msg.splitlines()[0] if msg.strip() else type(e).__name__
            logger_safe(
                "warning",
                "Failed to extract hidden table name from DDL; defaulting to suffix method. Error: %s",
                first_line
            )

        return view_name + "_HIDDEN"
8
35
 
9
36
 
10
37
class FilterManager:
    """
    A utility for managing dynamic, versioned filter sets as database-backed views.

    The FilterManager enables lightweight scenario management by storing multiple
    filter definitions in a hidden Teradata table and exposing a public view that
    dynamically switches between them by `filter_id`. Each row in the hidden table
    represents a complete filter configuration. The active configuration is
    controlled by updating the view definition rather than rewriting table data.

    Key Features:
        - Store multiple filter states (scenarios) indexed by `filter_id`
        - Switch filter states instantly by updating a view
        - Optionally include time-based slicing using a `BUSINESS_DATE` column
        - Clone filters between managers (soft or hard clone modes)
        - Prune obsolete filters to control table size
        - Retrieve current and historical filter definitions

    Workflow Overview:
        1. Create a `FilterManager` pointing to a target view name.
        2. Load one or more filter definitions using `load_filter()`.
        3. Switch active filters using `update(filter_id)`.
        4. Inspect the active filter via `display()` or view DDL.
        5. Optionally prune or clone filters as needed.

    How It Works Internally:
        - A hidden table named `<view_name>_HIDDEN` stores filter definitions.
        - A Teradata view named `<view_name>` exposes only the *active* filter row.
        - Each filter automatically receives a sequential `filter_id`
          (`ROW_NUMBER()` ordering ensures deterministic assignment).
        - If time-based filtering is used via `time_column`, a `BUSINESS_DATE`
          column is added and projected in all operations.

    Parameters:
        table_name (str): Public view name to manage or create.
        schema_name (str): Teradata schema where artifacts will be created.
        filter_id_name (str, optional): Name of the filter ID column. Defaults to `'filter_id'`.
        time_column (str, optional): Optional name of a timestamp column from input DataFrames
            that maps to a `BUSINESS_DATE` column for time-aware filters.

    Attributes:
        schema_name (str): Target schema for the public view.
        schema_name_for_table (str): Schema holding the hidden table. Equal to
            `schema_name` unless a soft clone links to a table in another schema.
        table_name (str): Name of hidden table storing filters (auto-suffixed with `_HIDDEN`).
        view_name (str): Name of public view pointing to current filter.
        filter_id_name (str): Column containing filter ID.
        nb_filters (int | None): Number of stored filters (None until initialized).
        col_names (list[str] | None): Columns projected by the view (data columns only).
        time_filtering (bool | None): True if time-based filtering enabled.

    Notes:
        - Database objects are only created when `load_filter()` is first called.
        - Safe for iterative pipeline runs—auto-detects existing artifacts.
        - Designed for large production tables and Teradata-native workflows.
    """

    def __init__(self, table_name, schema_name, filter_id_name="filter_id", time_column=None):
        """
        Initialize the FilterManager.

        If the hidden table/view already exist, metadata (column names, maximum
        filter id, and time filtering status) are detected and cached. If they do
        not exist yet, attributes are initialized but no objects are created until
        `load_filter()` is called.
        """
        self.schema_name = schema_name
        self.table_name = get_hidden_table_name(schema_name=schema_name, view_name=table_name)
        self.view_name = table_name
        self.filter_id_name = filter_id_name
        self.nb_filters = None
        self.col_names = None
        self.time_filtering = None
        self._init_time_column = time_column  # Remember user hint for later
        self._owns_hidden = False  # Ownership is only granted by clone/take_ownership
        self.schema_name_for_table = schema_name  # Hidden table lives next to the view by default

        logger_safe(
            "debug",
            "Initializing FilterManager | schema_name=%s | view_name=%s | table_name=%s | filter_id_name=%s",
            self.schema_name, self.view_name, self.table_name, self.filter_id_name
        )

        if self._exists():
            logger_safe(
                "info",
                "Existing filter artifacts detected | schema_name=%s | view_name=%s | table_name=%s",
                self.schema_name, self.view_name, self.table_name
            )

            df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
            self.filter_id_name = df.columns[0]  # First column is assumed to be filter id
            self.nb_filters = self._fetch_max_filter_id()
            self.time_filtering = self._istimefiltering()
            # BUSINESS_DATE, when present, is assumed to be the second column.
            self.col_names = df.columns[2:] if self.time_filtering else df.columns[1:]

            logger_safe(
                "debug",
                "Detected existing configuration | filter_id_name=%s | nb_filters=%s | time_filtering=%s | col_names=%s",
                self.filter_id_name, self.nb_filters, self.time_filtering, list(self.col_names)
            )
        else:
            logger_safe(
                "info",
                "No existing filter artifacts found; will be created by load_filter() | schema_name=%s | view_name=%s",
                self.schema_name, self.view_name
            )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _hidden_schema(self):
        """Return the schema that holds the hidden table (falls back to `schema_name`)."""
        return getattr(self, "schema_name_for_table", self.schema_name)

    @staticmethod
    def _list_objects(schema_name):
        """Return lower-cased, unquoted names of the tables/views found in `schema_name`."""
        return [
            x.lower().replace('"', "")
            for x in tdml.db_list_tables(schema_name=schema_name).TableName.values
        ]

    def _select_columns(self):
        """Return the columns projected by the public view, `BUSINESS_DATE` first when enabled."""
        prefix = ["BUSINESS_DATE"] if self.time_filtering else []
        return prefix + list(self.col_names)

    def _fetch_max_filter_id(self):
        """Query the hidden table for the highest filter id currently stored."""
        return tdml.execute_sql(
            f"SEL MAX({self.filter_id_name}) AS nb_filters FROM {self._hidden_schema()}.{self.table_name}"
        ).fetchall()[0][0]

    @staticmethod
    def _normalize_primary_index(primary_index, default):
        """Return `primary_index` as a non-empty list of column names, or `[default]`."""
        if primary_index is None:
            return [default]
        if isinstance(primary_index, str):
            # A bare string would otherwise be indexed character by character.
            return [primary_index]
        columns = list(primary_index)
        return columns if columns else [default]

    @staticmethod
    def _parse_filter_id(ddl_text):
        """Extract the integer after '=' on the last non-blank line of a view DDL."""
        lines = [line for line in ddl_text.splitlines() if line.strip()]
        if not lines:
            raise ValueError("Empty view DDL.")
        parts = lines[-1].split("=")
        if len(parts) < 2:
            raise ValueError("No filter condition found on the last DDL line.")
        # Tolerate surrounding whitespace and a trailing statement terminator.
        return int(parts[1].strip().rstrip(";").strip())

    @staticmethod
    def _to_datetime(date_obj):
        """Normalize a date-like value to `datetime.datetime`; raise TypeError if unsupported."""
        # datetime.datetime must be tested first: it is a subclass of datetime.date.
        if isinstance(date_obj, datetime.datetime):
            return date_obj
        if isinstance(date_obj, datetime.date):
            return datetime.datetime.combine(date_obj, datetime.time.min)
        if isinstance(date_obj, np.datetime64):
            # normalize to datetime (ms precision to avoid timezone pitfalls)
            return date_obj.astype("datetime64[ms]").astype(datetime.datetime)
        raise TypeError(f"Unsupported BUSINESS_DATE type: {type(date_obj)}")

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------
    def _istimefiltering(self):
        """
        Determine if the hidden table includes a `BUSINESS_DATE` column.

        Returns:
            bool: True if the hidden table contains `BUSINESS_DATE`, else False.
        """
        df = tdml.DataFrame(tdml.in_schema(self._hidden_schema(), self.table_name))
        has_time = "BUSINESS_DATE" in df.columns
        logger.debug("Time filtering detected: %s", has_time)
        return has_time

    def _exists(self):
        """
        Check if either the public view or hidden table already exist in the schema.

        Returns:
            bool: True if the hidden table or view exists, else False.
        """
        existing_tables = self._list_objects(self.schema_name)
        exists = self.view_name.lower() in existing_tables or self.table_name.lower() in existing_tables
        logger.debug("Existence check", extra={"exists": exists, "objects": existing_tables})
        return exists

    # ------------------------------------------------------------------
    # Loading and switching filters
    # ------------------------------------------------------------------
    def load_filter(self, df, primary_index=None, time_column=None):
        """
        Load a new filter set into the hidden table and (re)point the public view at filter_id=1.

        Each row in `df` is assigned a deterministic `filter_id` based on ROW_NUMBER() over the
        ordered set of its columns (plus `BUSINESS_DATE` when time filtering is enabled). If
        `time_column` is provided, values from that column are copied into `BUSINESS_DATE` and the
        view will include that time dimension. Any existing hidden table is dropped and recreated.

        Args:
            df (DataFrame): Incoming filter definitions (one row per filter).
            primary_index (list[str] | str, optional): Primary index column(s) for the hidden table.
                Defaults to the filter id column when omitted.
            time_column (str, optional): Name of the time column in `df` to map into `BUSINESS_DATE`.
                If provided, time-based filtering is enabled.

        Raises:
            ValueError: If `time_column` is provided but not present in `df`.
        """
        logger.info("Loading filters", extra={"rows": df.shape[0], "time_column": time_column})

        # Treat an empty string like "no time column" so both checks below agree.
        time_column = time_column or None

        if time_column and time_column not in df.columns:
            logger.error("Specified time_column not found in DataFrame.", extra={"time_column": time_column})
            raise ValueError(f"Specified time_column '{time_column}' not found in DataFrame columns.")

        # Determine projection and ordering columns
        self.time_filtering = time_column is not None
        self.col_names = [c for c in df.columns if c != time_column]
        ordered_columns = self._select_columns()
        all_columns = ",".join(ordered_columns)
        collect_stats = ",".join(f"COLUMN ({c})" for c in ordered_columns)

        logger.debug(
            "Computed load_filter columns",
            extra={"time_filtering": self.time_filtering, "col_names": list(self.col_names), "all_columns": all_columns},
        )

        # Build the filter rows with an ordered ROW_NUMBER()
        assignments = {
            self.filter_id_name: tdml.sqlalchemy.literal_column(
                f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {all_columns})", tdml.BIGINT()
            )
        }
        if self.time_filtering:
            assignments["BUSINESS_DATE"] = df[time_column]
        df_filter = df.assign(**assignments)[[self.filter_id_name] + ordered_columns]

        # Persist to hidden table (always created next to the view)
        self.schema_name_for_table = self.schema_name
        primary_index = self._normalize_primary_index(primary_index, self.filter_id_name)
        primary_index_sql = ", ".join(primary_index)

        logger.debug("Writing hidden table", extra={"primary_index": primary_index})
        data_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_filter)
        column_defs = ", ".join(f'"{k}" {v}' for k, v in data_types.items())
        query_creation = f"""
        CREATE TABLE {self.schema_name}.{self.table_name} (
        {column_defs}
        ) UNIQUE PRIMARY INDEX ({primary_index_sql})
        """

        logger.debug("Creating hidden table with SQL:\n%s", query_creation)
        try:
            tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
        except Exception as exc:
            # Best effort: a missing table is expected on first load. Any other
            # failure will surface through the CREATE TABLE below.
            logger.debug("Hidden table did not exist; proceeding to create. (%s)", exc)
        tdml.execute_sql(query_creation)

        logger.debug("Inserting filter data into hidden table")
        df_filter.to_sql(
            table_name=self.table_name,
            schema_name=self.schema_name,
            if_exists="append"
        )

        # Create/replace public view with filter_id = 1.
        # NOTE: get_hidden_table_name() and get_current_filterid() parse this DDL
        # line by line; keep one clause per line with WHERE last.
        view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SEL {all_columns}
        FROM {self.schema_name}.{self.table_name}
        WHERE {self.filter_id_name} = 1
        """
        logger.debug("Replacing view for filter_id=1")
        tdml.execute_sql(view_sql)

        # Collect stats to help the optimizer
        for col in df_filter.columns:
            tdml.execute_sql(f"COLLECT STATISTICS ON {self.schema_name}.{self.table_name} COLUMN ({col})")
        stats_sql = f"""
        COLLECT STATISTICS USING NO SAMPLE AND NO THRESHOLD
        COLUMN ({self.filter_id_name})
        , {collect_stats}
        ON {self.schema_name}.{self.table_name}
        """
        logger.debug("Collecting statistics on hidden table")
        tdml.execute_sql(stats_sql)

        self.nb_filters = self._fetch_max_filter_id()
        logger.info("Filters loaded", extra={"nb_filters": self.nb_filters})

    def _drop(self):
        """
        Drop the public view and (optionally) the hidden table.

        If this manager does not own the hidden table (default), only the view is dropped.
        """
        # Drop the view (in our schema)
        existing = self._list_objects(self.schema_name)
        if self.view_name.lower() in existing:
            logger.warning("Dropping view.", extra={"schema_name": self.schema_name, "view_name": self.view_name})
            tdml.db_drop_view(schema_name=self.schema_name, table_name=self.view_name)
        else:
            logger.info("View not found; nothing to drop.", extra={"schema_name": self.schema_name, "view_name": self.view_name})

        # Drop the hidden table only if we own it
        if getattr(self, "_owns_hidden", False):
            schema_tbl = self._hidden_schema()
            logger.warning(
                "Dropping hidden table (ownership acknowledged).",
                extra={"schema_name": schema_tbl, "table_name": self.table_name},
            )
            tdml.db_drop_table(schema_name=schema_tbl, table_name=self.table_name)
        else:
            logger.info("Hidden table not dropped (not owned).")

    def update(self, filter_id):
        """
        Repoint the public view to a different filter id.

        The view is replaced first; the newly active row is then read back and
        logged on a best-effort basis, so a logging failure never blocks the switch.

        Args:
            filter_id (int): Target filter id to apply.

        Raises:
            ValueError: If filter artifacts do not exist yet, or `filter_id`
                cannot be interpreted as an integer.
        """
        # Coerce before building SQL so only an integer literal is interpolated.
        filter_id = int(filter_id)

        if not self._exists():
            logger_safe("error", "Filter artifacts not initialized.")
            raise ValueError("The filter has not been initialized with load_filter() or has been deleted.")

        select_cols = self._select_columns()
        select_cols_sql = ",".join(select_cols)

        query = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SEL {select_cols_sql}
        FROM {self._hidden_schema()}.{self.table_name}
        WHERE {self.filter_id_name} = {filter_id}
        """

        if getattr(tdfs4ds, "DEBUG_MODE", False):
            logger_safe("debug", "Replacing view with new filter:\n%s", query)

        tdml.execute_sql(query)
        logger_safe("debug", "View %s.%s updated to filter_id=%s", self.schema_name, self.view_name, filter_id)

        # Read back the active row for traceability only.
        try:
            rows = tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()
        except Exception as exc:
            logger_safe("warning", "Could not read back the active filter: %s", exc)
            return

        if rows:
            logger_safe(
                "info",
                "Active filter updated | %s",
                ",".join(f"{c}:{v}" for c, v in zip(select_cols, rows[0]))
            )
        else:
            logger_safe("warning", "filter_id=%s matches no row; the view is currently empty.", filter_id)

    # ------------------------------------------------------------------
    # Reading filters
    # ------------------------------------------------------------------
    def display(self):
        """
        Retrieve the current view contents as a `teradataml.DataFrame`.

        Returns:
            teradataml.DataFrame: Rows projected by the public view (current filter).
        """
        logger.debug("Fetching current view contents")
        return tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name))

    def get_all_filters(self):
        """
        Retrieve all filter rows from the hidden table.

        Returns:
            teradataml.DataFrame: Full set of stored filters.
        """
        logger.debug("Fetching all filters from hidden table")
        return tdml.DataFrame(tdml.in_schema(self._hidden_schema(), self.table_name))

    def get_date_in_the_past(self):
        """
        Return the business date/time of the *current view* as a string.

        The method reads the `BUSINESS_DATE` value of the first row returned by
        the current view and normalizes it to a `%Y-%m-%d %H:%M:%S` string.
        Requires that time filtering is enabled.

        Returns:
            str: Datetime formatted as 'YYYY-MM-DD HH:MM:SS'.

        Raises:
            ValueError: If time-based filtering is not enabled.
            TypeError: If the `BUSINESS_DATE` value has an unsupported type.
        """
        logger.debug("Computing earliest BUSINESS_DATE from current view")

        if not self._istimefiltering():
            logger.error("Time filtering requested but not enabled.")
            raise ValueError("The filter manager is not filtering on time.")

        date_obj = self.display().to_pandas().reset_index().BUSINESS_DATE.values[0]

        try:
            datetime_obj = self._to_datetime(date_obj)
        except TypeError:
            logger.error(
                "Unsupported BUSINESS_DATE type.",
                extra={"value": str(date_obj), "type": str(type(date_obj))},
            )
            raise

        output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
        logger.debug("Earliest date computed", extra={"earliest": output_string})
        return output_string

    def get_current_filterid(self):
        """
        Extract the currently active filter id from the view DDL.

        Returns:
            int: Filter id parsed from the view's definition.

        Raises:
            ValueError: If the filter id cannot be parsed from the DDL.
        """
        logger.debug("Reading view DDL to extract current filter id")
        txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)
        try:
            current = self._parse_filter_id(txt)
        except Exception as exc:
            logger.exception("Failed to parse filter id from view DDL")
            raise ValueError("Unable to parse current filter id from view DDL.") from exc
        logger.info("Current filter id extracted", extra={"filter_id": current})
        return current

    def print_view_ddl(self):
        """
        Log the view definition (DDL) for troubleshooting/traceability.
        """
        ddl = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)
        logger.info("View DDL:\n%s", ddl)

    # ------------------------------------------------------------------
    # Maintenance
    # ------------------------------------------------------------------
    def prune_filter(self, filter_id=None):
        """
        Remove all filters with ids lower than `filter_id` and renumber remaining ones.

        If `filter_id` is omitted, the method uses the current filter id from the view.
        After pruning, filter ids are normalized so the smallest remaining id becomes 1,
        the public view is repointed to filter_id=1, and `nb_filters` is refreshed.

        Args:
            filter_id (int, optional): Threshold id; rows with `{filter_id_name} < filter_id` are deleted.

        Returns:
            FilterManager: Self, to allow method chaining.

        Raises:
            ValueError: If `filter_id` is lower than 1 or not an integer.
        """
        if filter_id is None:
            filter_id = self.get_current_filterid()

        filter_id = int(filter_id)
        if filter_id < 1:
            # A threshold below 1 would shift every id upward and leave the
            # view pointing at a filter id that no longer exists.
            raise ValueError("filter_id must be greater than or equal to 1.")

        logger.info("Pruning filters", extra={"threshold_filter_id": filter_id})

        qualified_table = f"{self._hidden_schema()}.{self.table_name}"
        if filter_id > 1:  # With a threshold of 1 both statements are no-ops
            delete_sql = f"DELETE {qualified_table} WHERE {self.filter_id_name} < {filter_id}"
            update_sql = f"UPDATE {qualified_table} SET {self.filter_id_name} = {self.filter_id_name} - {filter_id} + 1"

            logger.debug("Executing prune delete", extra={"sql": delete_sql})
            tdml.execute_sql(delete_sql)

            logger.debug("Executing prune renumber", extra={"sql": update_sql})
            tdml.execute_sql(update_sql)

        self.update(1)
        self.nb_filters = self._fetch_max_filter_id()
        logger.info("Prune complete; active filter set to 1.")
        return self

    def clone_filter(self, source_filtermanager, filter_id_to_apply=1, take_ownership=False, clone_mode="soft", if_exists="error"):
        """
        Clone filter definitions from another FilterManager.

        Supports:
            - soft clone (default): just point to source _HIDDEN table
            - hard clone: copy the source _HIDDEN table and own the copy

        Args:
            source_filtermanager (FilterManager): Source FilterManager to clone.
            filter_id_to_apply (int, optional): Filter ID to activate. Default: 1.
            take_ownership (bool, optional): Whether this manager owns the cloned table (soft mode only).
            clone_mode (str, optional): "soft" or "hard". Default: "soft".
            if_exists (str, optional): Behavior if target hidden table already exists
                (hard mode only)
                - "error" (default): raise an exception
                - "replace": drop and recreate
                - "skip": reuse existing table

        Returns:
            FilterManager: self (for chaining)

        Raises:
            ValueError: On invalid clone_mode / if_exists or missing source.
            RuntimeError: If the target table exists and `if_exists="error"`.
        """
        if clone_mode not in ("soft", "hard"):
            raise ValueError("clone_mode must be 'soft' or 'hard'")
        if if_exists not in ("error", "replace", "skip"):
            raise ValueError("if_exists must be 'error', 'replace', or 'skip'")
        filter_id_to_apply = int(filter_id_to_apply)

        src_schema = source_filtermanager.schema_name
        src_hidden = source_filtermanager.table_name

        logger.info(
            "Cloning filter",
            extra={
                "mode": clone_mode,
                "source": f"{src_schema}.{src_hidden}",
                "target_view": f"{self.schema_name}.{self.view_name}"
            },
        )

        # Validate source exists (names compared lower-cased and unquoted,
        # consistently with _exists() and _drop()).
        if src_hidden.lower() not in self._list_objects(src_schema):
            raise ValueError(f"Source hidden filter table {src_schema}.{src_hidden} does not exist.")

        if clone_mode == "hard":
            # Hard clone requires a NEW hidden table in this schema
            self.table_name = get_hidden_table_name(schema_name=self.schema_name, view_name=self.view_name)
            target_exists = self.table_name.lower() in self._list_objects(self.schema_name)

            # Handle table existence
            if target_exists:
                if if_exists == "error":
                    raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
                elif if_exists == "replace":
                    logger.warning(f"Replacing existing table {self.schema_name}.{self.table_name}")
                    tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
                elif if_exists == "skip":
                    logger.info(f"Skipping clone, using existing {self.schema_name}.{self.table_name}")

            if not target_exists or if_exists == "replace":
                # Create cloned table
                logger.info(f"Creating cloned table {self.schema_name}.{self.table_name}")
                create_sql = f"""
                CREATE TABLE {self.schema_name}.{self.table_name} AS
                (SELECT * FROM {src_schema}.{src_hidden})
                WITH DATA
                """
                tdml.execute_sql(create_sql)

            self._owns_hidden = True  # Hard clones always own their copy
            target_schema = self.schema_name
        else:
            # Soft clone: link to source
            logger.info("Soft clone: linking to source table")
            self.table_name = src_hidden
            self._owns_hidden = bool(take_ownership)
            target_schema = src_schema  # view selects from source schema

        # Remember where the hidden table lives so update(), prune_filter(),
        # get_all_filters() and _drop() address the right schema afterwards.
        self.schema_name_for_table = target_schema

        # Load metadata
        df = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
        self.filter_id_name = df.columns[0]
        self.time_filtering = "BUSINESS_DATE" in df.columns
        self.col_names = df.columns[2:] if self.time_filtering else df.columns[1:]
        self.nb_filters = df.shape[0]

        # Rebuild view
        select_cols = ",".join(self._select_columns())
        view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SELECT {select_cols}
        FROM {target_schema}.{self.table_name}
        WHERE {self.filter_id_name} = {filter_id_to_apply}
        """
        tdml.execute_sql(view_sql)

        logger.info(f"Clone complete → Active filter_id={filter_id_to_apply}")
        return self

    def take_ownership(self):
        """
        Take ownership of the currently linked hidden filter table.

        This enables this FilterManager instance to manage (and potentially drop)
        the hidden table via `_drop()` or future maintenance methods.

        Returns:
            FilterManager: self (for chaining)
        """
        logger.warning(
            "Ownership taken for hidden table. This manager may now drop or modify it.",
            extra={
                "schema_name": self._hidden_schema(),
                "table_name": self.table_name
            }
        )
        self._owns_hidden = True
        return self

    def get_filter_condition(self, output_type: str = "str"):
        """
        Retrieve the currently applied filter condition from the database.

        This method executes a SQL query that aggregates the filter columns from a
        filter manager view and returns the result either as a Python object
        decoded from JSON or as a raw JSON string.

        Args:
            output_type (str, optional): Format of the returned filter condition.
                - "str": Returns a raw JSON string (default).
                - "dict": Returns the JSON decoded with `json.loads`.

        Returns:
            dict | str | None: The current filter condition in the requested format.
            Returns None if no filter is present.

        Raises:
            ValueError: If `output_type` is not "dict" or "str".
            RuntimeError: If the SQL execution fails.

        Example:
            >>> filters = self.get_filter_condition(output_type="dict")
            >>> print(filters)
            {'country': 'US', 'status': 'active'}
        """
        # Validate before touching the database so a bad argument never costs a
        # query and is reported even when the view is empty.
        if output_type not in ("dict", "str"):
            raise ValueError("Invalid output_type. Expected 'dict' or 'str'.")

        logger_safe("debug", "Fetching current filter condition from the database")

        # Only data columns are aggregated; BUSINESS_DATE is not part of col_names.
        json_columns = ",".join(self.col_names)

        query = f"""
        SELECT JSON_AGG({json_columns}) AS applied_filter
        FROM {self.schema_name}.{self.view_name} FILTER_MANAGER
        """

        try:
            result = tdml.execute_sql(query).fetchall()
        except Exception as e:
            logger_safe("error", "Failed to execute SQL for filter condition: %s", e)
            raise RuntimeError("Database query failed while fetching filter condition") from e

        # Handle no result
        if not result or result[0][0] is None:
            logger_safe("info", "No filter conditions found")
            return None

        json_result = result[0][0]

        if output_type == "dict":
            logger_safe("debug", "Returning filter as Python dictionary")
            return json.loads(json_result)

        logger_safe("debug", "Returning filter as JSON string")
        return json_result