tdfs4ds 0.2.4.31__py3-none-any.whl → 0.2.4.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,55 @@
1
1
  import teradataml as tdml
2
2
  import datetime
3
+ from tdfs4ds import logger
4
+ import re
3
5
 
4
6
  import tdfs4ds
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
8
def get_hidden_table_name(table_name: str) -> str:
    """Build the name of the hidden table that backs a public view.

    Args:
        table_name: Base table or view name.

    Returns:
        ``table_name`` followed by the ``_HIDDEN`` suffix
        (e.g. ``"MY_VIEW"`` gives ``"MY_VIEW_HIDDEN"``).
    """
    hidden_suffix = "_HIDDEN"
    return "{}{}".format(table_name, hidden_suffix)
21
+
22
+
10
23
  class TimeManager:
11
24
  """
12
- A class to manage time-related operations in a database table.
25
+ Manage time-step metadata for a Teradata table and a companion view.
26
+
27
+ This class maintains a hidden physical table (``<view>_HIDDEN``) that stores
28
+ sequential time steps (``TIME_ID``) mapped to a business date/time
29
+ (``BUSINESS_DATE``). It also maintains a public view (``<view>``) that
30
+ exposes the *current* business date/time by filtering the hidden table on
31
+ a specific ``TIME_ID``.
13
32
 
14
33
  Attributes:
15
- schema_name (str): Name of the schema in the database.
16
- table_name (str): Name of the table in the schema.
17
- data_type (str): Type of the date/time data, defaults to 'DATE'.
34
+ schema_name: Target database/schema.
35
+ table_name: Hidden physical table name (``<view>_HIDDEN``).
36
+ view_name: Public view name (``<view>``).
37
+ time_id: Name of the incrementing identifier column (default: ``"time_id"``).
38
+ nb_time_steps: Number of time steps currently stored in the hidden table
39
+ (``None`` if the hidden table does not exist yet).
40
+ data_type: SQL data type of ``BUSINESS_DATE`` (e.g., ``DATE``,
41
+ ``TIMESTAMP WITH TIME ZONE``), set after load/inspection.
18
42
  """
19
43
 
20
- def __init__(self, table_name, schema_name):
21
- """
22
- Initializes the TimeManager with a table name, schema name, and optionally a data type.
44
+ def __init__(self, table_name: str, schema_name: str) -> None:
45
+ """Initialize a TimeManager for an existing or future hidden table/view.
23
46
 
24
- If the table doesn't exist, it creates one with a BUSINESS_DATE column of the specified data type.
47
+ On initialization, if the hidden table already exists, the instance
48
+ inspects it to populate ``data_type`` and ``nb_time_steps``.
25
49
 
26
50
  Args:
27
- table_name (str): Name of the table.
28
- schema_name (str): Name of the schema.
29
- data_type (str, optional): Type of the date/time data. Defaults to 'DATE'.
51
+ table_name: Base public view name to manage (e.g., ``"MY_VIEW"``).
52
+ schema_name: Schema that contains/should contain the objects.
30
53
  """
31
54
  self.schema_name = schema_name
32
55
  self.table_name = get_hidden_table_name(table_name)
@@ -35,39 +58,52 @@ class TimeManager:
35
58
  self.nb_time_steps = None
36
59
  self.data_type = None
37
60
 
61
+ logger.debug(
62
+ "Initializing TimeManager for schema=%s, view=%s, table=%s",
63
+ self.schema_name, self.view_name, self.table_name
64
+ )
65
+
38
66
  if self._exists():
67
+ logger.debug("Hidden table %s.%s exists; inspecting metadata.", self.schema_name, self.table_name)
39
68
  df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
40
69
  d_ = {x[0]: x[1] for x in df._td_column_names_and_types}
41
- self.data_type = d_['BUSINESS_DATE']
42
- self.nb_time_steps = tdml.execute_sql(
43
- f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}").fetchall()[
44
- 0][0]
70
+ self.data_type = d_.get('BUSINESS_DATE')
71
+ self.nb_time_steps = tdml.execute_sql(
72
+ f"SEL MAX(TIME_ID) AS nb_time_steps FROM {self.schema_name}.{self.table_name}"
73
+ ).fetchall()[0][0]
74
+ logger.info(
75
+ "Detected BUSINESS_DATE data_type=%s with nb_time_steps=%s",
76
+ self.data_type, self.nb_time_steps
77
+ )
45
78
 
46
- def load_time_steps(self, df, time_column):
47
- """
48
- Load time steps into the table and update the view accordingly.
79
+ def load_time_steps(self, df: pd.DataFrame, time_column: str) -> None:
80
+ """Load/replace the hidden table and (re)point the public view to step 1.
49
81
 
50
- This method:
51
- 1. Creates a new DataFrame with a sequential time_id and BUSINESS_DATE.
52
- 2. Ensures BUSINESS_DATE has the correct SQL data type.
53
- 3. Drops and recreates the target table with the appropriate schema.
54
- 4. Inserts the new data into the table.
55
- 5. Updates the view to reference the first time step.
56
- 6. Stores the number of time steps in `self.nb_time_steps`.
82
+ Workflow:
83
+ 1) Build a DataFrame with sequential ``TIME_ID`` and ``BUSINESS_DATE``.
84
+ 2) Infer SQL types and upcast to ``TIMESTAMP WITH TIME ZONE`` when
85
+ needed (to preserve offsets).
86
+ 3) Drop and recreate the hidden table with inferred schema.
87
+ 4) Append the rows.
88
+ 5) Replace the public view to expose ``TIME_ID = 1``.
89
+ 6) Store ``nb_time_steps``.
57
90
 
58
91
  Args:
59
- df (pd.DataFrame): The input DataFrame containing time data.
60
- time_column (str): The column name representing time.
92
+ df: Input pandas DataFrame with a time column.
93
+ time_column: Name of the time column in ``df`` to use as ``BUSINESS_DATE``.
61
94
  """
95
+ logger.info("Loading time steps into %s.%s from column '%s'.",
96
+ self.schema_name, self.table_name, time_column)
62
97
 
63
- # Step 1: Build DataFrame with time_id and BUSINESS_DATE
98
+ # Step 1: Build DataFrame with TIME_ID and BUSINESS_DATE
64
99
  df_ = df.assign(
65
100
  time_id=tdml.sqlalchemy.literal_column(
66
- f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
101
+ f"ROW_NUMBER() OVER (PARTITION BY 1 ORDER BY {time_column})",
67
102
  tdml.BIGINT()
68
103
  ),
69
104
  BUSINESS_DATE=df[time_column]
70
105
  )[["time_id", "BUSINESS_DATE"]]
106
+ logger.debug("Constructed intermediate DataFrame with TIME_ID and BUSINESS_DATE.")
71
107
 
72
108
  # Step 2: Get SQL types and adjust BUSINESS_DATE if necessary
73
109
  sql_types = tdfs4ds.utils.info.get_feature_types_sql_format(df_)
@@ -75,9 +111,9 @@ class TimeManager:
75
111
 
76
112
  if "TIMESTAMP" in type_business_date.upper() and "ZONE" not in type_business_date.upper():
77
113
  new_type = f"{type_business_date} WITH TIME ZONE"
78
- print(
79
- f"Data type of the time column modified from {type_business_date} "
80
- f"to {new_type}"
114
+ logger.info(
115
+ "Upcasting BUSINESS_DATE from %s to %s to preserve timezone.",
116
+ type_business_date, new_type
81
117
  )
82
118
  type_business_date = new_type
83
119
  sql_types["BUSINESS_DATE"] = new_type
@@ -89,13 +125,19 @@ class TimeManager:
89
125
  )
90
126
 
91
127
  self.data_type = type_business_date
128
+ logger.debug("Final BUSINESS_DATE SQL type: %s", self.data_type)
92
129
 
93
130
  # Step 3: Drop table if it exists
94
131
  try:
95
132
  tdml.execute_sql(f"DROP TABLE {self.schema_name}.{self.table_name}")
133
+ logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
96
134
  except Exception as e:
135
+ # Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
136
+ msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e}"
97
137
  if tdfs4ds.DEBUG_MODE:
98
- print(f"Error dropping table {self.schema_name}.{self.table_name}: {e}")
138
+ logger.debug(msg)
139
+ else:
140
+ logger.warning(msg)
99
141
 
100
142
  # Step 4: Recreate table
101
143
  ddl = ",\n".join([f"{col} {dtype}" for col, dtype in sql_types.items()])
@@ -106,6 +148,7 @@ class TimeManager:
106
148
  PRIMARY INDEX (time_id)
107
149
  """
108
150
  tdml.execute_sql(create_table_sql)
151
+ logger.info("Created table %s.%s with schema: %s", self.schema_name, self.table_name, sql_types)
109
152
 
110
153
  # Step 5: Insert data
111
154
  df_[list(sql_types.keys())].to_sql(
@@ -113,6 +156,7 @@ class TimeManager:
113
156
  schema_name=self.schema_name,
114
157
  if_exists="append"
115
158
  )
159
+ logger.info("Inserted %s time steps into %s.%s.", len(df_), self.schema_name, self.table_name)
116
160
 
117
161
  # Step 6: Update view
118
162
  create_view_sql = f"""
@@ -122,40 +166,72 @@ class TimeManager:
122
166
  WHERE time_id = 1
123
167
  """
124
168
  tdml.execute_sql(create_view_sql)
169
+ logger.debug("Replaced view %s.%s to point at TIME_ID=1.", self.schema_name, self.view_name)
125
170
 
126
171
  # Step 7: Store number of time steps
127
172
  result = tdml.execute_sql(
128
173
  f"SELECT MAX(time_id) AS nb_filters FROM {self.schema_name}.{self.table_name}"
129
174
  ).fetchall()
130
175
  self.nb_time_steps = result[0][0]
176
+ logger.info("Time steps loaded. nb_time_steps=%s", self.nb_time_steps)
131
177
 
132
-
133
def _exists(self) -> bool:
    """Tell whether the hidden table is listed in ``self.schema_name``.

    Names returned by ``tdml.db_list_tables`` are lower-cased and stripped
    of double quotes before being compared with ``self.table_name``.

    Returns:
        True if a listed table matches the hidden table name; False otherwise.
    """
    listed_names = tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    normalized_names = {name.lower().replace('"', '') for name in listed_names}
    found = self.table_name.lower() in normalized_names
    logger.debug("Hidden table %s.%s exists? %s", self.schema_name, self.table_name, found)
    return found
191
def _drop(self, drop_view: bool = False, force: bool = False) -> None:
    """Drop the hidden table when allowed, then optionally the public view.

    Args:
        drop_view: When True, also issue ``DROP VIEW`` for the public view.
        force: When True, drop the hidden table even if ``_owns_hidden``
            is unset or False on this instance.

    Notes:
        - The hidden table is dropped only when it exists and either
          ``force`` is True or ``self._owns_hidden`` is truthy; otherwise a
          message is logged and the table is left untouched.
        - The view drop does not depend on ownership; a failure while
          dropping the view is logged as a warning, not raised.
    """
    # Hidden table: nothing to do, drop, or refuse.
    if not self._exists():
        logger.debug("Hidden table %s.%s does not exist.", self.schema_name, self.table_name)
    elif getattr(self, "_owns_hidden", False) or force:
        logger.info(
            "Dropping hidden table %s.%s (force=%s).",
            self.schema_name, self.table_name, force
        )
        tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
    else:
        logger.warning(
            "Refusing to drop hidden table %s.%s because this manager does not own it. "
            "Use force=True to override.",
            self.schema_name, self.table_name
        )

    # Public view: only on request.
    if not drop_view:
        return

    try:
        logger.info("Dropping view %s.%s.", self.schema_name, self.view_name)
        tdml.execute_sql(f"DROP VIEW {self.schema_name}.{self.view_name}")
    except Exception as drop_error:
        logger.warning("Error dropping view %s.%s: %s", self.schema_name, self.view_name, drop_error)
152
228
 
153
- def update(self, time_id):
154
- """
155
- Updates the view to apply a new filter based on the provided filter ID.
229
+
230
+ def update(self, time_id: int) -> None:
231
+ """Point the public view at a specific ``TIME_ID``.
156
232
 
157
233
  Args:
158
- filter_id (int): The ID of the filter to apply. The view will be updated to only show data that matches this filter ID.
234
+ time_id: The time step identifier to expose via the public view.
159
235
  """
160
236
  if self._exists():
161
237
  query = f"""
@@ -164,32 +240,41 @@ class TimeManager:
164
240
  FROM {self.schema_name}.{self.table_name}
165
241
  WHERE TIME_ID = {time_id}
166
242
  """
167
-
168
243
  if tdfs4ds.DEBUG_MODE:
169
- print(query)
244
+ logger.debug("Executing view update:\n%s", query)
170
245
  tdml.execute_sql(query)
246
+ logger.info("Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
247
+ else:
248
+ logger.warning(
249
+ "Cannot update view: hidden table %s.%s does not exist.",
250
+ self.schema_name, self.table_name
251
+ )
171
252
 
172
def display(self) -> pd.DataFrame:
    """Fetch the content of the public view into a pandas DataFrame.

    Column names are taken from a Teradata DataFrame bound to the view;
    rows come from a ``SEL *`` on the same view.

    Returns:
        A pandas DataFrame holding the rows currently exposed by the view.
    """
    logger.debug("Reading current BUSINESS_DATE from %s.%s.", self.schema_name, self.view_name)
    view_columns = tdml.DataFrame(tdml.in_schema(self.schema_name, self.view_name)).columns
    view_rows = tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()
    return pd.DataFrame(view_rows, columns=view_columns)
182
265
 
183
- def get_date_in_the_past(self):
184
- """
185
- Retrieves the earliest date and time value from the table.
266
+ def get_date_in_the_past(self) -> str | None:
267
+ """Return the earliest BUSINESS_DATE from the public view as a string.
268
+
269
+ The format includes timezone offset when applicable:
270
+ - ``YYYY-MM-DD HH:MM:SS±HH:MM`` if timezone info is present;
271
+ - otherwise ``YYYY-MM-DD HH:MM:SS``.
186
272
 
187
273
  Returns:
188
- str: The earliest date and time value as a formatted string
189
- ('YYYY-MM-DD HH:MM:SS±HH:MM' if timezone is available, else 'YYYY-MM-DD HH:MM:SS').
274
+ The formatted earliest date/time string, or ``None`` if parsing fails.
190
275
  """
191
- # Use iloc to preserve timezone awareness from pandas
192
276
  date_obj = self.display().BUSINESS_DATE.iloc[0]
277
+ logger.debug("Raw earliest BUSINESS_DATE value read: %r (%s)", date_obj, type(date_obj))
193
278
 
194
279
  if isinstance(date_obj, pd.Timestamp):
195
280
  datetime_obj = date_obj.to_pydatetime()
@@ -200,68 +285,390 @@ class TimeManager:
200
285
  elif isinstance(date_obj, np.datetime64):
201
286
  datetime_obj = pd.to_datetime(date_obj).to_pydatetime()
202
287
  else:
203
- print("temp is of unrecognized type")
204
- print('temp', date_obj)
205
- print('temp type', type(date_obj))
206
- return
288
+ logger.warning("Unrecognized BUSINESS_DATE type: %s; value=%r", type(date_obj), date_obj)
289
+ return None
207
290
 
208
- # Format with timezone offset if available
209
291
  if datetime_obj.tzinfo is not None and datetime_obj.tzinfo.utcoffset(datetime_obj) is not None:
210
292
  output_string = datetime_obj.isoformat(sep=' ', timespec='seconds')
211
293
  else:
212
294
  output_string = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
213
295
 
296
+ logger.debug("Formatted earliest BUSINESS_DATE: %s", output_string)
214
297
  return output_string
215
298
 
216
def get_list_date(self) -> tdml.DataFrame:
    """Expose the hidden table as a Teradata DataFrame.

    Returns:
        A Teradata DataFrame bound to ``self.schema_name.self.table_name``
        (the hidden table holding the time steps).
    """
    logger.debug("Returning Teradata DataFrame for %s.%s.", self.schema_name, self.table_name)
    hidden_table_ref = tdml.in_schema(self.schema_name, self.table_name)
    return tdml.DataFrame(hidden_table_ref)
226
308
 
227
def generate_timeline(self, schema_name: str, view_name: str, current_included: bool = True) -> tdml.DataFrame:
    """Build (or replace) a view listing business dates up to the current one.

    The view ``schema_name.view_name`` selects ``BUSINESS_DATE`` from
    ``<self.view_name>_HIDDEN`` in ``self.schema_name`` and keeps the rows
    whose date does not exceed the date exposed by the public view
    ``self.schema_name.<self.view_name>``.

    Args:
        schema_name: Schema where the timeline view is created.
        view_name: Name of the timeline view.
        current_included: If True, keep the current business date (``<=``);
            otherwise exclude it (``<``).

    Returns:
        A Teradata DataFrame bound to the timeline view.
    """
    logger.info(
        "Generating timeline view %s.%s (current_included=%s).",
        schema_name, view_name, current_included
    )

    hidden_source = self.view_name + '_HIDDEN'
    comparator = "<=" if current_included else "<"

    select_part = f"""
        REPLACE VIEW {schema_name}.{view_name} AS
        SEL BUSINESS_DATE
        FROM {self.schema_name}.{hidden_source} A
        """
    filter_part = (
        f"WHERE BUSINESS_DATE {comparator} "
        f"(SELECT BUSINESS_DATE FROM {self.schema_name}.{self.view_name})"
    )
    query = select_part + filter_part

    tdml.execute_sql(query)
    logger.debug("Replaced timeline view with query:\n%s", query.strip())
    return tdml.DataFrame(tdml.in_schema(schema_name, view_name))
259
342
 
260
def get_current_step(self) -> int | None:
    """Return the TIME_ID whose BUSINESS_DATE matches the public view.

    Both the hidden table and the public view are qualified with
    ``self.schema_name``, like every other query in this class, so the
    lookup does not depend on the session's default database.

    Returns:
        The matching ``TIME_ID`` if exactly one row is found; otherwise
        ``None`` (zero rows, or several rows sharing the same date).
    """
    hidden_table = f"{self.schema_name}.{self.table_name}"
    public_view = f"{self.schema_name}.{self.view_name}"

    logger.debug("Fetching current TIME_ID from %s and %s.", hidden_table, public_view)
    res = tdml.execute_sql(
        f"SELECT TIME_ID FROM {hidden_table} "
        f"WHERE BUSINESS_DATE = (SELECT BUSINESS_DATE FROM {public_view})"
    ).fetchall()

    if len(res) == 1:
        logger.info("Current TIME_ID resolved to %s.", res[0][0])
        return res[0][0]

    logger.warning("Could not resolve a unique current TIME_ID (rows returned: %s).", len(res))
    return None
362
+
363
def clone_timer(
    self,
    source_timemanager,
    time_id_to_apply: int = 1,
    take_ownership: bool = False,
    clone_mode: str = "soft",
    if_exists: str = "error",
) -> "TimeManager":
    """
    Clone time-step definitions from another TimeManager.

    Supports:
    - soft clone (default): point this manager's view to the source _HIDDEN table
    - hard clone: copy the source _HIDDEN table into this schema and own the copy

    All arguments are validated before any table is dropped or created, so
    an invalid ``time_id_to_apply`` no longer leaves a half-finished clone.

    Args:
        source_timemanager (TimeManager): Source manager to clone from.
        time_id_to_apply (int, optional): TIME_ID to activate in the public view. Default: 1.
        take_ownership (bool, optional): For soft clones only, whether this
            manager should consider itself the owner of the hidden table.
            (Hard clones always own their copy.) Default: False.
        clone_mode (str, optional): "soft" or "hard". Default: "soft".
        if_exists (str, optional): What to do if the destination hidden table already exists
            - "error" (default): raise an exception
            - "replace": drop and recreate
            - "skip": reuse existing table

    Returns:
        TimeManager: self

    Raises:
        ValueError: On invalid clone_mode/if_exists, missing source table,
            or a hard clone with if_exists="replace" whose destination is
            the source table itself.
        RuntimeError: If destination exists and if_exists="error".
    """
    if clone_mode not in ("soft", "hard"):
        raise ValueError("clone_mode must be 'soft' or 'hard'")
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Convert once, up front: a bad value must fail before any DDL runs.
    active_time_id = int(time_id_to_apply)

    src_schema = source_timemanager.schema_name
    src_hidden = source_timemanager.table_name

    logger.info(
        "Cloning timer",
        extra={
            "mode": clone_mode,
            "source": f"{src_schema}.{src_hidden}",
            "target_view": f"{self.schema_name}.{self.view_name}",
        },
    )

    def _listed_tables(schema: str) -> set:
        # Same normalization as _exists(): lower-case, double quotes removed.
        return {
            name.lower().replace('"', '')
            for name in tdml.db_list_tables(schema_name=schema).TableName.values
        }

    # Validate source existence
    if src_hidden.lower() not in _listed_tables(src_schema):
        raise ValueError(f"Source hidden timer table {src_schema}.{src_hidden} does not exist.")

    if clone_mode == "hard":
        # Hard clone → create (or reuse) a NEW hidden table in this schema
        self.table_name = get_hidden_table_name(self.view_name)
        dest_exists = self.table_name.lower() in _listed_tables(self.schema_name)

        if dest_exists:
            if if_exists == "error":
                raise RuntimeError(f"Target table {self.schema_name}.{self.table_name} already exists.")
            elif if_exists == "replace":
                # Dropping the destination when it IS the source would destroy
                # the data before it can be copied.
                if (src_schema.lower() == self.schema_name.lower()
                        and src_hidden.lower() == self.table_name.lower()):
                    raise ValueError(
                        f"Cannot replace {self.schema_name}.{self.table_name} "
                        "with a clone of itself."
                    )
                logger.warning("Replacing existing table %s.%s", self.schema_name, self.table_name)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=self.table_name)
            elif if_exists == "skip":
                logger.info("Skipping clone, using existing %s.%s", self.schema_name, self.table_name)

        if not dest_exists or if_exists == "replace":
            logger.info("Creating cloned table %s.%s", self.schema_name, self.table_name)
            create_sql = f"""
                CREATE TABLE {self.schema_name}.{self.table_name} AS
                (SELECT * FROM {src_schema}.{src_hidden})
                WITH DATA
            """
            tdml.execute_sql(create_sql)

        self._owns_hidden = True
        target_schema = self.schema_name

    else:
        # Soft clone → just point to the source hidden table
        logger.info("Soft clone: linking view to source hidden table")
        self.table_name = src_hidden
        self._owns_hidden = bool(take_ownership)
        target_schema = src_schema  # view will select from the source schema

    # Load metadata from the target hidden table
    df_meta = tdml.DataFrame(tdml.in_schema(target_schema, self.table_name))
    # Get data type for BUSINESS_DATE (if present); best effort only.
    try:
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception:
        self.data_type = None

    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {target_schema}.{self.table_name}"
    ).fetchall()[0][0]

    # Rebuild the public view to the requested TIME_ID
    view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SELECT BUSINESS_DATE
        FROM {target_schema}.{self.table_name}
        WHERE TIME_ID = {active_time_id}
    """
    tdml.execute_sql(view_sql)

    logger.info(
        "Timer clone complete → Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        active_time_id, self.nb_time_steps, self.data_type
    )
    return self
479
+
480
def take_ownership(
    self,
    create_copy: bool = True,
    if_exists: str = "error",
) -> "TimeManager":
    """Promote this manager to OWN the hidden table.

    Two modes:
    - create_copy=True (default): Hard-promote by copying the current source
      hidden table into this manager's schema as <view>_HIDDEN, repoint the
      view, and set ownership.
    - create_copy=False: Only mark as owned if the current hidden table is
      already named <view>_HIDDEN. Otherwise, warn and change nothing.

    When the current hidden table already is <view>_HIDDEN and
    ``if_exists="replace"`` is requested, the table is kept as-is and simply
    marked as owned: dropping it first would delete the only copy of the data.

    NOTE(review): the copy always reads from ``self.schema_name``. After a
    soft clone from another schema the hidden table presumably lives in the
    source schema, which this class does not record — confirm before relying
    on take_ownership() in that scenario.

    Args:
        create_copy: If True, copy data into this schema and repoint view.
        if_exists: Behavior when the destination <view>_HIDDEN already exists:
            - "error" (default): raise
            - "replace": drop & recreate
            - "skip": reuse existing

    Returns:
        TimeManager: self

    Raises:
        ValueError: If ``if_exists`` is not one of the accepted values.
        RuntimeError: If the destination exists and ``if_exists="error"``.
    """
    if if_exists not in ("error", "replace", "skip"):
        raise ValueError("if_exists must be 'error', 'replace', or 'skip'")

    # Figure out current active TIME_ID to preserve selection after repointing
    try:
        current_time_id = self.get_current_step()
    except Exception as exc:
        logger.debug("Could not resolve the current TIME_ID (%s); defaulting to 1.", exc)
        current_time_id = None
    if current_time_id is None:
        current_time_id = 1

    dest_table = get_hidden_table_name(self.view_name)

    if not create_copy:
        # Only mark as owned if we already match <schema, view_HIDDEN>
        if self.schema_name and self.table_name == dest_table:
            logger.info(
                "Marking existing hidden table %s.%s as owned (no copy).",
                self.schema_name, self.table_name
            )
            self._owns_hidden = True
            return self
        logger.warning(
            "Cannot take ownership without copying: current table is %s (expected %s). "
            "Re-run with create_copy=True to copy into %s.%s.",
            self.table_name, dest_table, self.schema_name, dest_table
        )
        return self

    # We will copy data into <self.schema_name>.<view>_HIDDEN
    dest_exists = [
        t.lower() for t in tdml.db_list_tables(schema_name=self.schema_name).TableName.values
    ]
    # Source and destination are both read from self.schema_name, so equal
    # names mean they are the very same table.
    source_is_dest = self.table_name.lower() == dest_table.lower()
    need_create = True

    if dest_table.lower() in dest_exists:
        if if_exists == "error":
            raise RuntimeError(f"Destination table {self.schema_name}.{dest_table} already exists.")
        elif if_exists == "replace":
            if source_is_dest:
                # Dropping here would destroy the source of the copy.
                logger.warning(
                    "%s.%s is already the current hidden table; keeping it instead of "
                    "replacing it with a copy of itself.",
                    self.schema_name, dest_table
                )
                need_create = False
            else:
                logger.warning("Replacing existing table %s.%s", self.schema_name, dest_table)
                tdml.db_drop_table(schema_name=self.schema_name, table_name=dest_table)
        elif if_exists == "skip":
            logger.info("Reusing existing destination table %s.%s", self.schema_name, dest_table)
            need_create = False

    if need_create:
        logger.info(
            "Creating owned copy %s.%s from current source %s.%s",
            self.schema_name, dest_table, self.schema_name, self.table_name
        )
        create_sql = f"""
            CREATE TABLE {self.schema_name}.{dest_table} AS
            (SELECT * FROM {self.schema_name}.{self.table_name})
            WITH DATA
        """
        tdml.execute_sql(create_sql)

    # Repoint this manager to the new owned table and rebuild the view
    self.table_name = dest_table
    self._owns_hidden = True

    view_sql = f"""
        REPLACE VIEW {self.schema_name}.{self.view_name} AS
        SELECT BUSINESS_DATE
        FROM {self.schema_name}.{self.table_name}
        WHERE TIME_ID = {int(current_time_id)}
    """
    tdml.execute_sql(view_sql)

    # Refresh metadata (best effort for the data type: keep the previous
    # value on failure, but leave a trace instead of failing silently).
    try:
        df_meta = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
        dtypes = {c: t for c, t in df_meta._td_column_names_and_types}
        self.data_type = dtypes.get("BUSINESS_DATE")
    except Exception as exc:
        logger.warning(
            "Could not refresh BUSINESS_DATE data type for %s.%s: %s",
            self.schema_name, self.table_name, exc
        )
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Ownership taken for %s.%s. Active TIME_ID=%s; nb_time_steps=%s; data_type=%s",
        self.schema_name, self.table_name, current_time_id, self.nb_time_steps, self.data_type
    )
    return self
598
+
599
def get_current_timeid(self) -> int:
    """
    Extract the currently active TIME_ID from the public view's DDL.

    The DDL text is obtained from ``tdfs4ds.utils.lineage.get_ddl`` and
    searched for a ``WHERE TIME_ID = <number>`` clause (case-insensitive,
    flexible whitespace).

    Returns:
        int: TIME_ID parsed from the view definition.

    Raises:
        ValueError: If the TIME_ID cannot be parsed from the DDL.
    """
    logger.debug("Reading view DDL to extract current TIME_ID")
    txt = tdfs4ds.utils.lineage.get_ddl(schema_name=self.schema_name, view_name=self.view_name)

    # Look for "WHERE TIME_ID = <number>" (allow whitespace/case variations)
    m = re.search(r"WHERE\s+TIME_ID\s*=\s*(\d+)", txt, flags=re.IGNORECASE)
    if m is None:
        # No exception is active here, so logger.exception() would attach a
        # meaningless "NoneType: None" traceback; log a plain error instead.
        logger.error(
            "Failed to parse TIME_ID from DDL of view %s.%s",
            self.schema_name, self.view_name
        )
        raise ValueError("Unable to parse current TIME_ID from view DDL.")
    current = int(m.group(1))
    logger.info("Current TIME_ID extracted", extra={"time_id": current})
    return current
620
+
621
+
622
def print_view_ddl(self) -> None:
    """
    Write the public view's DDL to the log at INFO level.

    The definition is fetched through ``tdfs4ds.utils.lineage.get_ddl``;
    nothing is returned.
    """
    view_definition = tdfs4ds.utils.lineage.get_ddl(
        schema_name=self.schema_name,
        view_name=self.view_name,
    )
    logger.info("View DDL:\n%s", view_definition)
628
+
629
+
630
def prune_time(self, time_id: int | None = None) -> "TimeManager":
    """
    Remove all time steps with TIME_ID lower than `time_id` and renumber remaining ones.

    If `time_id` is omitted, the method uses the current TIME_ID parsed from
    the view DDL. Remaining rows are shifted by ``time_id - 1`` so that the
    row that had ``TIME_ID == time_id`` becomes 1, and the public view is
    repointed to TIME_ID=1.

    A threshold of 1 or less removes nothing, so the delete/renumber
    statements are skipped in that case. This also prevents a zero or
    negative threshold from shifting every TIME_ID upward, which would leave
    the view pointing at a TIME_ID that no longer exists.

    Args:
        time_id (int, optional): Threshold id; rows with TIME_ID < time_id are deleted.

    Returns:
        TimeManager: Self, to allow method chaining.
    """
    if time_id is None:
        time_id = self.get_current_timeid()

    # Convert once so an invalid value fails before any SQL is issued.
    threshold = int(time_id)

    logger.info("Pruning time steps", extra={"threshold_time_id": threshold})

    if threshold > 1:
        delete_sql = f"""
            DELETE {self.schema_name}.{self.table_name}
            WHERE TIME_ID < {threshold}
        """
        update_sql = f"""
            UPDATE {self.schema_name}.{self.table_name}
            SET TIME_ID = TIME_ID - {threshold} + 1
        """

        logger.debug("Executing prune delete", extra={"sql": delete_sql})
        tdml.execute_sql(delete_sql)

        logger.debug("Executing prune renumber", extra={"sql": update_sql})
        tdml.execute_sql(update_sql)
    else:
        logger.debug(
            "Threshold TIME_ID=%s removes no time step; skipping delete and renumbering.",
            threshold
        )

    # Refresh metadata and repoint view to TIME_ID=1
    self.update(1)
    self.nb_time_steps = tdml.execute_sql(
        f"SEL MAX(TIME_ID) FROM {self.schema_name}.{self.table_name}"
    ).fetchall()[0][0]

    logger.info(
        "Prune complete; active TIME_ID set to 1; nb_time_steps=%s",
        self.nb_time_steps
    )
    return self