tdfs4ds 0.2.4.32__py3-none-any.whl → 0.2.4.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +341 -519
- tdfs4ds/feature_store/feature_data_processing.py +236 -268
- tdfs4ds/process_store/process_query_administration.py +1 -1
- tdfs4ds/process_store/process_registration_management.py +67 -55
- tdfs4ds/utils/filter_management.py +87 -53
- tdfs4ds/utils/time_management.py +67 -24
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.33.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.33.dist-info}/RECORD +10 -10
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.33.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.33.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ def list_processes():
|
|
|
28
28
|
return tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW))
|
|
29
29
|
except Exception as e:
|
|
30
30
|
print(str(e))
|
|
31
|
-
print(
|
|
31
|
+
print(tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW)).show_query())
|
|
32
32
|
|
|
33
33
|
def list_processes_feature_split():
|
|
34
34
|
"""
|
|
@@ -3,6 +3,7 @@ import tdfs4ds
|
|
|
3
3
|
from tdfs4ds.utils.query_management import execute_query_wrapper
|
|
4
4
|
import uuid
|
|
5
5
|
import json
|
|
6
|
+
from tdfs4ds import logger,logger_safe
|
|
6
7
|
|
|
7
8
|
@execute_query_wrapper
|
|
8
9
|
def register_process_view(view_name, entity_id, feature_names, metadata={}, entity_null_substitute = {}, **kwargs):
|
|
@@ -74,80 +75,91 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
|
|
|
74
75
|
- Requires 'tdml' module for DataFrame operations and 'uuid' for generating unique identifiers.
|
|
75
76
|
"""
|
|
76
77
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
if type(view_name) == tdml.dataframe.dataframe.DataFrame:
|
|
78
|
+
# Handle teradataml DataFrame input
|
|
79
|
+
if isinstance(view_name, tdml.dataframe.dataframe.DataFrame):
|
|
80
80
|
try:
|
|
81
81
|
view_name = view_name._table_name
|
|
82
|
-
except:
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
except Exception:
|
|
83
|
+
logger_safe(
|
|
84
|
+
"error",
|
|
85
|
+
"Invalid DataFrame for view registration. Use: tdml.DataFrame(<table/view>). Crystallize if needed."
|
|
86
|
+
)
|
|
85
87
|
raise
|
|
86
88
|
|
|
89
|
+
# Prevent using temporary teradataml views
|
|
87
90
|
if view_name.split('.')[1].startswith('ml__'):
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
logger_safe(
|
|
92
|
+
"error",
|
|
93
|
+
"Invalid view name '%s': starts with 'ml__'. Please crystallize your view first.",
|
|
94
|
+
view_name
|
|
95
|
+
)
|
|
96
|
+
raise ValueError("Invalid process view name: temporary teradataml views are not allowed.")
|
|
97
|
+
|
|
98
|
+
# Get optional arguments
|
|
92
99
|
filtermanager = kwargs.get('filtermanager', None)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# Get data distribution related inputs:
|
|
97
|
-
primary_index = kwargs.get('primary_index', [e for e in entity_id.keys()])
|
|
100
|
+
query_upsert_filtermanager = None
|
|
101
|
+
primary_index = kwargs.get('primary_index', list(entity_id.keys()))
|
|
98
102
|
partitioning = kwargs.get('partitioning', '').replace("'", '"')
|
|
99
103
|
|
|
100
104
|
if primary_index is None:
|
|
101
|
-
primary_index =
|
|
105
|
+
primary_index = list(entity_id.keys())
|
|
102
106
|
|
|
107
|
+
feature_names = ','.join(feature_names)
|
|
103
108
|
|
|
109
|
+
# Validtime period
|
|
110
|
+
end_period_ = '9999-01-01 00:00:00' if tdfs4ds.END_PERIOD == 'UNTIL_CHANGED' else tdfs4ds.END_PERIOD
|
|
111
|
+
validtime_statement = (
|
|
112
|
+
'CURRENT VALIDTIME'
|
|
113
|
+
if tdfs4ds.FEATURE_STORE_TIME is None
|
|
114
|
+
else f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
|
|
115
|
+
)
|
|
104
116
|
|
|
105
|
-
|
|
106
|
-
feature_names = ','.join(feature_names)
|
|
117
|
+
logger_safe("info", "Registering process view: %s", view_name)
|
|
107
118
|
|
|
108
|
-
#
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
119
|
+
# Check if view already exists in catalog
|
|
120
|
+
query_process_id = f"""
|
|
121
|
+
SEL PROCESS_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
|
|
122
|
+
WHERE view_name = '{view_name}'
|
|
123
|
+
"""
|
|
124
|
+
process_id_result = tdml.execute_sql(query_process_id).fetchall()
|
|
113
125
|
|
|
114
|
-
if
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
|
|
126
|
+
if process_id_result:
|
|
127
|
+
process_id = process_id_result[0][0]
|
|
128
|
+
logger_safe("info", "Updating existing process_id=%s", process_id)
|
|
118
129
|
|
|
130
|
+
query_feature_version = f"""
|
|
131
|
+
SEL PROCESS_VERSION FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
|
|
132
|
+
WHERE view_name = '{view_name}'
|
|
133
|
+
"""
|
|
134
|
+
feature_version = tdml.execute_sql(query_feature_version).fetchall()[0][0]
|
|
119
135
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if len(query_primary_index_res)>0:
|
|
129
|
-
FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = tdml.execute_sql(query_primary_index).fetchall()[0]
|
|
136
|
+
query_primary_index = f"""
|
|
137
|
+
SEL FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING
|
|
138
|
+
FROM {tdfs4ds.SCHEMA}.{tdfs4ds.DATA_DISTRIBUTION_NAME}
|
|
139
|
+
WHERE process_id = '{process_id}'
|
|
140
|
+
"""
|
|
141
|
+
dist_res = tdml.execute_sql(query_primary_index).fetchall()
|
|
142
|
+
if dist_res:
|
|
143
|
+
FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = dist_res[0]
|
|
130
144
|
else:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
""
|
|
145
|
+
logger_safe(
|
|
146
|
+
"error",
|
|
147
|
+
"Missing data distribution info for existing process %s. Check distribution table.",
|
|
148
|
+
process_id
|
|
149
|
+
)
|
|
150
|
+
raise ValueError("Missing distribution info.")
|
|
137
151
|
else:
|
|
138
|
-
# Generating a unique process identifier
|
|
139
152
|
process_id = str(uuid.uuid4())
|
|
140
153
|
feature_version = 1
|
|
141
154
|
FOR_PRIMARY_INDEX = ",".join(primary_index)
|
|
142
155
|
FOR_DATA_PARTITIONING = partitioning
|
|
156
|
+
logger_safe("info", "Generated new process_id=%s", process_id)
|
|
143
157
|
|
|
144
|
-
#
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
158
|
+
# Build entity_id string
|
|
159
|
+
ENTITY_ID__ = ','.join(sorted(entity_id.keys()))
|
|
160
|
+
logger_safe("debug", "Entity IDs: %s", ENTITY_ID__)
|
|
161
|
+
logger_safe("debug", "Feature names: %s", feature_names)
|
|
148
162
|
|
|
149
|
-
print('feature_version :',feature_version)
|
|
150
|
-
print('int(feature_version) :', int(feature_version))
|
|
151
163
|
if tdfs4ds.FEATURE_STORE_TIME == None:
|
|
152
164
|
|
|
153
165
|
|
|
@@ -402,16 +414,16 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
|
|
|
402
414
|
"""
|
|
403
415
|
|
|
404
416
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
print(f"to update your dataset : dataset = run(process_id='{process_id}',return_dataset=True)")
|
|
417
|
+
logger_safe("info", "Process registered: process_id=%s", process_id)
|
|
418
|
+
logger_safe("info", "To rerun: run(process_id='%s')", process_id)
|
|
419
|
+
logger_safe("info", "To build dataset: dataset = run(process_id='%s', return_dataset=True)", process_id)
|
|
409
420
|
|
|
410
|
-
#
|
|
421
|
+
# Return queries
|
|
411
422
|
if kwargs.get('with_process_id'):
|
|
412
423
|
return query_upsert, process_id, query_upsert_dist, query_upsert_filtermanager
|
|
413
424
|
else:
|
|
414
425
|
return query_upsert, query_upsert_dist, query_upsert_filtermanager
|
|
426
|
+
|
|
415
427
|
@execute_query_wrapper
|
|
416
428
|
def _register_process_view_update_insert(view_name, entity_id, feature_names, metadata={}, entity_null_substitute={}, **kwargs):
|
|
417
429
|
"""
|
|
@@ -2,7 +2,7 @@ import datetime
|
|
|
2
2
|
import numpy as np # Needed for np.datetime64 handling in get_date_in_the_past
|
|
3
3
|
import teradataml as tdml
|
|
4
4
|
import tdfs4ds
|
|
5
|
-
from tdfs4ds import logger
|
|
5
|
+
from tdfs4ds import logger, logger_safe
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def get_hidden_table_name(table_name):
|
|
@@ -20,24 +20,60 @@ def get_hidden_table_name(table_name):
|
|
|
20
20
|
|
|
21
21
|
class FilterManager:
|
|
22
22
|
"""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
A utility for managing dynamic, versioned filter sets as database-backed views.
|
|
24
|
+
|
|
25
|
+
The FilterManager enables lightweight scenario management by storing multiple
|
|
26
|
+
filter definitions in a hidden Teradata table and exposing a public view that
|
|
27
|
+
dynamically switches between them by `filter_id`. Each row in the hidden table
|
|
28
|
+
represents a complete filter configuration. The active configuration is
|
|
29
|
+
controlled by updating the view definition rather than rewriting table data.
|
|
30
|
+
|
|
31
|
+
Key Features:
|
|
32
|
+
- Store multiple filter states (scenarios) indexed by `filter_id`
|
|
33
|
+
- Switch filter states instantly by updating a view
|
|
34
|
+
- Optionally include time-based slicing using a `BUSINESS_DATE` column
|
|
35
|
+
- Clone filters between managers (soft or hard clone modes)
|
|
36
|
+
- Prune obsolete filters to control table size
|
|
37
|
+
- Retrieve current and historical filter definitions
|
|
38
|
+
|
|
39
|
+
Workflow Overview:
|
|
40
|
+
1. Create a `FilterManager` pointing to a target view name.
|
|
41
|
+
2. Load one or more filter definitions using `load_filter()`.
|
|
42
|
+
3. Switch active filters using `update(filter_id)`.
|
|
43
|
+
4. Inspect the active filter via `display()` or view DDL.
|
|
44
|
+
5. Optionally prune or clone filters as needed.
|
|
45
|
+
|
|
46
|
+
How It Works Internally:
|
|
47
|
+
- A hidden table named `<view_name>_HIDDEN` stores filter definitions.
|
|
48
|
+
- A Teradata view named `<view_name>` exposes only the *active* filter row.
|
|
49
|
+
- Each filter automatically receives a sequential `filter_id`
|
|
50
|
+
(`ROW_NUMBER()` ordering ensures deterministic assignment).
|
|
51
|
+
- If time-based filtering is used via `time_column`, a `BUSINESS_DATE`
|
|
52
|
+
column is added and projected in all operations.
|
|
53
|
+
|
|
54
|
+
Parameters:
|
|
55
|
+
table_name (str): Public view name to manage or create.
|
|
56
|
+
schema_name (str): Teradata schema where artifacts will be created.
|
|
57
|
+
filter_id_name (str, optional): Name of the filter ID column. Defaults to `'filter_id'`.
|
|
58
|
+
time_column (str, optional): Optional name of a timestamp column from input DataFrames
|
|
59
|
+
that maps to a `BUSINESS_DATE` column for time-aware filters.
|
|
30
60
|
|
|
31
61
|
Attributes:
|
|
32
|
-
schema_name (str):
|
|
33
|
-
table_name (str):
|
|
34
|
-
view_name (str):
|
|
35
|
-
filter_id_name (str): Column
|
|
36
|
-
nb_filters (int | None): Number of filters
|
|
37
|
-
col_names (list[str] | None): Columns projected by the view (
|
|
38
|
-
time_filtering (bool | None):
|
|
62
|
+
schema_name (str): Target schema for view and hidden table.
|
|
63
|
+
table_name (str): Name of hidden table storing filters (auto-suffixed with `_HIDDEN`).
|
|
64
|
+
view_name (str): Name of public view pointing to current filter.
|
|
65
|
+
filter_id_name (str): Column containing filter ID.
|
|
66
|
+
nb_filters (int | None): Number of stored filters (None until initialized).
|
|
67
|
+
col_names (list[str] | None): Columns projected by the view (data columns only).
|
|
68
|
+
time_filtering (bool | None): True if time-based filtering enabled.
|
|
69
|
+
|
|
70
|
+
Notes:
|
|
71
|
+
- Database objects are only created when `load_filter()` is first called.
|
|
72
|
+
- Safe for iterative pipeline runs—auto-detects existing artifacts.
|
|
73
|
+
- Designed for large production tables and Teradata-native workflows.
|
|
39
74
|
"""
|
|
40
75
|
|
|
76
|
+
|
|
41
77
|
def __init__(self, table_name, schema_name, filter_id_name="filter_id", time_column=None):
|
|
42
78
|
"""
|
|
43
79
|
Initialize the FilterManager.
|
|
@@ -46,15 +82,6 @@ class FilterManager:
|
|
|
46
82
|
filter id, and time filtering status) are detected and cached. If they do
|
|
47
83
|
not exist yet, attributes are initialized but no objects are created until
|
|
48
84
|
`load_filter()` is called.
|
|
49
|
-
|
|
50
|
-
Args:
|
|
51
|
-
table_name (str): Public view name to maintain.
|
|
52
|
-
schema_name (str): Schema where the view and hidden table live.
|
|
53
|
-
filter_id_name (str, optional): Name of the filter id column. Defaults to 'filter_id'.
|
|
54
|
-
time_column (str, optional): If provided, indicates the source column
|
|
55
|
-
in incoming DataFrames to copy into `BUSINESS_DATE` during `load_filter()`.
|
|
56
|
-
(Note: this parameter is remembered but the actual `BUSINESS_DATE`
|
|
57
|
-
column is only created/used when `load_filter(time_column=...)` is called.)
|
|
58
85
|
"""
|
|
59
86
|
self.schema_name = schema_name
|
|
60
87
|
self.table_name = get_hidden_table_name(table_name)
|
|
@@ -63,46 +90,45 @@ class FilterManager:
|
|
|
63
90
|
self.nb_filters = None
|
|
64
91
|
self.col_names = None
|
|
65
92
|
self.time_filtering = None
|
|
66
|
-
self._init_time_column = time_column #
|
|
93
|
+
self._init_time_column = time_column # Remember user hint for later
|
|
67
94
|
|
|
68
|
-
|
|
69
|
-
"
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
"view_name": self.view_name,
|
|
73
|
-
"table_name": self.table_name,
|
|
74
|
-
"filter_id_name": self.filter_id_name,
|
|
75
|
-
},
|
|
95
|
+
logger_safe(
|
|
96
|
+
"debug",
|
|
97
|
+
"Initializing FilterManager | schema_name=%s | view_name=%s | table_name=%s | filter_id_name=%s",
|
|
98
|
+
self.schema_name, self.view_name, self.table_name, self.filter_id_name
|
|
76
99
|
)
|
|
77
100
|
|
|
78
101
|
if self._exists():
|
|
79
|
-
|
|
80
|
-
"
|
|
81
|
-
|
|
102
|
+
logger_safe(
|
|
103
|
+
"info",
|
|
104
|
+
"Existing filter artifacts detected | schema_name=%s | view_name=%s | table_name=%s",
|
|
105
|
+
self.schema_name, self.view_name, self.table_name
|
|
82
106
|
)
|
|
107
|
+
|
|
83
108
|
df = tdml.DataFrame(tdml.in_schema(self.schema_name, self.table_name))
|
|
84
|
-
# First column is assumed to be
|
|
85
|
-
|
|
109
|
+
self.filter_id_name = df.columns[0] # First column is assumed to be filter id
|
|
110
|
+
|
|
86
111
|
self.nb_filters = tdml.execute_sql(
|
|
87
112
|
f"SEL MAX({self.filter_id_name}) AS nb_filters FROM {self.schema_name}.{self.table_name}"
|
|
88
113
|
).fetchall()[0][0]
|
|
114
|
+
|
|
89
115
|
self.time_filtering = self._istimefiltering()
|
|
90
116
|
self.col_names = df.columns[2:] if self.time_filtering else df.columns[1:]
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
"time_filtering": self.time_filtering,
|
|
97
|
-
"col_names": list(self.col_names),
|
|
98
|
-
},
|
|
117
|
+
|
|
118
|
+
logger_safe(
|
|
119
|
+
"debug",
|
|
120
|
+
"Detected existing configuration | filter_id_name=%s | nb_filters=%s | time_filtering=%s | col_names=%s",
|
|
121
|
+
self.filter_id_name, self.nb_filters, self.time_filtering, list(self.col_names)
|
|
99
122
|
)
|
|
123
|
+
|
|
100
124
|
else:
|
|
101
|
-
|
|
102
|
-
"
|
|
103
|
-
|
|
125
|
+
logger_safe(
|
|
126
|
+
"info",
|
|
127
|
+
"No existing filter artifacts found; will be created by load_filter() | schema_name=%s | view_name=%s",
|
|
128
|
+
self.schema_name, self.view_name
|
|
104
129
|
)
|
|
105
130
|
|
|
131
|
+
|
|
106
132
|
def _istimefiltering(self):
|
|
107
133
|
"""
|
|
108
134
|
Determine if the hidden table includes a `BUSINESS_DATE` column.
|
|
@@ -148,7 +174,7 @@ class FilterManager:
|
|
|
148
174
|
Raises:
|
|
149
175
|
ValueError: If `time_column` is provided but not present in `df`.
|
|
150
176
|
"""
|
|
151
|
-
logger.info("Loading filters", extra={"rows":
|
|
177
|
+
logger.info("Loading filters", extra={"rows": df.shape[0], "time_column": time_column})
|
|
152
178
|
|
|
153
179
|
if time_column and time_column not in df.columns:
|
|
154
180
|
logger.error("Specified time_column not found in DataFrame.", extra={"time_column": time_column})
|
|
@@ -263,15 +289,17 @@ class FilterManager:
|
|
|
263
289
|
Raises:
|
|
264
290
|
ValueError: If filter artifacts do not exist yet.
|
|
265
291
|
"""
|
|
266
|
-
|
|
292
|
+
|
|
267
293
|
|
|
268
294
|
if not self._exists():
|
|
269
|
-
|
|
295
|
+
logger_safe("error", "Filter artifacts not initialized.")
|
|
270
296
|
raise ValueError("The filter has not been initialized with load_filter() or has been deleted.")
|
|
271
297
|
|
|
272
298
|
if self.time_filtering:
|
|
299
|
+
select_cols_str = ["BUSINESS_DATE"] + list(self.col_names)
|
|
273
300
|
select_cols = ",".join(["BUSINESS_DATE"] + list(self.col_names))
|
|
274
301
|
else:
|
|
302
|
+
select_cols_str = list(self.col_names)
|
|
275
303
|
select_cols = ",".join(self.col_names)
|
|
276
304
|
|
|
277
305
|
query = f"""
|
|
@@ -280,8 +308,14 @@ class FilterManager:
|
|
|
280
308
|
FROM {self.schema_name}.{self.table_name}
|
|
281
309
|
WHERE {self.filter_id_name} = {filter_id}
|
|
282
310
|
"""
|
|
283
|
-
|
|
311
|
+
logger_safe("info", "Updating active filter | %s", ','.join([c + ':' + v for c,v in zip(select_cols_str, tdml.execute_sql(f"SEL * FROM {self.schema_name}.{self.view_name}").fetchall()[0])]))
|
|
312
|
+
|
|
313
|
+
if getattr(tdfs4ds, "DEBUG_MODE", False):
|
|
314
|
+
logger_safe("debug", "Replacing view with new filter:\n%s", query)
|
|
315
|
+
|
|
284
316
|
tdml.execute_sql(query)
|
|
317
|
+
logger_safe("debug", "View %s.%s updated to filter_id=%s", self.schema_name, self.view_name, filter_id)
|
|
318
|
+
|
|
285
319
|
|
|
286
320
|
def display(self):
|
|
287
321
|
"""
|
tdfs4ds/utils/time_management.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import teradataml as tdml
|
|
2
2
|
import datetime
|
|
3
|
-
from tdfs4ds import logger
|
|
3
|
+
from tdfs4ds import logger, logger_safe
|
|
4
4
|
import re
|
|
5
5
|
|
|
6
6
|
import tdfs4ds
|
|
@@ -22,23 +22,62 @@ def get_hidden_table_name(table_name: str) -> str:
|
|
|
22
22
|
|
|
23
23
|
class TimeManager:
|
|
24
24
|
"""
|
|
25
|
-
Manage time
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
(
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
25
|
+
Manage versioned business time steps behind a Teradata-backed public view.
|
|
26
|
+
|
|
27
|
+
`TimeManager` stores a sequence of time “snapshots” in a hidden physical
|
|
28
|
+
table (`<view_name>_HIDDEN`) with two columns:
|
|
29
|
+
- `TIME_ID` (1..N): the step index, assigned deterministically via
|
|
30
|
+
`ROW_NUMBER()` over the input time column.
|
|
31
|
+
- `BUSINESS_DATE`: the business date/time associated with each step. When
|
|
32
|
+
loading, the SQL type is inferred and upcast to `TIMESTAMP WITH TIME ZONE`
|
|
33
|
+
when needed to preserve offsets.
|
|
34
|
+
|
|
35
|
+
A companion public view (`<view_name>`) always exposes the *current* business
|
|
36
|
+
date by filtering the hidden table on a single `TIME_ID`. Changing the
|
|
37
|
+
current step only rewrites the view definition—no data is mutated.
|
|
38
|
+
|
|
39
|
+
Key capabilities
|
|
40
|
+
- Load/replace the hidden table from a pandas DataFrame (`load_time_steps`).
|
|
41
|
+
- Switch the active time step by `TIME_ID` (`update`).
|
|
42
|
+
- Inspect the current date/time (`display`, `get_date_in_the_past`).
|
|
43
|
+
- Generate a timeline view up to (or strictly before) the current step
|
|
44
|
+
(`generate_timeline`).
|
|
45
|
+
- Prune older steps and renumber so the earliest remaining step becomes 1
|
|
46
|
+
(`prune_time`).
|
|
47
|
+
- Clone from another `TimeManager` (soft link or hard copy) and optionally
|
|
48
|
+
take ownership of the hidden table (`clone_timer`, `take_ownership`).
|
|
49
|
+
- Introspect the active step by parsing the view DDL (`get_current_timeid`,
|
|
50
|
+
`print_view_ddl`).
|
|
51
|
+
|
|
52
|
+
Workflow overview
|
|
53
|
+
1) Instantiate `TimeManager` with a target `view_name` and `schema_name`.
|
|
54
|
+
2) Call `load_time_steps(df, time_column)` to (re)create the hidden table and
|
|
55
|
+
point the public view at `TIME_ID = 1`.
|
|
56
|
+
3) Use `update(time_id)` to switch the active business date.
|
|
57
|
+
4) (Optional) Create derivative timeline views, prune older steps, or clone.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
table_name (str): Base public view name to manage (e.g., "MY_VIEW").
|
|
61
|
+
schema_name (str): Teradata schema/database that holds the artifacts.
|
|
62
|
+
|
|
63
|
+
Attributes
|
|
64
|
+
schema_name (str): Target schema for the view and hidden table.
|
|
65
|
+
table_name (str): Hidden table name (`<view_name>_HIDDEN`).
|
|
66
|
+
view_name (str): Public view name (`<view_name>`).
|
|
67
|
+
time_id (str): Name of the step identifier column (default: "time_id").
|
|
68
|
+
nb_time_steps (int | None): Number of steps detected after load/inspection.
|
|
69
|
+
data_type (str | None): SQL data type of `BUSINESS_DATE` (e.g., `DATE`,
|
|
70
|
+
`TIMESTAMP WITH TIME ZONE`), inferred during load/inspection.
|
|
71
|
+
|
|
72
|
+
Notes
|
|
73
|
+
- On initialization, if the hidden table already exists, metadata
|
|
74
|
+
(`data_type`, `nb_time_steps`) is auto-detected.
|
|
75
|
+
- `load_time_steps` will drop and recreate the hidden table to match the
|
|
76
|
+
inferred schema, then rebuild the public view.
|
|
77
|
+
- “Soft” cloning points this manager’s view at the source hidden table;
|
|
78
|
+
“hard” cloning copies the table into this schema and marks it owned.
|
|
79
|
+
- Ownership controls whether `_drop()` is allowed to remove the hidden
|
|
80
|
+
table (use `take_ownership` to promote ownership when appropriate).
|
|
42
81
|
"""
|
|
43
82
|
|
|
44
83
|
def __init__(self, table_name: str, schema_name: str) -> None:
|
|
@@ -133,7 +172,8 @@ class TimeManager:
|
|
|
133
172
|
logger.debug("Dropped existing table %s.%s (if existed).", self.schema_name, self.table_name)
|
|
134
173
|
except Exception as e:
|
|
135
174
|
# Not fatal; the table might not exist. Log at debug when in dev, warning otherwise.
|
|
136
|
-
|
|
175
|
+
e_str = str(e).split('\n')[0]
|
|
176
|
+
msg = f"Error dropping table {self.schema_name}.{self.table_name}: {e_str}"
|
|
137
177
|
if tdfs4ds.DEBUG_MODE:
|
|
138
178
|
logger.debug(msg)
|
|
139
179
|
else:
|
|
@@ -156,7 +196,7 @@ class TimeManager:
|
|
|
156
196
|
schema_name=self.schema_name,
|
|
157
197
|
if_exists="append"
|
|
158
198
|
)
|
|
159
|
-
logger.info("Inserted %s time steps into %s.%s.",
|
|
199
|
+
logger.info("Inserted %s time steps into %s.%s.", df_.shape[0], self.schema_name, self.table_name)
|
|
160
200
|
|
|
161
201
|
# Step 6: Update view
|
|
162
202
|
create_view_sql = f"""
|
|
@@ -240,12 +280,15 @@ class TimeManager:
|
|
|
240
280
|
FROM {self.schema_name}.{self.table_name}
|
|
241
281
|
WHERE TIME_ID = {time_id}
|
|
242
282
|
"""
|
|
243
|
-
if tdfs4ds
|
|
244
|
-
|
|
283
|
+
if getattr(tdfs4ds, "DEBUG_MODE", False):
|
|
284
|
+
logger_safe("debug", "Executing view update:\n%s", query)
|
|
285
|
+
|
|
245
286
|
tdml.execute_sql(query)
|
|
246
|
-
|
|
287
|
+
logger_safe("info", "Updated view %s.%s to TIME_ID=%s.", self.schema_name, self.view_name, time_id)
|
|
288
|
+
|
|
247
289
|
else:
|
|
248
|
-
|
|
290
|
+
logger_safe(
|
|
291
|
+
"warning",
|
|
249
292
|
"Cannot update view: hidden table %s.%s does not exist.",
|
|
250
293
|
self.schema_name, self.table_name
|
|
251
294
|
)
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=A-MJUMl06mJBwd94ByY8DZoatCL4A8r7mqe5u6EzCMw,55010
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -17,22 +17,22 @@ tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,76
|
|
|
17
17
|
tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
|
|
18
18
|
tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
|
|
19
19
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
20
|
-
tdfs4ds/feature_store/feature_data_processing.py,sha256=
|
|
20
|
+
tdfs4ds/feature_store/feature_data_processing.py,sha256=JEtz1UpZY7oMaonuhzC8eTbZAL8SzrnLOpo0WTEDFUM,41697
|
|
21
21
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
|
|
22
22
|
tdfs4ds/feature_store/feature_store_management.py,sha256=pWM9sjppBgRIg3l1ksoDJsM1fnaZlWtnuE3JuOP_2mY,54736
|
|
23
23
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
24
24
|
tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
|
|
25
|
-
tdfs4ds/process_store/process_query_administration.py,sha256=
|
|
26
|
-
tdfs4ds/process_store/process_registration_management.py,sha256=
|
|
25
|
+
tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
|
|
26
|
+
tdfs4ds/process_store/process_registration_management.py,sha256=2fFjt4Pmh3An1BUFvRX3xABSlQrlWiEiPQStH3A9Xpk,36130
|
|
27
27
|
tdfs4ds/process_store/process_store_catalog_management.py,sha256=eVUU9uanyXCUkzi2vcHbJPL9qFiXVasnCxPGr-r9EY8,16090
|
|
28
28
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
29
|
-
tdfs4ds/utils/filter_management.py,sha256=
|
|
29
|
+
tdfs4ds/utils/filter_management.py,sha256=JdCHkkw_L6vpmjPMMp3AY2ZwITGrwAvljHxZttgeWTg,24761
|
|
30
30
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
31
31
|
tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
|
|
32
32
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
33
|
-
tdfs4ds/utils/time_management.py,sha256=
|
|
33
|
+
tdfs4ds/utils/time_management.py,sha256=asIWvK5K81NNwAGqC-9Tv4Timscxyv0vyuPFs01whu0,31461
|
|
34
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
36
|
-
tdfs4ds-0.2.4.
|
|
37
|
-
tdfs4ds-0.2.4.
|
|
38
|
-
tdfs4ds-0.2.4.
|
|
35
|
+
tdfs4ds-0.2.4.33.dist-info/METADATA,sha256=0HSUyalUNwp7ZD6Z811pBaNbMb0GEAYsNSzFcAaEWnk,14326
|
|
36
|
+
tdfs4ds-0.2.4.33.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.33.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|