tdfs4ds 0.2.4.39__py3-none-any.whl → 0.2.4.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +8 -2
- tdfs4ds/process_store/process_followup.py +113 -2
- tdfs4ds/utils/filter_management.py +59 -0
- {tdfs4ds-0.2.4.39.dist-info → tdfs4ds-0.2.4.40.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.39.dist-info → tdfs4ds-0.2.4.40.dist-info}/RECORD +7 -7
- {tdfs4ds-0.2.4.39.dist-info → tdfs4ds-0.2.4.40.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.39.dist-info → tdfs4ds-0.2.4.40.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.2.4.39'
|
|
1
|
+
__version__ = '0.2.4.40'
|
|
2
2
|
import logging
|
|
3
3
|
import json
|
|
4
4
|
|
|
@@ -800,7 +800,13 @@ def _upload_features(
|
|
|
800
800
|
do_compute = True
|
|
801
801
|
if process_id and tdfs4ds.FEATURE_STORE_TIME:
|
|
802
802
|
# see if already computed
|
|
803
|
-
follow_up = tdfs4ds.process_store.process_followup.follow_up_report()
|
|
803
|
+
follow_up = tdfs4ds.process_store.process_followup.follow_up_report(process_id=process_id, filtermanager=filtermanager)
|
|
804
|
+
follow_up = follow_up[
|
|
805
|
+
(follow_up.STATUS == 'COMPLETED') &
|
|
806
|
+
(follow_up.VALIDTIME_DATE.isna() == False) &
|
|
807
|
+
(follow_up.VALIDTIME_DATE == tdfs4ds.FEATURE_STORE_TIME)
|
|
808
|
+
]
|
|
809
|
+
|
|
804
810
|
if follow_up.shape[0] > 0:
|
|
805
811
|
do_compute = False
|
|
806
812
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import tdfs4ds
|
|
2
2
|
from tdfs4ds.utils.query_management import execute_query_wrapper
|
|
3
3
|
import teradataml as tdml
|
|
4
|
+
from tdfs4ds import logger_safe, logger
|
|
4
5
|
|
|
5
6
|
@execute_query_wrapper
|
|
6
7
|
def follow_up_table_creation():
|
|
@@ -194,5 +195,115 @@ def followup_close(run_id, process_type, process_id, status='COMPLETED', filterm
|
|
|
194
195
|
raise
|
|
195
196
|
return query
|
|
196
197
|
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
from typing import Optional
|
|
199
|
+
|
|
200
|
+
def follow_up_report(filtermanager: Optional[object] = None, process_id: Optional[str] = None):
|
|
201
|
+
"""
|
|
202
|
+
Return a follow-up report as a `tdml.DataFrame`, optionally filtered by
|
|
203
|
+
`process_id` and/or a `filtermanager`'s applied filter.
|
|
204
|
+
|
|
205
|
+
Behavior by arguments:
|
|
206
|
+
- process_id is None and filtermanager is None:
|
|
207
|
+
Return all rows from SCHEMA.FOLLOW_UP_NAME, sorted by START_DATETIME desc.
|
|
208
|
+
- process_id is not None and filtermanager is None:
|
|
209
|
+
Return rows for the given PROCESS_ID.
|
|
210
|
+
- process_id is not None and filtermanager is not None:
|
|
211
|
+
Return rows for the given PROCESS_ID whose APPLIED_FILTER matches the
|
|
212
|
+
JSON_AGG of `filtermanager`'s columns coming from its schema/view.
|
|
213
|
+
- process_id is None and filtermanager is not None:
|
|
214
|
+
Return rows whose APPLIED_FILTER matches the JSON_AGG of `filtermanager`
|
|
215
|
+
(no PROCESS_ID constraint).
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
filtermanager: An object exposing `col_names`, `schema_name`, and `view_name`.
|
|
219
|
+
Its columns are aggregated via `JSON_AGG(col1, col2, ...)` to compare
|
|
220
|
+
against A.APPLIED_FILTER.
|
|
221
|
+
process_id: Optional process identifier used to filter by PROCESS_ID.
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
tdml.DataFrame: The resulting dataframe sorted by START_DATETIME (descending).
|
|
225
|
+
|
|
226
|
+
Raises:
|
|
227
|
+
ValueError: If `filtermanager` is provided but is missing required attributes
|
|
228
|
+
or has an empty `col_names` list.
|
|
229
|
+
RuntimeError: If the query fails.
|
|
230
|
+
"""
|
|
231
|
+
logger_safe("debug", "follow_up_report called with process_id=%s, filtermanager=%s",
|
|
232
|
+
process_id, type(filtermanager).__name__ if filtermanager else None)
|
|
233
|
+
|
|
234
|
+
table_fqn = f"{tdfs4ds.SCHEMA}.{tdfs4ds.FOLLOW_UP_NAME}"
|
|
235
|
+
|
|
236
|
+
# Case 1: No filters at all -> return full table
|
|
237
|
+
if process_id is None and filtermanager is None:
|
|
238
|
+
logger_safe("info", "Returning all follow-up rows (no filters).")
|
|
239
|
+
try:
|
|
240
|
+
return tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.FOLLOW_UP_NAME)) \
|
|
241
|
+
.sort('START_DATETIME', ascending=False)
|
|
242
|
+
except Exception as e:
|
|
243
|
+
logger_safe("error", "Failed to fetch all follow-up rows: %s", e)
|
|
244
|
+
raise RuntimeError("Database query failed while fetching follow-up report.") from e
|
|
245
|
+
|
|
246
|
+
# Helper to build the FILTER_MANAGER scalar subquery when filtermanager is provided
|
|
247
|
+
def _build_filter_manager_subquery(fm: object) -> str:
|
|
248
|
+
required_attrs = ("col_names", "schema_name", "view_name")
|
|
249
|
+
if not all(hasattr(fm, a) for a in required_attrs):
|
|
250
|
+
raise ValueError("filtermanager must have col_names, schema_name, and view_name.")
|
|
251
|
+
if not getattr(fm, "col_names", None):
|
|
252
|
+
raise ValueError("filtermanager.col_names must be a non-empty list.")
|
|
253
|
+
|
|
254
|
+
json_cols = ",".join(fm.col_names)
|
|
255
|
+
subq = f"""
|
|
256
|
+
(
|
|
257
|
+
SELECT JSON_AGG({json_cols}) AS APPLIED_FILTER
|
|
258
|
+
FROM {fm.schema_name}.{fm.view_name}
|
|
259
|
+
) FILTER_MANAGER
|
|
260
|
+
"""
|
|
261
|
+
logger_safe("debug", "Constructed FILTER_MANAGER subquery with columns: %s", json_cols)
|
|
262
|
+
return subq
|
|
263
|
+
|
|
264
|
+
# Defensive escaping for process_id if used in a literal (prefer bind params if available)
|
|
265
|
+
def _escape_literal(val: str) -> str:
|
|
266
|
+
return val.replace("'", "''")
|
|
267
|
+
|
|
268
|
+
try:
|
|
269
|
+
# Case 2: process_id only
|
|
270
|
+
if process_id is not None and filtermanager is None:
|
|
271
|
+
pid = _escape_literal(process_id)
|
|
272
|
+
query = f"""
|
|
273
|
+
SELECT *
|
|
274
|
+
FROM {table_fqn}
|
|
275
|
+
WHERE PROCESS_ID = '{pid}'
|
|
276
|
+
"""
|
|
277
|
+
logger_safe("info", "Fetching follow-up rows filtered by PROCESS_ID only.")
|
|
278
|
+
return tdml.DataFrame.from_query(query).sort('START_DATETIME', ascending=False)
|
|
279
|
+
|
|
280
|
+
# Case 3: filtermanager only
|
|
281
|
+
if process_id is None and filtermanager is not None:
|
|
282
|
+
subq = _build_filter_manager_subquery(filtermanager)
|
|
283
|
+
query = f"""
|
|
284
|
+
SELECT A.*
|
|
285
|
+
FROM {table_fqn} A,
|
|
286
|
+
{subq}
|
|
287
|
+
WHERE CAST(A.APPLIED_FILTER AS VARCHAR(20000)) =
|
|
288
|
+
CAST(FILTER_MANAGER.APPLIED_FILTER AS VARCHAR(20000))
|
|
289
|
+
"""
|
|
290
|
+
logger_safe("info", "Fetching follow-up rows filtered by FILTER_MANAGER only.")
|
|
291
|
+
return tdml.DataFrame.from_query(query).sort('START_DATETIME', ascending=False)
|
|
292
|
+
|
|
293
|
+
# Case 4: both process_id and filtermanager
|
|
294
|
+
pid = _escape_literal(process_id) # type: ignore[arg-type]
|
|
295
|
+
subq = _build_filter_manager_subquery(filtermanager) # type: ignore[arg-type]
|
|
296
|
+
query = f"""
|
|
297
|
+
SELECT A.*
|
|
298
|
+
FROM {table_fqn} A,
|
|
299
|
+
{subq}
|
|
300
|
+
WHERE A.PROCESS_ID = '{pid}'
|
|
301
|
+
AND CAST(A.APPLIED_FILTER AS VARCHAR(20000)) =
|
|
302
|
+
CAST(FILTER_MANAGER.APPLIED_FILTER AS VARCHAR(20000))
|
|
303
|
+
"""
|
|
304
|
+
logger_safe("info", "Fetching follow-up rows filtered by PROCESS_ID and FILTER_MANAGER.")
|
|
305
|
+
return tdml.DataFrame.from_query(query).sort('START_DATETIME', ascending=False)
|
|
306
|
+
|
|
307
|
+
except Exception as e:
|
|
308
|
+
logger_safe("error", "Failed to fetch follow-up report: %s", e)
|
|
309
|
+
raise RuntimeError("Database query failed while fetching follow-up report.") from e
|
|
@@ -3,6 +3,7 @@ import numpy as np # Needed for np.datetime64 handling in get_date_in_the_past
|
|
|
3
3
|
import teradataml as tdml
|
|
4
4
|
import tdfs4ds
|
|
5
5
|
from tdfs4ds import logger, logger_safe
|
|
6
|
+
import json
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
def get_hidden_table_name(table_name):
|
|
@@ -556,3 +557,61 @@ class FilterManager:
|
|
|
556
557
|
)
|
|
557
558
|
self._owns_hidden = True
|
|
558
559
|
return self
|
|
560
|
+
|
|
561
|
+
def get_filter_condition(self, output_type: str = "str"):
|
|
562
|
+
"""
|
|
563
|
+
Retrieve the currently applied filter condition from the database.
|
|
564
|
+
|
|
565
|
+
This method executes a SQL query that aggregates the filter columns from a
|
|
566
|
+
filter manager view and returns the result either as a Python dictionary
|
|
567
|
+
or a raw JSON string.
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
output_type (str, optional): Format of the returned filter condition.
|
|
571
|
+
- "dict": Returns a Python dictionary (default).
|
|
572
|
+
- "str": Returns a raw JSON string.
|
|
573
|
+
|
|
574
|
+
Returns:
|
|
575
|
+
dict | str | None: The current filter condition in the requested format.
|
|
576
|
+
Returns None if no filter is present.
|
|
577
|
+
|
|
578
|
+
Raises:
|
|
579
|
+
ValueError: If `output_type` is not "dict" or "str".
|
|
580
|
+
RuntimeError: If the SQL execution fails.
|
|
581
|
+
|
|
582
|
+
Example:
|
|
583
|
+
>>> filters = self.get_filter_condition(output_type="dict")
|
|
584
|
+
>>> print(filters)
|
|
585
|
+
{'country': 'US', 'status': 'active'}
|
|
586
|
+
"""
|
|
587
|
+
logger_safe("debug", "Fetching current filter condition from the database")
|
|
588
|
+
|
|
589
|
+
# Build JSON_AGG expression safely
|
|
590
|
+
json_columns = ",".join(self.col_names)
|
|
591
|
+
|
|
592
|
+
query = f"""
|
|
593
|
+
SELECT JSON_AGG({json_columns}) AS applied_filter
|
|
594
|
+
FROM {self.schema_name}.{self.view_name} FILTER_MANAGER
|
|
595
|
+
"""
|
|
596
|
+
|
|
597
|
+
try:
|
|
598
|
+
result = tdml.execute_sql(query).fetchall()
|
|
599
|
+
except Exception as e:
|
|
600
|
+
logger_safe("error", "Failed to execute SQL for filter condition: %s", e)
|
|
601
|
+
raise RuntimeError("Database query failed while fetching filter condition") from e
|
|
602
|
+
|
|
603
|
+
# Handle no result
|
|
604
|
+
if not result or result[0][0] is None:
|
|
605
|
+
logger_safe("info", "No filter conditions found")
|
|
606
|
+
return None
|
|
607
|
+
|
|
608
|
+
json_result = result[0][0]
|
|
609
|
+
|
|
610
|
+
if output_type == "dict":
|
|
611
|
+
logger_safe("debug", "Returning filter as Python dictionary")
|
|
612
|
+
return json.loads(json_result)
|
|
613
|
+
elif output_type == "str":
|
|
614
|
+
logger_safe("debug", "Returning filter as JSON string")
|
|
615
|
+
return json_result
|
|
616
|
+
|
|
617
|
+
raise ValueError("Invalid output_type. Expected 'dict' or 'str'.")
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=Iaje0388sV4rstHa_fI1wRo2DxxXRgBh0N2r7X0Da_0,61203
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -21,18 +21,18 @@ tdfs4ds/feature_store/feature_data_processing.py,sha256=gjwypiTfwTyGyrP20v35Vu2u
|
|
|
21
21
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
|
|
22
22
|
tdfs4ds/feature_store/feature_store_management.py,sha256=mtPQkdMDhcOrhj9IAaH-FEP_znK53cYtEv8zXAbsigg,52123
|
|
23
23
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
24
|
-
tdfs4ds/process_store/process_followup.py,sha256=
|
|
24
|
+
tdfs4ds/process_store/process_followup.py,sha256=E4jgQahjhVRBbfAW3JXNLId7H5qV8ozRt-6PyAQuPzg,12583
|
|
25
25
|
tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
|
|
26
26
|
tdfs4ds/process_store/process_registration_management.py,sha256=2fFjt4Pmh3An1BUFvRX3xABSlQrlWiEiPQStH3A9Xpk,36130
|
|
27
27
|
tdfs4ds/process_store/process_store_catalog_management.py,sha256=eVUU9uanyXCUkzi2vcHbJPL9qFiXVasnCxPGr-r9EY8,16090
|
|
28
28
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
29
|
-
tdfs4ds/utils/filter_management.py,sha256=
|
|
29
|
+
tdfs4ds/utils/filter_management.py,sha256=fBcFKetmyyeiuBgrGZfcbf9YfCBXrDOnjoFLq6R9QcI,27047
|
|
30
30
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
31
31
|
tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
|
|
32
32
|
tdfs4ds/utils/query_management.py,sha256=kWDeTdsYcbpV5Tyhh-8uLRWvXh16nIdXNIJ97w76aNU,4848
|
|
33
33
|
tdfs4ds/utils/time_management.py,sha256=asIWvK5K81NNwAGqC-9Tv4Timscxyv0vyuPFs01whu0,31461
|
|
34
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
35
|
-
tdfs4ds-0.2.4.39.dist-info/METADATA,sha256=
|
|
36
|
-
tdfs4ds-0.2.4.39.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
-
tdfs4ds-0.2.4.39.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
-
tdfs4ds-0.2.4.39.dist-info/RECORD,,
|
|
35
|
+
tdfs4ds-0.2.4.40.dist-info/METADATA,sha256=WBbjnL-uXkEPdrqrnEr78QZmRrs5E4h6XD8Ss50W_H0,14326
|
|
36
|
+
tdfs4ds-0.2.4.40.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.40.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.40.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|