tdfs4ds 0.2.4.32__py3-none-any.whl → 0.2.4.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +387 -542
- tdfs4ds/feature_store/feature_data_processing.py +367 -299
- tdfs4ds/feature_store/feature_store_management.py +189 -167
- tdfs4ds/process_store/process_query_administration.py +1 -1
- tdfs4ds/process_store/process_registration_management.py +67 -55
- tdfs4ds/utils/filter_management.py +87 -53
- tdfs4ds/utils/time_management.py +67 -24
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.34.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.34.dist-info}/RECORD +11 -11
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.34.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.32.dist-info → tdfs4ds-0.2.4.34.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ import pandas as pd
|
|
|
9
9
|
import tqdm
|
|
10
10
|
import inspect
|
|
11
11
|
import re
|
|
12
|
+
from tdfs4ds import logger_safe, logger
|
|
12
13
|
|
|
13
14
|
@execute_query_wrapper
|
|
14
15
|
def feature_store_catalog_view_creation():
|
|
@@ -350,46 +351,47 @@ def register_features(entity_id, feature_names_types, primary_index = None, part
|
|
|
350
351
|
|
|
351
352
|
def _register_features_merge(entity_id, feature_names_types, primary_index=None, partitioning=''):
|
|
352
353
|
"""
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
354
|
+
Register or update feature definitions in the feature catalog, with temporal support.
|
|
355
|
+
|
|
356
|
+
This function builds (or refreshes) entries in the Teradata feature catalog from a
|
|
357
|
+
mapping of feature names to their metadata, computes the target feature store table
|
|
358
|
+
and view names, stages the metadata to a temporary table, and executes a MERGE into
|
|
359
|
+
the catalog (with optional VALIDTIME support based on `tdfs4ds.FEATURE_STORE_TIME`).
|
|
360
|
+
|
|
361
|
+
Parameters
|
|
362
|
+
----------
|
|
363
|
+
entity_id : dict[str, Any]
|
|
364
|
+
Mapping of entity-key column names to types. Only the keys (column names) are
|
|
365
|
+
required here; values are not used by this function.
|
|
366
|
+
feature_names_types : dict[str, dict]
|
|
367
|
+
Dict of feature name -> {"type": <SQL_TYPE>, "id": <int>} describing each
|
|
368
|
+
feature’s storage type and identifier in the catalog.
|
|
369
|
+
primary_index : list[str] | None, optional
|
|
370
|
+
Primary index column(s) to use when deriving the feature store table/view names.
|
|
371
|
+
If None, defaults are inferred by `get_feature_store_table_name`.
|
|
372
|
+
partitioning : str, optional
|
|
373
|
+
Partitioning expression or comma-separated column list used by
|
|
374
|
+
`get_feature_store_table_name`.
|
|
375
|
+
|
|
376
|
+
Returns
|
|
377
|
+
-------
|
|
378
|
+
pd.DataFrame
|
|
379
|
+
A dataframe of the features that were (up)registered, including:
|
|
380
|
+
FEATURE_NAME, FEATURE_TYPE, FEATURE_ID, FEATURE_TABLE, FEATURE_VIEW,
|
|
381
|
+
ENTITY_NAME, FEATURE_DATABASE, DATA_DOMAIN.
|
|
382
|
+
|
|
383
|
+
Notes
|
|
384
|
+
-----
|
|
385
|
+
- When `tdfs4ds.FEATURE_STORE_TIME is None`, uses CURRENT VALIDTIME (non-explicit start/end).
|
|
386
|
+
Otherwise uses `VALIDTIME PERIOD ('<FEATURE_STORE_TIME>', '<END_PERIOD>')` and adds
|
|
387
|
+
the valid-time start/end when inserting.
|
|
388
|
+
- Respects `tdfs4ds.DISPLAY_LOGS` via `logger_safe`.
|
|
388
389
|
"""
|
|
389
390
|
|
|
390
|
-
|
|
391
|
+
# --- VALIDTIME setup -----------------------------------------------------
|
|
392
|
+
if tdfs4ds.FEATURE_STORE_TIME is None:
|
|
391
393
|
validtime_statement = 'CURRENT VALIDTIME'
|
|
392
|
-
validtime_start =
|
|
394
|
+
validtime_start = "CAST(CURRENT_TIME AS TIMESTAMP(0) WITH TIME ZONE)"
|
|
393
395
|
else:
|
|
394
396
|
validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{tdfs4ds.END_PERIOD})'"
|
|
395
397
|
validtime_start = f"CAST('{tdfs4ds.FEATURE_STORE_TIME}' AS TIMESTAMP(0) WITH TIME ZONE)"
|
|
@@ -399,154 +401,174 @@ def _register_features_merge(entity_id, feature_names_types, primary_index=None,
|
|
|
399
401
|
else:
|
|
400
402
|
end_period_ = tdfs4ds.END_PERIOD
|
|
401
403
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
+
# --- Input checks & early exit ------------------------------------------
|
|
405
|
+
if not feature_names_types:
|
|
406
|
+
logger_safe("info", "register_features: no new features to register")
|
|
404
407
|
return
|
|
405
408
|
|
|
406
|
-
#
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
ENTITY_ID__ = ','.join([k for k in entity_id_list])
|
|
410
|
-
|
|
411
|
-
# Create a DataFrame from the feature_names_types dictionary
|
|
412
|
-
if len(feature_names_types.keys()) > 1:
|
|
413
|
-
df = pd.DataFrame(feature_names_types).transpose().reset_index()
|
|
414
|
-
df.columns = ['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID']
|
|
415
|
-
else:
|
|
416
|
-
df = pd.DataFrame(columns=['FEATURE_NAME', 'FEATURE_TYPE', 'FEATURE_ID'])
|
|
417
|
-
k = list(feature_names_types.keys())[0]
|
|
418
|
-
df['FEATURE_NAME'] = [k]
|
|
419
|
-
df['FEATURE_TYPE'] = [feature_names_types[k]['type']]
|
|
420
|
-
df['FEATURE_ID'] = [feature_names_types[k]['id']]
|
|
421
|
-
|
|
422
|
-
|
|
409
|
+
# --- Entity columns (ordered, stable) -----------------------------------
|
|
410
|
+
entity_cols = sorted(list(entity_id.keys()))
|
|
411
|
+
ENTITY_ID__ = ",".join(entity_cols)
|
|
423
412
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
413
|
+
# --- Build dataframe safely (no transpose tricks) ------------------------
|
|
414
|
+
rows = []
|
|
415
|
+
for fname, meta in feature_names_types.items():
|
|
416
|
+
try:
|
|
417
|
+
rows.append({
|
|
418
|
+
"FEATURE_NAME": fname,
|
|
419
|
+
"FEATURE_TYPE": meta["type"],
|
|
420
|
+
"FEATURE_ID": meta["id"],
|
|
421
|
+
})
|
|
422
|
+
except KeyError as e:
|
|
423
|
+
logger_safe("error", "register_features: missing key %s in feature '%s' meta=%s", str(e), fname, meta)
|
|
424
|
+
raise
|
|
425
|
+
|
|
426
|
+
df = pd.DataFrame(rows, columns=["FEATURE_NAME", "FEATURE_TYPE", "FEATURE_ID"])
|
|
427
|
+
|
|
428
|
+
logger_safe(
|
|
429
|
+
"debug",
|
|
430
|
+
"register_features: features_count=%d | entity_cols=%s | primary_index=%s | partitioning=%s",
|
|
431
|
+
len(df),
|
|
432
|
+
entity_cols,
|
|
433
|
+
primary_index,
|
|
434
|
+
partitioning,
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
# --- Compute feature table & view names ---------------------------------
|
|
438
|
+
# Use apply to preserve original order; get_feature_store_table_name returns (table, view)
|
|
439
|
+
df["FEATURE_TABLE"] = df.apply(
|
|
440
|
+
lambda row: get_feature_store_table_name(
|
|
441
|
+
entity_id,
|
|
442
|
+
row["FEATURE_TYPE"],
|
|
443
|
+
primary_index=primary_index,
|
|
444
|
+
partitioning=partitioning
|
|
445
|
+
)[0],
|
|
446
|
+
axis=1
|
|
447
|
+
)
|
|
448
|
+
df["FEATURE_VIEW"] = df.apply(
|
|
449
|
+
lambda row: get_feature_store_table_name(
|
|
450
|
+
entity_id,
|
|
451
|
+
row["FEATURE_TYPE"],
|
|
452
|
+
primary_index=primary_index,
|
|
453
|
+
partitioning=partitioning
|
|
454
|
+
)[1],
|
|
455
|
+
axis=1
|
|
456
|
+
)
|
|
457
|
+
|
|
458
|
+
# --- Add catalog columns -------------------------------------------------
|
|
459
|
+
df["ENTITY_NAME"] = ENTITY_ID__
|
|
460
|
+
df["FEATURE_DATABASE"] = tdfs4ds.SCHEMA
|
|
461
|
+
df["DATA_DOMAIN"] = tdfs4ds.DATA_DOMAIN
|
|
462
|
+
|
|
463
|
+
# --- Stage to temp table -------------------------------------------------
|
|
464
|
+
tdml.copy_to_sql(
|
|
465
|
+
df,
|
|
466
|
+
table_name="temp",
|
|
467
|
+
schema_name=tdfs4ds.SCHEMA,
|
|
468
|
+
if_exists="replace",
|
|
469
|
+
primary_index="FEATURE_ID",
|
|
470
|
+
types={"FEATURE_ID": tdml.BIGINT},
|
|
471
|
+
)
|
|
472
|
+
logger_safe("debug", "register_features: staged %d rows to %s.temp", len(df), tdfs4ds.SCHEMA)
|
|
473
|
+
|
|
474
|
+
# --- Build MERGE statement ----------------------------------------------
|
|
475
|
+
if tdfs4ds.FEATURE_STORE_TIME is None:
|
|
476
|
+
# no explicit start/end in INSERT branch
|
|
456
477
|
query_merge = f"""
|
|
457
478
|
{validtime_statement}
|
|
458
|
-
MERGE INTO
|
|
479
|
+
MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
|
|
459
480
|
USING (
|
|
460
481
|
SELECT
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
WHEN MATCHED THEN
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
|
|
485
|
-
--,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
|
|
486
|
-
WHEN NOT MATCHED THEN
|
|
487
|
-
INSERT
|
|
482
|
+
CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
|
|
483
|
+
, A.FEATURE_NAME
|
|
484
|
+
, A.FEATURE_TYPE
|
|
485
|
+
, A.FEATURE_TABLE
|
|
486
|
+
, A.FEATURE_DATABASE
|
|
487
|
+
, A.FEATURE_VIEW
|
|
488
|
+
, A.ENTITY_NAME
|
|
489
|
+
, A.DATA_DOMAIN
|
|
490
|
+
FROM {tdfs4ds.SCHEMA}.temp A
|
|
491
|
+
LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
|
|
492
|
+
ON A.FEATURE_NAME = B.FEATURE_NAME
|
|
493
|
+
AND A.ENTITY_NAME = B.ENTITY_NAME
|
|
494
|
+
AND A.DATA_DOMAIN = B.DATA_DOMAIN
|
|
495
|
+
) UPDATED_FEATURES
|
|
496
|
+
ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
|
|
497
|
+
AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
|
|
498
|
+
AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
|
|
499
|
+
WHEN MATCHED THEN UPDATE SET
|
|
500
|
+
FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
|
|
501
|
+
, FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
|
|
502
|
+
, FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
|
|
503
|
+
, FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
|
|
504
|
+
WHEN NOT MATCHED THEN INSERT
|
|
488
505
|
( UPDATED_FEATURES.FEATURE_ID
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
506
|
+
, UPDATED_FEATURES.FEATURE_NAME
|
|
507
|
+
, UPDATED_FEATURES.FEATURE_TYPE
|
|
508
|
+
, UPDATED_FEATURES.FEATURE_TABLE
|
|
509
|
+
, UPDATED_FEATURES.FEATURE_DATABASE
|
|
510
|
+
, UPDATED_FEATURES.FEATURE_VIEW
|
|
511
|
+
, UPDATED_FEATURES.ENTITY_NAME
|
|
512
|
+
, UPDATED_FEATURES.DATA_DOMAIN
|
|
513
|
+
);
|
|
514
|
+
"""
|
|
498
515
|
else:
|
|
516
|
+
# insert with explicit valid-time start/end
|
|
499
517
|
query_merge = f"""
|
|
500
518
|
{validtime_statement}
|
|
501
|
-
MERGE INTO
|
|
519
|
+
MERGE INTO {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} EXISTING_FEATURES
|
|
502
520
|
USING (
|
|
503
521
|
SELECT
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
WHEN MATCHED THEN
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
|
|
528
|
-
--,ENTITY_NAME = UPDATED_FEATURES.ENTITY_NAME -- modified
|
|
529
|
-
WHEN NOT MATCHED THEN
|
|
530
|
-
INSERT
|
|
522
|
+
CASE WHEN B.FEATURE_ID IS NULL THEN A.FEATURE_ID ELSE B.FEATURE_ID END AS FEATURE_ID
|
|
523
|
+
, A.FEATURE_NAME
|
|
524
|
+
, A.FEATURE_TYPE
|
|
525
|
+
, A.FEATURE_TABLE
|
|
526
|
+
, A.FEATURE_DATABASE
|
|
527
|
+
, A.FEATURE_VIEW
|
|
528
|
+
, A.ENTITY_NAME
|
|
529
|
+
, A.DATA_DOMAIN
|
|
530
|
+
FROM {tdfs4ds.SCHEMA}.temp A
|
|
531
|
+
LEFT JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
|
|
532
|
+
ON A.FEATURE_NAME = B.FEATURE_NAME
|
|
533
|
+
AND A.ENTITY_NAME = B.ENTITY_NAME
|
|
534
|
+
AND A.DATA_DOMAIN = B.DATA_DOMAIN
|
|
535
|
+
) UPDATED_FEATURES
|
|
536
|
+
ON UPDATED_FEATURES.FEATURE_ID = EXISTING_FEATURES.FEATURE_ID
|
|
537
|
+
AND UPDATED_FEATURES.FEATURE_NAME = EXISTING_FEATURES.FEATURE_NAME
|
|
538
|
+
AND UPDATED_FEATURES.DATA_DOMAIN = EXISTING_FEATURES.DATA_DOMAIN
|
|
539
|
+
WHEN MATCHED THEN UPDATE SET
|
|
540
|
+
FEATURE_TABLE = UPDATED_FEATURES.FEATURE_TABLE
|
|
541
|
+
, FEATURE_TYPE = UPDATED_FEATURES.FEATURE_TYPE
|
|
542
|
+
, FEATURE_DATABASE = UPDATED_FEATURES.FEATURE_DATABASE
|
|
543
|
+
, FEATURE_VIEW = UPDATED_FEATURES.FEATURE_VIEW
|
|
544
|
+
WHEN NOT MATCHED THEN INSERT
|
|
531
545
|
( UPDATED_FEATURES.FEATURE_ID
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
546
|
+
, UPDATED_FEATURES.FEATURE_NAME
|
|
547
|
+
, UPDATED_FEATURES.FEATURE_TYPE
|
|
548
|
+
, UPDATED_FEATURES.FEATURE_TABLE
|
|
549
|
+
, UPDATED_FEATURES.FEATURE_DATABASE
|
|
550
|
+
, UPDATED_FEATURES.FEATURE_VIEW
|
|
551
|
+
, UPDATED_FEATURES.ENTITY_NAME
|
|
552
|
+
, UPDATED_FEATURES.DATA_DOMAIN
|
|
553
|
+
, {validtime_start}
|
|
554
|
+
, '{end_period_}'
|
|
555
|
+
);
|
|
556
|
+
"""
|
|
542
557
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
# Execute the update and insert queries
|
|
558
|
+
logger_safe("debug", "register_features: merge_sql_preview=%s", " ".join(query_merge.split())[:400] + " ...")
|
|
559
|
+
|
|
560
|
+
# --- Execute MERGE -------------------------------------------------------
|
|
547
561
|
execute_query(query_merge)
|
|
562
|
+
logger_safe(
|
|
563
|
+
"info",
|
|
564
|
+
"register_features: merged %d features into %s.%s",
|
|
565
|
+
len(df),
|
|
566
|
+
tdfs4ds.SCHEMA,
|
|
567
|
+
tdfs4ds.FEATURE_CATALOG_NAME,
|
|
568
|
+
)
|
|
548
569
|
|
|
549
570
|
return df
|
|
571
|
+
|
|
550
572
|
def _register_features_update_insert(entity_id, feature_names_types, primary_index = None, partitioning = ''):
|
|
551
573
|
"""
|
|
552
574
|
Registers or updates feature definitions in a Teradata database's feature catalog, associating entity identifiers
|
|
@@ -28,7 +28,7 @@ def list_processes():
|
|
|
28
28
|
return tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW))
|
|
29
29
|
except Exception as e:
|
|
30
30
|
print(str(e))
|
|
31
|
-
print(
|
|
31
|
+
print(tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW)).show_query())
|
|
32
32
|
|
|
33
33
|
def list_processes_feature_split():
|
|
34
34
|
"""
|
|
@@ -3,6 +3,7 @@ import tdfs4ds
|
|
|
3
3
|
from tdfs4ds.utils.query_management import execute_query_wrapper
|
|
4
4
|
import uuid
|
|
5
5
|
import json
|
|
6
|
+
from tdfs4ds import logger,logger_safe
|
|
6
7
|
|
|
7
8
|
@execute_query_wrapper
|
|
8
9
|
def register_process_view(view_name, entity_id, feature_names, metadata={}, entity_null_substitute = {}, **kwargs):
|
|
@@ -74,80 +75,91 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
|
|
|
74
75
|
- Requires 'tdml' module for DataFrame operations and 'uuid' for generating unique identifiers.
|
|
75
76
|
"""
|
|
76
77
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
if type(view_name) == tdml.dataframe.dataframe.DataFrame:
|
|
78
|
+
# Handle teradataml DataFrame input
|
|
79
|
+
if isinstance(view_name, tdml.dataframe.dataframe.DataFrame):
|
|
80
80
|
try:
|
|
81
81
|
view_name = view_name._table_name
|
|
82
|
-
except:
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
except Exception:
|
|
83
|
+
logger_safe(
|
|
84
|
+
"error",
|
|
85
|
+
"Invalid DataFrame for view registration. Use: tdml.DataFrame(<table/view>). Crystallize if needed."
|
|
86
|
+
)
|
|
85
87
|
raise
|
|
86
88
|
|
|
89
|
+
# Prevent using temporary teradataml views
|
|
87
90
|
if view_name.split('.')[1].startswith('ml__'):
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
logger_safe(
|
|
92
|
+
"error",
|
|
93
|
+
"Invalid view name '%s': starts with 'ml__'. Please crystallize your view first.",
|
|
94
|
+
view_name
|
|
95
|
+
)
|
|
96
|
+
raise ValueError("Invalid process view name: temporary teradataml views are not allowed.")
|
|
97
|
+
|
|
98
|
+
# Get optional arguments
|
|
92
99
|
filtermanager = kwargs.get('filtermanager', None)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# Get data distribution related inputs:
|
|
97
|
-
primary_index = kwargs.get('primary_index', [e for e in entity_id.keys()])
|
|
100
|
+
query_upsert_filtermanager = None
|
|
101
|
+
primary_index = kwargs.get('primary_index', list(entity_id.keys()))
|
|
98
102
|
partitioning = kwargs.get('partitioning', '').replace("'", '"')
|
|
99
103
|
|
|
100
104
|
if primary_index is None:
|
|
101
|
-
primary_index =
|
|
105
|
+
primary_index = list(entity_id.keys())
|
|
102
106
|
|
|
107
|
+
feature_names = ','.join(feature_names)
|
|
103
108
|
|
|
109
|
+
# Validtime period
|
|
110
|
+
end_period_ = '9999-01-01 00:00:00' if tdfs4ds.END_PERIOD == 'UNTIL_CHANGED' else tdfs4ds.END_PERIOD
|
|
111
|
+
validtime_statement = (
|
|
112
|
+
'CURRENT VALIDTIME'
|
|
113
|
+
if tdfs4ds.FEATURE_STORE_TIME is None
|
|
114
|
+
else f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
|
|
115
|
+
)
|
|
104
116
|
|
|
105
|
-
|
|
106
|
-
feature_names = ','.join(feature_names)
|
|
117
|
+
logger_safe("info", "Registering process view: %s", view_name)
|
|
107
118
|
|
|
108
|
-
#
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
119
|
+
# Check if view already exists in catalog
|
|
120
|
+
query_process_id = f"""
|
|
121
|
+
SEL PROCESS_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
|
|
122
|
+
WHERE view_name = '{view_name}'
|
|
123
|
+
"""
|
|
124
|
+
process_id_result = tdml.execute_sql(query_process_id).fetchall()
|
|
113
125
|
|
|
114
|
-
if
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
|
|
126
|
+
if process_id_result:
|
|
127
|
+
process_id = process_id_result[0][0]
|
|
128
|
+
logger_safe("info", "Updating existing process_id=%s", process_id)
|
|
118
129
|
|
|
130
|
+
query_feature_version = f"""
|
|
131
|
+
SEL PROCESS_VERSION FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
|
|
132
|
+
WHERE view_name = '{view_name}'
|
|
133
|
+
"""
|
|
134
|
+
feature_version = tdml.execute_sql(query_feature_version).fetchall()[0][0]
|
|
119
135
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if len(query_primary_index_res)>0:
|
|
129
|
-
FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = tdml.execute_sql(query_primary_index).fetchall()[0]
|
|
136
|
+
query_primary_index = f"""
|
|
137
|
+
SEL FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING
|
|
138
|
+
FROM {tdfs4ds.SCHEMA}.{tdfs4ds.DATA_DISTRIBUTION_NAME}
|
|
139
|
+
WHERE process_id = '{process_id}'
|
|
140
|
+
"""
|
|
141
|
+
dist_res = tdml.execute_sql(query_primary_index).fetchall()
|
|
142
|
+
if dist_res:
|
|
143
|
+
FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = dist_res[0]
|
|
130
144
|
else:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
""
|
|
145
|
+
logger_safe(
|
|
146
|
+
"error",
|
|
147
|
+
"Missing data distribution info for existing process %s. Check distribution table.",
|
|
148
|
+
process_id
|
|
149
|
+
)
|
|
150
|
+
raise ValueError("Missing distribution info.")
|
|
137
151
|
else:
|
|
138
|
-
# Generating a unique process identifier
|
|
139
152
|
process_id = str(uuid.uuid4())
|
|
140
153
|
feature_version = 1
|
|
141
154
|
FOR_PRIMARY_INDEX = ",".join(primary_index)
|
|
142
155
|
FOR_DATA_PARTITIONING = partitioning
|
|
156
|
+
logger_safe("info", "Generated new process_id=%s", process_id)
|
|
143
157
|
|
|
144
|
-
#
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
158
|
+
# Build entity_id string
|
|
159
|
+
ENTITY_ID__ = ','.join(sorted(entity_id.keys()))
|
|
160
|
+
logger_safe("debug", "Entity IDs: %s", ENTITY_ID__)
|
|
161
|
+
logger_safe("debug", "Feature names: %s", feature_names)
|
|
148
162
|
|
|
149
|
-
print('feature_version :',feature_version)
|
|
150
|
-
print('int(feature_version) :', int(feature_version))
|
|
151
163
|
if tdfs4ds.FEATURE_STORE_TIME == None:
|
|
152
164
|
|
|
153
165
|
|
|
@@ -402,16 +414,16 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
|
|
|
402
414
|
"""
|
|
403
415
|
|
|
404
416
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
print(f"to update your dataset : dataset = run(process_id='{process_id}',return_dataset=True)")
|
|
417
|
+
logger_safe("info", "Process registered: process_id=%s", process_id)
|
|
418
|
+
logger_safe("info", "To rerun: run(process_id='%s')", process_id)
|
|
419
|
+
logger_safe("info", "To build dataset: dataset = run(process_id='%s', return_dataset=True)", process_id)
|
|
409
420
|
|
|
410
|
-
#
|
|
421
|
+
# Return queries
|
|
411
422
|
if kwargs.get('with_process_id'):
|
|
412
423
|
return query_upsert, process_id, query_upsert_dist, query_upsert_filtermanager
|
|
413
424
|
else:
|
|
414
425
|
return query_upsert, query_upsert_dist, query_upsert_filtermanager
|
|
426
|
+
|
|
415
427
|
@execute_query_wrapper
|
|
416
428
|
def _register_process_view_update_insert(view_name, entity_id, feature_names, metadata={}, entity_null_substitute={}, **kwargs):
|
|
417
429
|
"""
|