tdfs4ds 0.2.4.25__py3-none-any.whl → 0.2.4.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +70 -16
- tdfs4ds/feature_store/feature_query_retrieval.py +60 -49
- tdfs4ds/feature_store/feature_store_management.py +42 -54
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.27.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.27.dist-info}/RECORD +7 -7
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.27.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.25.dist-info → tdfs4ds-0.2.4.27.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.2.4.
|
|
1
|
+
__version__ = '0.2.4.27'
|
|
2
2
|
import logging
|
|
3
3
|
# Setup the logger
|
|
4
4
|
logging.basicConfig(
|
|
@@ -935,6 +935,10 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
|
|
|
935
935
|
selected_features : dict
|
|
936
936
|
A dictionary where the keys are feature table names, and the values are lists of tuples
|
|
937
937
|
(feature_id, feature_version, feature_name) specifying the features to retrieve.
|
|
938
|
+
NOTE: feature_version may be either:
|
|
939
|
+
- a single UUID string, or
|
|
940
|
+
- a list of dicts like:
|
|
941
|
+
{"process_id": <UUID>, "process_view_name": <str>}
|
|
938
942
|
|
|
939
943
|
view_name : str
|
|
940
944
|
The name of the view to be created in the database.
|
|
@@ -1004,6 +1008,24 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
|
|
|
1004
1008
|
# Sort the entity ID list for consistent query generation
|
|
1005
1009
|
list_entity_id.sort()
|
|
1006
1010
|
|
|
1011
|
+
# Helpers
|
|
1012
|
+
import re
|
|
1013
|
+
def _sanitize_identifier(name: str) -> str:
|
|
1014
|
+
# Keep letters, numbers, and underscores; replace others with '_'
|
|
1015
|
+
return re.sub(r'[^0-9A-Za-z_]', '_', name)
|
|
1016
|
+
|
|
1017
|
+
used_alias_counts = {} # base_alias -> count
|
|
1018
|
+
|
|
1019
|
+
def _unique_alias(base: str) -> str:
|
|
1020
|
+
"""
|
|
1021
|
+
Ensure alias uniqueness: if base already used, append _2, _3, ...
|
|
1022
|
+
"""
|
|
1023
|
+
if base not in used_alias_counts:
|
|
1024
|
+
used_alias_counts[base] = 1
|
|
1025
|
+
return base
|
|
1026
|
+
used_alias_counts[base] += 1
|
|
1027
|
+
return f"{base}_{used_alias_counts[base]}"
|
|
1028
|
+
|
|
1007
1029
|
# Initialize sub-query construction
|
|
1008
1030
|
tdfs4ds.logger.info("Generating the sub-queries for feature retrieval.")
|
|
1009
1031
|
sub_queries = []
|
|
@@ -1014,21 +1036,52 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
|
|
|
1014
1036
|
# Construct sub-queries for each feature
|
|
1015
1037
|
for k, v in list_features.items():
|
|
1016
1038
|
for feature_id, feature_version, feature_name in v:
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1039
|
+
|
|
1040
|
+
# Multiple processes: list of dicts
|
|
1041
|
+
if isinstance(feature_version, list):
|
|
1042
|
+
for item in feature_version:
|
|
1043
|
+
process_id = item.get("process_id")
|
|
1044
|
+
process_view_name = item.get("process_view_name") or "PROCESS"
|
|
1045
|
+
base_alias = _sanitize_identifier(f"{feature_name}_{process_view_name}")
|
|
1046
|
+
alias = _unique_alias(base_alias)
|
|
1047
|
+
|
|
1048
|
+
txt_where = f"(FEATURE_ID = {feature_id} AND FEATURE_VERSION='{process_id}')"
|
|
1049
|
+
feature_str = ',B1.FEATURE_VALUE AS ' + alias
|
|
1050
|
+
|
|
1051
|
+
sub_queries.append(
|
|
1052
|
+
{
|
|
1053
|
+
'feature_name': alias,
|
|
1054
|
+
'query': f"""
|
|
1055
|
+
SEQUENCED VALIDTIME
|
|
1056
|
+
SELECT
|
|
1057
|
+
{txt_entity}
|
|
1058
|
+
{feature_str}
|
|
1059
|
+
FROM {k} B1
|
|
1060
|
+
WHERE {txt_where}
|
|
1061
|
+
"""
|
|
1062
|
+
}
|
|
1063
|
+
)
|
|
1064
|
+
|
|
1065
|
+
# Single UUID
|
|
1066
|
+
else:
|
|
1067
|
+
base_alias = _sanitize_identifier(feature_name)
|
|
1068
|
+
alias = _unique_alias(base_alias)
|
|
1069
|
+
|
|
1070
|
+
txt_where = f"(FEATURE_ID = {feature_id} AND FEATURE_VERSION='{feature_version}')"
|
|
1071
|
+
feature_str = ',B1.FEATURE_VALUE AS ' + alias
|
|
1072
|
+
sub_queries.append(
|
|
1073
|
+
{
|
|
1074
|
+
'feature_name': alias,
|
|
1075
|
+
'query': f"""
|
|
1076
|
+
SEQUENCED VALIDTIME
|
|
1077
|
+
SELECT
|
|
1078
|
+
{txt_entity}
|
|
1079
|
+
{feature_str}
|
|
1080
|
+
FROM {k} B1
|
|
1081
|
+
WHERE {txt_where}
|
|
1082
|
+
"""
|
|
1083
|
+
}
|
|
1084
|
+
)
|
|
1032
1085
|
|
|
1033
1086
|
# Handle case where no features are available
|
|
1034
1087
|
if len(sub_queries) == 0:
|
|
@@ -1102,6 +1155,7 @@ def build_dataset(entity_id, selected_features, view_name, schema_name=None, com
|
|
|
1102
1155
|
return tdml.DataFrame.from_table(tdml.in_schema(schema_name, view_name))
|
|
1103
1156
|
|
|
1104
1157
|
|
|
1158
|
+
|
|
1105
1159
|
def build_dataset_opt(entity_id, selected_features, view_name = None, schema_name=tdfs4ds.SCHEMA,
|
|
1106
1160
|
comment='dataset', no_temporal=False, time_manager=None, query_only=False, entity_null_substitute={},
|
|
1107
1161
|
other=None, time_column=None, filtermanager = None, filter_conditions = None
|
|
@@ -249,48 +249,49 @@ def get_list_features(entity_name, domain=None):
|
|
|
249
249
|
return tdml.DataFrame.from_query(query)
|
|
250
250
|
|
|
251
251
|
|
|
252
|
-
def get_feature_versions(entity_name, features, domain=None
|
|
252
|
+
def get_feature_versions(entity_name, features, domain=None):
|
|
253
253
|
"""
|
|
254
|
-
Retrieve
|
|
255
|
-
from a given data domain. This function allows fetching either all versions or
|
|
256
|
-
just the latest versions of the features.
|
|
254
|
+
Retrieve version UUID(s) for the given features of an entity within a domain.
|
|
257
255
|
|
|
258
256
|
Parameters:
|
|
259
|
-
entity_name (str
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
domain (str, optional): The data domain to filter the feature versions.
|
|
263
|
-
Defaults to None, where a predefined domain is used.
|
|
264
|
-
latest_version_only (bool, optional): Flag to fetch only the latest version
|
|
265
|
-
of each feature. Defaults to True.
|
|
266
|
-
version_lag (int, optional): The number of versions to lag behind the latest.
|
|
267
|
-
Only effective if latest_version_only is True. Defaults to 0.
|
|
257
|
+
- entity_name (str): The entity name to which the features belong.
|
|
258
|
+
- features (str | list[str]): Feature name or list of feature names.
|
|
259
|
+
- domain (str, optional): Data domain to filter on. If None, defaults to tdfs4ds.DATA_DOMAIN.
|
|
268
260
|
|
|
269
261
|
Returns:
|
|
270
|
-
dict
|
|
262
|
+
- dict[str, str | list[dict]]: Maps each requested feature name to either:
|
|
263
|
+
- a single version UUID string if exactly one row exists, or
|
|
264
|
+
- a list of dicts if multiple rows exist; each dict has:
|
|
265
|
+
{
|
|
266
|
+
"process_id": <FEATURE_VERSION UUID>,
|
|
267
|
+
"process_view_name": <PROCESS_VIEW_NAME string>
|
|
268
|
+
}
|
|
269
|
+
If a requested feature has no entries, it will be present with value None.
|
|
270
|
+
|
|
271
|
+
Notes:
|
|
272
|
+
- Uses {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} as A and
|
|
273
|
+
{tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME} as B (must exist) joined on PROCESS_ID.
|
|
274
|
+
- Respects tdfs4ds.DEBUG_MODE to print the generated SQL.
|
|
271
275
|
"""
|
|
272
276
|
|
|
273
|
-
#
|
|
277
|
+
# Normalize inputs
|
|
278
|
+
if isinstance(features, str):
|
|
279
|
+
features = [features]
|
|
280
|
+
|
|
274
281
|
if domain is None:
|
|
275
282
|
domain = tdfs4ds.DATA_DOMAIN
|
|
276
283
|
|
|
284
|
+
# Basic escaping for single quotes in values used in SQL literals
|
|
285
|
+
def _esc(s: str) -> str:
|
|
286
|
+
return s.replace("'", "''")
|
|
277
287
|
|
|
278
|
-
|
|
279
|
-
# Convert the entity_name to a string if it is a list
|
|
280
|
-
if type(entity_name) == list:
|
|
281
|
-
entity_name.sort()
|
|
282
|
-
entity_name = ','.join(entity_name)
|
|
283
|
-
|
|
284
|
-
# Preparing the feature names for inclusion in the SQL query
|
|
285
|
-
if type(features) == list:
|
|
286
|
-
features = ["'" + f + "'" for f in features]
|
|
287
|
-
else:
|
|
288
|
-
features = "'" + features + "'"
|
|
288
|
+
features_lits = ",".join(f"'{_esc(f)}'" for f in features)
|
|
289
289
|
|
|
290
290
|
query = f"""
|
|
291
291
|
SELECT
|
|
292
292
|
A.FEATURE_NAME
|
|
293
293
|
, B.PROCESS_ID AS FEATURE_VERSION
|
|
294
|
+
, B.VIEW_NAME AS PROCESS_VIEW_NAME
|
|
294
295
|
FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} A
|
|
295
296
|
INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW_FEATURE_SPLIT} B
|
|
296
297
|
ON A.DATA_DOMAIN = B.DATA_DOMAIN
|
|
@@ -298,35 +299,45 @@ def get_feature_versions(entity_name, features, domain=None, latest_version_only
|
|
|
298
299
|
AND A.FEATURE_NAME = B.FEATURE_NAME
|
|
299
300
|
WHERE A.DATA_DOMAIN = '{domain}'
|
|
300
301
|
AND A.ENTITY_NAME = '{entity_name}'
|
|
301
|
-
AND A.FEATURE_NAME IN ({
|
|
302
|
+
AND A.FEATURE_NAME IN ({features_lits})
|
|
302
303
|
"""
|
|
303
304
|
|
|
304
|
-
|
|
305
|
-
df = tdml.DataFrame.from_query(query).to_pandas()
|
|
306
|
-
|
|
307
|
-
# if df is empty
|
|
308
|
-
if df.shape[0] == 0:
|
|
309
|
-
print('the features you are requesting for this entity and data domain do not exist. Here is what you requested:')
|
|
310
|
-
print('feature store database :', tdfs4ds.SCHEMA)
|
|
311
|
-
print('feature catalog :', tdfs4ds.FEATURE_CATALOG_NAME_VIEW)
|
|
312
|
-
print('entity name :', entity_name)
|
|
313
|
-
print('data domain :', domain)
|
|
314
|
-
print('features :', ','.join(features))
|
|
315
|
-
print('')
|
|
305
|
+
if tdfs4ds.DEBUG_MODE:
|
|
316
306
|
print(query)
|
|
317
|
-
return
|
|
318
307
|
|
|
319
|
-
|
|
320
|
-
|
|
308
|
+
rows = tdml.execute_sql(query).fetchall()
|
|
309
|
+
|
|
310
|
+
# Initialize result for all requested features
|
|
311
|
+
result = {f: None for f in features}
|
|
312
|
+
|
|
313
|
+
# Collect (version, view) per feature, deduplicating while preserving order
|
|
314
|
+
tmp = {f: [] for f in features}
|
|
315
|
+
seen = {f: set() for f in features}
|
|
316
|
+
|
|
317
|
+
for feat, version, view_name in rows:
|
|
318
|
+
key = (version, view_name)
|
|
319
|
+
if key not in seen.setdefault(feat, set()):
|
|
320
|
+
seen[feat].add(key)
|
|
321
|
+
tmp.setdefault(feat, []).append(key)
|
|
322
|
+
|
|
323
|
+
# Shape:
|
|
324
|
+
# - if exactly one row: return UUID string
|
|
325
|
+
# - if multiple rows: list of {"process_id": <uuid>, "process_view_name": <str>}
|
|
326
|
+
for feat in result:
|
|
327
|
+
pairs = tmp.get(feat, [])
|
|
328
|
+
if len(pairs) == 0:
|
|
329
|
+
result[feat] = None
|
|
330
|
+
elif len(pairs) == 1:
|
|
331
|
+
result[feat] = pairs[0][0] # UUID only
|
|
332
|
+
else:
|
|
333
|
+
result[feat] = [
|
|
334
|
+
{"process_id": ver, "process_view_name": view}
|
|
335
|
+
for (ver, view) in pairs
|
|
336
|
+
]
|
|
337
|
+
|
|
338
|
+
return result
|
|
321
339
|
|
|
322
|
-
# results in dictionary:
|
|
323
|
-
results = {row['FEATURE_NAME']: row['FEATURE_VERSION'] for i, row in df.iterrows()}
|
|
324
|
-
if tdfs4ds.DEBUG_MODE == True:
|
|
325
|
-
print('---> RESULTS <---')
|
|
326
|
-
print(results)
|
|
327
340
|
|
|
328
|
-
# Returning the results as a dictionary with feature names as keys and their versions as values
|
|
329
|
-
return results
|
|
330
341
|
def get_entity_tables(entity_id, data_domain=None):
|
|
331
342
|
"""
|
|
332
343
|
Retrieves a list of table names associated with a given entity ID or IDs from a feature catalog within a specific data domain.
|
|
@@ -869,39 +869,6 @@ def Gettdtypes(tddf, features_columns, entity_id):
|
|
|
869
869
|
# Increment the feature ID for the next iteration.
|
|
870
870
|
feature_id += 1
|
|
871
871
|
|
|
872
|
-
# # Iterate over the data types of the columns in the DataFrame.
|
|
873
|
-
# for k, v in types.items():
|
|
874
|
-
# # If the column name does not exist in the feature catalog table and is in the list of feature column names...
|
|
875
|
-
# if k.upper() not in [n.upper() for n in existing_features] and k.upper() in [n.upper() for n in features_columns]:
|
|
876
|
-
# # If the data type of the column is integer...
|
|
877
|
-
# if 'int' in str(v.lower()):
|
|
878
|
-
# # Add an entry to the result dictionary for the column name with its data type and new feature ID.
|
|
879
|
-
# res[k] = {'type': 'BIGINT', 'id': feature_id}
|
|
880
|
-
# # If the data type of the column is float...
|
|
881
|
-
# elif 'float' in str(v.lower()):
|
|
882
|
-
# # Add an entry to the result dictionary for the column name with its data type and new feature ID.
|
|
883
|
-
# res[k] = {'type': 'FLOAT', 'id': feature_id}
|
|
884
|
-
# # If the data type of the column is varchar with unicode encoding ...
|
|
885
|
-
# elif 'unicode' in str(v.lower()):
|
|
886
|
-
# res[k] = {'type': 'VARCHAR_UNICODE', 'id': feature_id}
|
|
887
|
-
# # Print a message that the data type is not yet managed.
|
|
888
|
-
# #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
|
|
889
|
-
# # If the data type of the column is varchar with unicode encoding ...
|
|
890
|
-
# elif 'latin' in str(v.lower()):
|
|
891
|
-
# res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
|
|
892
|
-
# # Print a message that the data type is not yet managed.
|
|
893
|
-
# #if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
|
|
894
|
-
# elif 'decimal' in str(v.lower()):
|
|
895
|
-
# res[k] = {'type': 'DECIMAL', 'id': feature_id}
|
|
896
|
-
# # Print a message that the data type is not yet managed.
|
|
897
|
-
# # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
|
|
898
|
-
# else:
|
|
899
|
-
# res[k] = {'type': 'VARCHAR_LATIN', 'id': feature_id}
|
|
900
|
-
# # Print a message that the data type is not yet managed.
|
|
901
|
-
# # if tdfs4ds.DISPLAY_LOGS: print(f'{k} has a type that is not yet managed')
|
|
902
|
-
# # Increment the feature ID for the next iteration.
|
|
903
|
-
# feature_id += 1
|
|
904
|
-
|
|
905
872
|
# Return the result dictionary.
|
|
906
873
|
return res
|
|
907
874
|
|
|
@@ -979,36 +946,50 @@ def tdstone2_Gettdtypes(existing_model, entity_id, display_logs=False):
|
|
|
979
946
|
# Return the dictionary containing feature names, types, and IDs.
|
|
980
947
|
return res
|
|
981
948
|
|
|
982
|
-
def delete_feature(feature_name, data_domain=None):
|
|
949
|
+
def delete_feature(feature_name, entity_id, data_domain=None):
|
|
983
950
|
"""
|
|
984
|
-
Delete the values of a specific feature from the feature table
|
|
951
|
+
Delete the values of a specific feature for given entities from the feature table
|
|
952
|
+
within a specified data domain.
|
|
985
953
|
|
|
986
954
|
This function constructs and executes two SQL queries against a Teradata database
|
|
987
|
-
to remove a feature specified by its name. The first query
|
|
988
|
-
where the feature resides, based on the feature name
|
|
989
|
-
deletes the feature from the
|
|
955
|
+
to remove a feature specified by its name and entity identifiers. The first query
|
|
956
|
+
retrieves the table name where the feature resides, based on the feature name,
|
|
957
|
+
entity, and data domain. The second query deletes the feature values from the
|
|
958
|
+
identified table.
|
|
990
959
|
|
|
991
960
|
Parameters:
|
|
992
961
|
- feature_name (str): The name of the feature to be removed.
|
|
993
|
-
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
962
|
+
- entity_id (str or list of str): Entity identifier(s). If a string is provided,
|
|
963
|
+
it will be converted to a single-element list. The list is always sorted
|
|
964
|
+
alphabetically before use.
|
|
965
|
+
- data_domain (str, optional): The data domain where the feature is located.
|
|
966
|
+
If not specified, the function uses the default data domain defined in
|
|
967
|
+
`tdfs4ds.DATA_DOMAIN`.
|
|
968
|
+
|
|
969
|
+
Behavior:
|
|
970
|
+
- The function checks if the `DEBUG_MODE` flag in the `tdfs4ds` module is set to True.
|
|
971
|
+
If so, it prints the generated SQL queries and the resolved table name for debugging.
|
|
972
|
+
- If the feature table cannot be resolved, the function returns without executing
|
|
973
|
+
a delete query.
|
|
998
974
|
|
|
999
|
-
|
|
975
|
+
Returns:
|
|
976
|
+
- None
|
|
1000
977
|
|
|
1001
978
|
Note:
|
|
1002
979
|
- The function assumes the presence of a module `tdfs4ds` with predefined constants
|
|
1003
|
-
such as `DATA_DOMAIN`, `SCHEMA`, `
|
|
980
|
+
such as `DATA_DOMAIN`, `SCHEMA`, `FEATURE_CATALOG_NAME_VIEW`, and a flag `DEBUG_MODE`.
|
|
1004
981
|
- It also assumes a `tdml` module or object with an `execute_sql` method capable of
|
|
1005
982
|
executing SQL queries against a Teradata database and fetching the results.
|
|
1006
983
|
|
|
1007
984
|
Raises:
|
|
1008
|
-
-
|
|
1009
|
-
|
|
985
|
+
- Exceptions related to SQL execution or connection issues may be raised but are not
|
|
986
|
+
explicitly handled, except for printing the error message.
|
|
1010
987
|
"""
|
|
1011
988
|
|
|
989
|
+
if isinstance(entity_id, str):
|
|
990
|
+
entity_id = [entity_id]
|
|
991
|
+
entity_id = sorted(entity_id)
|
|
992
|
+
|
|
1012
993
|
if data_domain is None:
|
|
1013
994
|
data_domain = tdfs4ds.DATA_DOMAIN
|
|
1014
995
|
|
|
@@ -1016,17 +997,19 @@ def delete_feature(feature_name, data_domain=None):
|
|
|
1016
997
|
SEL FEATURE_DATABASE||'.'||FEATURE_TABLE AS TABLE_NAME
|
|
1017
998
|
FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
|
|
1018
999
|
WHERE FEATURE_NAME = '{feature_name}'
|
|
1019
|
-
AND DATA_DOMAIN = '{data_domain}'
|
|
1000
|
+
AND DATA_DOMAIN = '{data_domain}'
|
|
1001
|
+
AND ENTITY_NAME = '{','.join([e.upper() for e in entity_id])}'"""
|
|
1020
1002
|
if tdfs4ds.DEBUG_MODE:
|
|
1021
1003
|
print(query0)
|
|
1022
1004
|
|
|
1023
1005
|
table_name = tdml.execute_sql(query0).fetchall()
|
|
1024
|
-
if len(table_name)>0:
|
|
1006
|
+
if len(table_name) > 0:
|
|
1025
1007
|
table_name = table_name[0][0]
|
|
1026
1008
|
else:
|
|
1027
1009
|
return
|
|
1028
1010
|
if tdfs4ds.DEBUG_MODE:
|
|
1029
1011
|
print('table name : ', table_name)
|
|
1012
|
+
|
|
1030
1013
|
query = f"""
|
|
1031
1014
|
DELETE {table_name}
|
|
1032
1015
|
WHERE FEATURE_ID = (
|
|
@@ -1044,6 +1027,7 @@ def delete_feature(feature_name, data_domain=None):
|
|
|
1044
1027
|
|
|
1045
1028
|
return
|
|
1046
1029
|
|
|
1030
|
+
|
|
1047
1031
|
def remove_feature(feature_name, entity_id, data_domain=None):
|
|
1048
1032
|
"""
|
|
1049
1033
|
Attempts to remove a specific feature from the feature catalog and any associated data,
|
|
@@ -1060,7 +1044,9 @@ def remove_feature(feature_name, entity_id, data_domain=None):
|
|
|
1060
1044
|
|
|
1061
1045
|
Parameters:
|
|
1062
1046
|
- feature_name (str): The name of the feature to be removed.
|
|
1063
|
-
- entity_id (list of str):
|
|
1047
|
+
- entity_id (str or list of str): Entity identifier(s). If a string is provided,
|
|
1048
|
+
it will be converted to a single-element list. The list is always sorted
|
|
1049
|
+
alphabetically before use.
|
|
1064
1050
|
- data_domain (str, optional): The data domain where the feature is located. If not provided,
|
|
1065
1051
|
the function uses the default data domain from the `tdfs4ds.DATA_DOMAIN` setting.
|
|
1066
1052
|
|
|
@@ -1084,16 +1070,19 @@ def remove_feature(feature_name, entity_id, data_domain=None):
|
|
|
1084
1070
|
- SQL execution or connection exceptions might occur but are not explicitly handled by this function.
|
|
1085
1071
|
"""
|
|
1086
1072
|
|
|
1073
|
+
if isinstance(entity_id, str):
|
|
1074
|
+
entity_id = [entity_id]
|
|
1075
|
+
entity_id = sorted(entity_id)
|
|
1076
|
+
|
|
1087
1077
|
if data_domain is None:
|
|
1088
1078
|
data_domain = tdfs4ds.DATA_DOMAIN
|
|
1089
1079
|
|
|
1090
1080
|
try:
|
|
1091
|
-
delete_feature(feature_name, data_domain)
|
|
1081
|
+
delete_feature(feature_name, entity_id, data_domain)
|
|
1092
1082
|
except Exception as e:
|
|
1093
1083
|
print(str(e).split('\n')[0])
|
|
1094
1084
|
return
|
|
1095
1085
|
|
|
1096
|
-
entity_id.sort()
|
|
1097
1086
|
query = f"""
|
|
1098
1087
|
NONSEQUENCED VALIDTIME DELETE {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME}
|
|
1099
1088
|
WHERE FEATURE_NAME = '{feature_name}'
|
|
@@ -1102,7 +1091,6 @@ def remove_feature(feature_name, entity_id, data_domain=None):
|
|
|
1102
1091
|
"""
|
|
1103
1092
|
if tdfs4ds.DEBUG_MODE:
|
|
1104
1093
|
print(query)
|
|
1094
|
+
|
|
1105
1095
|
tdml.execute_sql(query)
|
|
1106
1096
|
return
|
|
1107
|
-
|
|
1108
|
-
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=sHzEWvxrBA_DBbOBJOsFuIxz0qX9MAY3zdS20gnCz_Q,66290
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -18,8 +18,8 @@ tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22
|
|
|
18
18
|
tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
|
|
19
19
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
20
20
|
tdfs4ds/feature_store/feature_data_processing.py,sha256=rvpnFrV6Tmg8C6xcSQLT_lrFYqZsdSzFXmS-4suK9qg,42847
|
|
21
|
-
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=
|
|
22
|
-
tdfs4ds/feature_store/feature_store_management.py,sha256=
|
|
21
|
+
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=0ZLJWtV13tjaUdYCiQvPvYWxKs0f_3LZ2HgfQzHyaW4,33705
|
|
22
|
+
tdfs4ds/feature_store/feature_store_management.py,sha256=ufIBTdrnHBvGdXggavJoTVoZjOHFtH5ZiYqJr5eIBhg,54713
|
|
23
23
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
24
24
|
tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
|
|
25
25
|
tdfs4ds/process_store/process_query_administration.py,sha256=DsIt97cBoJ7NcpQzbQt55eUFNgXGdOMm5Hh2aX5v0PY,7762
|
|
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
|
|
|
32
32
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
33
33
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
34
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
36
|
-
tdfs4ds-0.2.4.
|
|
37
|
-
tdfs4ds-0.2.4.
|
|
38
|
-
tdfs4ds-0.2.4.
|
|
35
|
+
tdfs4ds-0.2.4.27.dist-info/METADATA,sha256=0zXOf1EjCvIPgXK3EyOtMDlF4ZB5nArvMsKcqFqknTg,14326
|
|
36
|
+
tdfs4ds-0.2.4.27.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.27.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.27.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|