teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of teradataml might be problematic.
- teradataml/README.md +210 -0
- teradataml/__init__.py +1 -1
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +162 -76
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/__init__.py +2 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
- teradataml/analytics/json_parser/metadata.py +22 -4
- teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
- teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
- teradataml/analytics/sqle/__init__.py +3 -0
- teradataml/analytics/utils.py +4 -1
- teradataml/automl/__init__.py +2369 -464
- teradataml/automl/autodataprep/__init__.py +15 -0
- teradataml/automl/custom_json_utils.py +184 -112
- teradataml/automl/data_preparation.py +113 -58
- teradataml/automl/data_transformation.py +154 -53
- teradataml/automl/feature_engineering.py +113 -53
- teradataml/automl/feature_exploration.py +548 -25
- teradataml/automl/model_evaluation.py +260 -32
- teradataml/automl/model_training.py +399 -206
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/aed_utils.py +11 -2
- teradataml/common/bulk_exposed_utils.py +4 -2
- teradataml/common/constants.py +62 -2
- teradataml/common/garbagecollector.py +50 -21
- teradataml/common/messagecodes.py +47 -2
- teradataml/common/messages.py +19 -1
- teradataml/common/sqlbundle.py +23 -6
- teradataml/common/utils.py +116 -10
- teradataml/context/aed_context.py +16 -10
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/byom_example.json +5 -0
- teradataml/data/creditcard_data.csv +284618 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/load_example_data.py +29 -11
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/teradataml_example.json +67 -0
- teradataml/dataframe/copy_to.py +714 -54
- teradataml/dataframe/dataframe.py +1153 -33
- teradataml/dataframe/dataframe_utils.py +8 -3
- teradataml/dataframe/functions.py +168 -1
- teradataml/dataframe/setop.py +4 -1
- teradataml/dataframe/sql.py +141 -9
- teradataml/dbutils/dbutils.py +470 -35
- teradataml/dbutils/filemgr.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +456 -142
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/scriptmgmt/UserEnv.py +234 -34
- teradataml/scriptmgmt/lls_utils.py +43 -17
- teradataml/sdk/_json_parser.py +1 -1
- teradataml/sdk/api_client.py +9 -6
- teradataml/sdk/modelops/_client.py +3 -0
- teradataml/series/series.py +12 -7
- teradataml/store/feature_store/constants.py +601 -234
- teradataml/store/feature_store/feature_store.py +2886 -616
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +5831 -214
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/table_operator_util.py +1 -1
- teradataml/table_operators/templates/dataframe_register.template +6 -2
- teradataml/table_operators/templates/dataframe_udf.template +6 -2
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +93 -0
- teradataml/utils/internal_buffer.py +2 -2
- teradataml/utils/utils.py +41 -2
- teradataml/utils/validators.py +694 -17
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
|
@@ -7,28 +7,42 @@ Secondary Owner: adithya.avvaru@teradata.com
|
|
|
7
7
|
|
|
8
8
|
This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
|
|
9
9
|
"""
|
|
10
|
-
|
|
10
|
+
import os.path
|
|
11
|
+
import operator
|
|
12
|
+
import random
|
|
13
|
+
from functools import reduce
|
|
11
14
|
from sqlalchemy import literal_column
|
|
12
|
-
from teradataml.context.context import get_connection
|
|
13
|
-
from teradataml.common.constants import SQLConstants
|
|
15
|
+
from teradataml.context.context import get_connection, _get_current_databasename
|
|
16
|
+
from teradataml.common.constants import SQLConstants, AccessQueries
|
|
14
17
|
from teradataml.common.exceptions import TeradataMlException
|
|
15
18
|
from teradataml.common.messages import Messages
|
|
16
19
|
from teradataml.common.messagecodes import MessageCodes
|
|
17
20
|
from teradataml.dataframe.sql import _SQLColumnExpression as Col
|
|
18
|
-
from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke,
|
|
21
|
+
from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, \
|
|
22
|
+
_update_data, _delete_data, db_transaction, db_list_tables, _insert_data, \
|
|
23
|
+
_is_trigger_exist, db_drop_view, _get_quoted_object_name
|
|
19
24
|
from teradataml.store.feature_store.constants import *
|
|
25
|
+
from teradataml.store.feature_store.mind_map import _TD_FS_MindMap_Template
|
|
20
26
|
from teradataml.store.feature_store.models import *
|
|
27
|
+
from teradataml.store.feature_store.constants import _FeatureStoreDFContainer
|
|
21
28
|
from teradataml.common.sqlbundle import SQLBundle
|
|
22
29
|
from teradataml.utils.validators import _Validators
|
|
30
|
+
from teradataml.store.feature_store.utils import _FSUtils
|
|
23
31
|
|
|
24
32
|
|
|
25
33
|
class FeatureStore:
|
|
26
34
|
"""Class for FeatureStore."""
|
|
27
35
|
|
|
28
|
-
def __init__(self,
|
|
36
|
+
def __init__(self,
|
|
37
|
+
repo,
|
|
38
|
+
data_domain=None,
|
|
39
|
+
check=True):
|
|
29
40
|
"""
|
|
30
41
|
DESCRIPTION:
|
|
31
42
|
Method to create FeatureStore in teradataml.
|
|
43
|
+
Note:
|
|
44
|
+
* One should establish a connection to Vantage using create_context()
|
|
45
|
+
before creating a FeatureStore object.
|
|
32
46
|
|
|
33
47
|
PARAMETERS:
|
|
34
48
|
repo:
|
|
@@ -36,6 +50,21 @@ class FeatureStore:
|
|
|
36
50
|
Specifies the repository name.
|
|
37
51
|
Types: str.
|
|
38
52
|
|
|
53
|
+
data_domain:
|
|
54
|
+
Optional Argument.
|
|
55
|
+
Specifies the data domain to which FeatureStore points to.
|
|
56
|
+
Note:
|
|
57
|
+
* If not specified, then default database name is considered as data domain.
|
|
58
|
+
Types: str
|
|
59
|
+
|
|
60
|
+
check:
|
|
61
|
+
Optional Argument.
|
|
62
|
+
Specifies whether to check the existence of the Feature store DB objects or not.
|
|
63
|
+
When set to True, the method checks for the existence of Feature store DB objects.
|
|
64
|
+
Otherwise, the method does not verify the existence of Feature store DB objects.
|
|
65
|
+
Default Value: True
|
|
66
|
+
Types: bool
|
|
67
|
+
|
|
39
68
|
RETURNS:
|
|
40
69
|
Object of FeatureStore.
|
|
41
70
|
|
|
@@ -43,18 +72,28 @@ class FeatureStore:
|
|
|
43
72
|
None
|
|
44
73
|
|
|
45
74
|
EXAMPLES:
|
|
46
|
-
|
|
75
|
+
# Example 1: Create an instance of FeatureStore for repository 'vfs_v1'.
|
|
47
76
|
>>> from teradataml import FeatureStore
|
|
48
|
-
>>> fs = FeatureStore('vfs_v1')
|
|
77
|
+
>>> fs = FeatureStore(repo='vfs_v1')
|
|
78
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
79
|
+
|
|
80
|
+
>>> fs.setup()
|
|
81
|
+
True
|
|
82
|
+
|
|
49
83
|
>>> fs
|
|
50
|
-
|
|
51
|
-
>>>
|
|
84
|
+
VantageFeatureStore(abc)-v2.0.0
|
|
52
85
|
"""
|
|
53
86
|
argument_validation_params = []
|
|
54
87
|
argument_validation_params.append(["repo", repo, False, (str), True])
|
|
55
88
|
|
|
56
89
|
# Validate argument types
|
|
57
90
|
_Validators._validate_function_arguments(argument_validation_params)
|
|
91
|
+
|
|
92
|
+
connection = get_connection()
|
|
93
|
+
if connection is None:
|
|
94
|
+
raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
|
|
95
|
+
MessageCodes.CONNECTION_FAILURE)
|
|
96
|
+
|
|
58
97
|
# Do not validate the existance of repo as it consumes a network call.
|
|
59
98
|
self.__repo = repo
|
|
60
99
|
self.__version = ""
|
|
@@ -66,24 +105,181 @@ class FeatureStore:
|
|
|
66
105
|
self.__df_container = {}
|
|
67
106
|
|
|
68
107
|
# Store the table names here. Then use this where ever required.
|
|
69
|
-
self.__table_names =
|
|
70
|
-
|
|
71
|
-
# Declare getter's for getting the corresponding DataFrame's.
|
|
72
|
-
|
|
73
|
-
self.
|
|
74
|
-
self.
|
|
75
|
-
self.
|
|
76
|
-
self.__get_feature_group_df = lambda :
|
|
77
|
-
self.__get_archived_feature_group_df = lambda :
|
|
78
|
-
self.__get_entity_df = lambda :
|
|
79
|
-
self.__get_archived_entity_df = lambda :
|
|
80
|
-
self.__get_data_source_df = lambda :
|
|
81
|
-
self.__get_archived_data_source_df = lambda :
|
|
108
|
+
self.__table_names = EFS_DB_COMPONENTS
|
|
109
|
+
|
|
110
|
+
# Declare getter's for getting the corresponding DataFrame's using _FeatureStoreDFContainer directly.
|
|
111
|
+
# Only keep the lambda functions that are actually used in the codebase
|
|
112
|
+
self.__get_features_df = lambda : _FeatureStoreDFContainer.get_df("feature", self.__repo, self.__data_domain)
|
|
113
|
+
self.__get_features_wog_df = lambda : _FeatureStoreDFContainer.get_df("feature_wog", self.__repo, self.__data_domain)
|
|
114
|
+
self.__get_archived_features_df = lambda : _FeatureStoreDFContainer.get_df("feature_staging", self.__repo, self.__data_domain)
|
|
115
|
+
self.__get_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group", self.__repo, self.__data_domain)
|
|
116
|
+
self.__get_archived_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
|
|
117
|
+
self.__get_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity", self.__repo, self.__data_domain)
|
|
118
|
+
self.__get_archived_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity_staging", self.__repo, self.__data_domain)
|
|
119
|
+
self.__get_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source", self.__repo, self.__data_domain)
|
|
120
|
+
self.__get_archived_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source_staging", self.__repo, self.__data_domain)
|
|
121
|
+
self.__get_dataset_catalog_df = lambda : _FeatureStoreDFContainer.get_df("dataset_catalog", self.__repo, self.__data_domain)
|
|
122
|
+
self.__get_data_domain_df = lambda : _FeatureStoreDFContainer.get_df("data_domain", self.__repo, self.__data_domain)
|
|
123
|
+
self.__get_feature_process_df = lambda : _FeatureStoreDFContainer.get_df("feature_process", self.__repo, self.__data_domain)
|
|
124
|
+
self.__get_features_metadata_df = lambda : _FeatureStoreDFContainer.get_df("feature_metadata", self.__repo, self.__data_domain)
|
|
125
|
+
self.__get_feature_info_df = lambda: _FeatureStoreDFContainer.get_df("feature_info", self.__repo, self.__data_domain)
|
|
126
|
+
self.__get_dataset_features_df = lambda: _FeatureStoreDFContainer.get_df("dataset_features", self.__repo, self.__data_domain)
|
|
127
|
+
self.__get_feature_runs_df = lambda : _FeatureStoreDFContainer.get_df("feature_runs", self.__repo, self.__data_domain)
|
|
128
|
+
self.__get_without_valid_period_df = lambda df: df.drop(columns=['ValidPeriod'])
|
|
129
|
+
self.__get_feature_version = lambda: _FeatureStoreDFContainer.get_df("feature_version", self.__repo, self.__data_domain)
|
|
82
130
|
|
|
83
131
|
self.__good_status = "Good"
|
|
84
132
|
self.__bad_status = "Bad"
|
|
85
133
|
self.__repaired_status = "Repaired"
|
|
86
134
|
|
|
135
|
+
self.__data_domain = data_domain if data_domain is not None else _get_current_databasename()
|
|
136
|
+
|
|
137
|
+
self.__repo_exists = connection.dialect._get_database_names(connection, self.__repo)
|
|
138
|
+
|
|
139
|
+
if check:
|
|
140
|
+
return self.__validate_repo_exists()
|
|
141
|
+
else:
|
|
142
|
+
# If check is False, then do not check for the existence of DB objects.
|
|
143
|
+
self.__add_data_domain()
|
|
144
|
+
|
|
145
|
+
def __validate_repo_exists(self):
|
|
146
|
+
"""
|
|
147
|
+
Validate the repository.
|
|
148
|
+
|
|
149
|
+
PARAMETERS:
|
|
150
|
+
None
|
|
151
|
+
|
|
152
|
+
RETURNS:
|
|
153
|
+
None
|
|
154
|
+
|
|
155
|
+
RAISES:
|
|
156
|
+
ValueError: If the repo is invalid.
|
|
157
|
+
"""
|
|
158
|
+
# Check whether the repo exists or not.
|
|
159
|
+
if not self.__repo_exists:
|
|
160
|
+
print("Repo {} does not exist. Run FeatureStore.setup() " \
|
|
161
|
+
"to create the repo and setup FeatureStore.".format(self.__repo))
|
|
162
|
+
return
|
|
163
|
+
|
|
164
|
+
# Check whether all the EFS tables exist or not.
|
|
165
|
+
existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
|
|
166
|
+
if not existing_tabs.empty:
|
|
167
|
+
existing_tables = set(existing_tabs['TableName'].tolist())
|
|
168
|
+
all_tables_exist = all(val in existing_tables for val in EFS_TABLES.values())
|
|
169
|
+
else:
|
|
170
|
+
all_tables_exist = False
|
|
171
|
+
# Check whether all the EFS triggers exist or not.
|
|
172
|
+
all_triggers_exist, num_trigger_exist = _is_trigger_exist(self.__repo, list(EFS_TRIGGERS.values()))
|
|
173
|
+
|
|
174
|
+
# Check whether all the EFS tables and triggers exist or not.
|
|
175
|
+
# If exists, then insert the data domain name into _efs_data_domain table.
|
|
176
|
+
if all_tables_exist and all_triggers_exist:
|
|
177
|
+
self.__add_data_domain()
|
|
178
|
+
# If all the tables and triggers are available, then
|
|
179
|
+
# FeatureStore is ready to use.
|
|
180
|
+
print("FeatureStore is ready to use.")
|
|
181
|
+
# All table and triggers does not exist.
|
|
182
|
+
# If the count of tables and triggers is 0, then
|
|
183
|
+
# FeatureStore is not setup.
|
|
184
|
+
elif num_trigger_exist == 0 and len(existing_tabs) == 0:
|
|
185
|
+
print("FeatureStore is not setup(). Run FeatureStore.setup() to setup FeatureStore.")
|
|
186
|
+
else:
|
|
187
|
+
print("Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.")
|
|
188
|
+
|
|
189
|
+
@property
|
|
190
|
+
def data_domain(self):
|
|
191
|
+
"""
|
|
192
|
+
DESCRIPTION:
|
|
193
|
+
Get the data domain.
|
|
194
|
+
|
|
195
|
+
PARAMETERS:
|
|
196
|
+
None
|
|
197
|
+
|
|
198
|
+
RETURNS:
|
|
199
|
+
str
|
|
200
|
+
|
|
201
|
+
RAISES:
|
|
202
|
+
None
|
|
203
|
+
|
|
204
|
+
EXAMPLES:
|
|
205
|
+
# Example 1: Use existing FeatureStore 'vfs_v1' to get the data domain.
|
|
206
|
+
>>> from teradataml import FeatureStore
|
|
207
|
+
>>> fs = FeatureStore(repo='vfs_v1', data_domain='test_domain')
|
|
208
|
+
FeatureStore is ready to use.
|
|
209
|
+
>>> fs.data_domain
|
|
210
|
+
'test_domain'
|
|
211
|
+
"""
|
|
212
|
+
return self.__data_domain
|
|
213
|
+
|
|
214
|
+
@data_domain.setter
|
|
215
|
+
def data_domain(self, value):
|
|
216
|
+
"""
|
|
217
|
+
DESCRIPTION:
|
|
218
|
+
Set the data domain.
|
|
219
|
+
|
|
220
|
+
PARAMETERS:
|
|
221
|
+
value:
|
|
222
|
+
Required Argument.
|
|
223
|
+
Specifies the data domain name.
|
|
224
|
+
Types: str.
|
|
225
|
+
|
|
226
|
+
RETURNS:
|
|
227
|
+
None.
|
|
228
|
+
|
|
229
|
+
RAISES:
|
|
230
|
+
None
|
|
231
|
+
|
|
232
|
+
EXAMPLES:
|
|
233
|
+
# Example 1: Create or use existing FeatureStore for repository 'abc' and
|
|
234
|
+
# then change the data domain to 'xyz'.
|
|
235
|
+
>>> from teradataml import FeatureStore
|
|
236
|
+
>>> fs = FeatureStore('abc')
|
|
237
|
+
FeatureStore is ready to use.
|
|
238
|
+
|
|
239
|
+
# Set the data domain to 'xyz'.
|
|
240
|
+
>>> fs.data_domain = 'xyz'
|
|
241
|
+
|
|
242
|
+
# Get the data domain.
|
|
243
|
+
>>> fs.data_domain
|
|
244
|
+
'xyz'
|
|
245
|
+
"""
|
|
246
|
+
argument_validation_params = []
|
|
247
|
+
argument_validation_params.append(["value", value, False, (str), True])
|
|
248
|
+
|
|
249
|
+
# Validate argument types
|
|
250
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
251
|
+
|
|
252
|
+
# Set the data domain value.
|
|
253
|
+
self.__data_domain = value
|
|
254
|
+
self.__add_data_domain()
|
|
255
|
+
|
|
256
|
+
def __add_data_domain(self):
|
|
257
|
+
"""
|
|
258
|
+
DESCRIPTION:
|
|
259
|
+
Internal method to add the data domain.
|
|
260
|
+
|
|
261
|
+
PARAMETERS:
|
|
262
|
+
data_domain:
|
|
263
|
+
Required Argument.
|
|
264
|
+
Specifies the data domain name.
|
|
265
|
+
Types: str.
|
|
266
|
+
|
|
267
|
+
RETURNS:
|
|
268
|
+
None.
|
|
269
|
+
|
|
270
|
+
RAISES:
|
|
271
|
+
None
|
|
272
|
+
|
|
273
|
+
EXAMPLES:
|
|
274
|
+
>>> self.__add_data_domain()
|
|
275
|
+
"""
|
|
276
|
+
# Add the data domain to the EFS_DATA_DOMAINS table.
|
|
277
|
+
_insert_data(table_name=self.__table_names['data_domain'],
|
|
278
|
+
schema_name=self.__repo,
|
|
279
|
+
values=(self.__data_domain, dt.utcnow()),
|
|
280
|
+
columns=["name", "created_time"],
|
|
281
|
+
ignore_errors=[2801])
|
|
282
|
+
|
|
87
283
|
@property
|
|
88
284
|
def repo(self):
|
|
89
285
|
"""
|
|
@@ -100,11 +296,14 @@ class FeatureStore:
|
|
|
100
296
|
None
|
|
101
297
|
|
|
102
298
|
EXAMPLES:
|
|
299
|
+
# Example 1: Get the repository name from FeatureStore.
|
|
103
300
|
>>> from teradataml import FeatureStore
|
|
104
301
|
>>> fs = FeatureStore('vfs_v1')
|
|
302
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
303
|
+
|
|
304
|
+
# Get the repository name.
|
|
105
305
|
>>> fs.repo
|
|
106
|
-
vfs_v1
|
|
107
|
-
>>>
|
|
306
|
+
'vfs_v1'
|
|
108
307
|
"""
|
|
109
308
|
return self.__repo
|
|
110
309
|
|
|
@@ -131,8 +330,16 @@ class FeatureStore:
|
|
|
131
330
|
# then change the repository to 'xyz'.
|
|
132
331
|
>>> from teradataml import FeatureStore
|
|
133
332
|
>>> fs = FeatureStore('abc')
|
|
333
|
+
FeatureStore is ready to use.
|
|
334
|
+
|
|
335
|
+
# Get the repository name.
|
|
336
|
+
>>> fs.repo
|
|
337
|
+
'abc'
|
|
338
|
+
|
|
339
|
+
# Set the repository to 'xyz'.
|
|
134
340
|
>>> fs.repo = 'xyz'
|
|
135
|
-
>>>
|
|
341
|
+
>>> fs.repo
|
|
342
|
+
'xyz'
|
|
136
343
|
"""
|
|
137
344
|
argument_validation_params = []
|
|
138
345
|
argument_validation_params.append(["value", value, False, (str), True])
|
|
@@ -141,7 +348,12 @@ class FeatureStore:
|
|
|
141
348
|
_Validators._validate_function_arguments(argument_validation_params)
|
|
142
349
|
# remove all entries from container so they will be automatically
|
|
143
350
|
# point to new repo for subsequent API's.
|
|
351
|
+
self.__repo_exists = get_connection().dialect._get_database_names(get_connection(),
|
|
352
|
+
value)
|
|
353
|
+
self.__validate_repo_exists()
|
|
354
|
+
|
|
144
355
|
self.__df_container.clear()
|
|
356
|
+
|
|
145
357
|
self.__version = None
|
|
146
358
|
|
|
147
359
|
# Set the repo value.
|
|
@@ -160,6 +372,19 @@ class FeatureStore:
|
|
|
160
372
|
|
|
161
373
|
RAISES:
|
|
162
374
|
None
|
|
375
|
+
|
|
376
|
+
EXAMPLES:
|
|
377
|
+
>>> from teradataml import FeatureStore
|
|
378
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
379
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
380
|
+
|
|
381
|
+
# Setup FeatureStore for this repository.
|
|
382
|
+
>>> fs.setup()
|
|
383
|
+
|
|
384
|
+
# Get the string representation of FeatureStore.
|
|
385
|
+
>>> fs
|
|
386
|
+
'VantageFeatureStore(vfs_v1)-v2.0.0'
|
|
387
|
+
|
|
163
388
|
"""
|
|
164
389
|
s = "VantageFeatureStore({})".format(self.__repo)
|
|
165
390
|
try:
|
|
@@ -183,7 +408,7 @@ class FeatureStore:
|
|
|
183
408
|
None
|
|
184
409
|
"""
|
|
185
410
|
if not self.__version:
|
|
186
|
-
sql = "SELECT version FROM {}.{}".format(self.__repo,
|
|
411
|
+
sql = "SELECT version FROM {}.{}".format(self.__repo, self.__table_names['version'])
|
|
187
412
|
self.__version = next(execute_sql(sql))[0]
|
|
188
413
|
return self.__version
|
|
189
414
|
|
|
@@ -203,14 +428,23 @@ class FeatureStore:
|
|
|
203
428
|
None
|
|
204
429
|
|
|
205
430
|
EXAMPLES:
|
|
206
|
-
|
|
431
|
+
>>> from teradataml import FeatureStore
|
|
432
|
+
# Example 1: List all the FeatureStore repositories using FeatureStore class.
|
|
207
433
|
>>> FeatureStore.list_repos()
|
|
208
434
|
repos
|
|
209
435
|
0 vfs_v1
|
|
210
|
-
|
|
436
|
+
|
|
437
|
+
# Example 2: List all the FeatureStore repositories using FeatureStore object.
|
|
438
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
439
|
+
FeatureStore is ready to use.
|
|
440
|
+
|
|
441
|
+
>>> fs.list_repos()
|
|
442
|
+
repos
|
|
443
|
+
0 vfs_v1
|
|
444
|
+
|
|
211
445
|
"""
|
|
212
446
|
return DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
|
|
213
|
-
|
|
447
|
+
EFS_DB_COMPONENTS['version']))
|
|
214
448
|
|
|
215
449
|
def setup(self, perm_size='10e9', spool_size='10e8'):
|
|
216
450
|
"""
|
|
@@ -250,77 +484,74 @@ class FeatureStore:
|
|
|
250
484
|
TeradatamlException
|
|
251
485
|
|
|
252
486
|
EXAMPLES:
|
|
253
|
-
# Setup FeatureStore for
|
|
487
|
+
# Example 1: Setup FeatureStore for repository 'vfs_v1'.
|
|
254
488
|
>>> from teradataml import FeatureStore
|
|
489
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
255
490
|
>>> fs = FeatureStore("vfs_v1")
|
|
491
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
492
|
+
|
|
493
|
+
# Setup FeatureStore for this repository.
|
|
256
494
|
>>> fs.setup()
|
|
257
495
|
True
|
|
258
|
-
>>>
|
|
259
|
-
"""
|
|
260
496
|
|
|
261
|
-
|
|
262
|
-
|
|
497
|
+
>>> fs
|
|
498
|
+
VantageFeatureStore(vfs_v1)-v2.0.0
|
|
499
|
+
|
|
500
|
+
# Example 2: Setup FeatureStore for repository 'vfs_v2' with custom perm_size and spool_size.
|
|
501
|
+
# Create FeatureStore for repo 'vfs_v2'.
|
|
502
|
+
>>> fs = FeatureStore("vfs_v2")
|
|
503
|
+
Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
504
|
+
|
|
505
|
+
# Setup FeatureStore for this repository.
|
|
506
|
+
>>> fs.setup(perm_size='50e6', spool_size='50e6')
|
|
507
|
+
True
|
|
508
|
+
|
|
509
|
+
>>> fs
|
|
510
|
+
VantageFeatureStore(vfs_v2)-v2.0.0
|
|
263
511
|
|
|
512
|
+
"""
|
|
264
513
|
# If repo does not exist, then create it.
|
|
265
|
-
if not
|
|
514
|
+
if not self.__repo_exists:
|
|
266
515
|
_create_database(self.__repo, perm_size, spool_size)
|
|
267
516
|
|
|
268
517
|
# Check whether version table exists or not. If exist, assume all
|
|
269
518
|
# tables are available.
|
|
270
519
|
all_tables_exist = get_connection().dialect.has_table(
|
|
271
|
-
get_connection(),
|
|
520
|
+
get_connection(), self.__table_names['version'], schema=self.__repo)
|
|
272
521
|
|
|
273
522
|
if not all_tables_exist:
|
|
274
|
-
# Create the tables.
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
triggers_specs = [
|
|
293
|
-
EFS_FEATURES_TRG,
|
|
294
|
-
EFS_GROUP_FEATURES_TRG,
|
|
295
|
-
EFS_FEATURE_GROUP_TRG,
|
|
296
|
-
EFS_DATA_SOURCE_TRG,
|
|
297
|
-
EFS_ENTITY_TRG,
|
|
298
|
-
EFS_ENTITY_XREF_TRG
|
|
299
|
-
]
|
|
300
|
-
|
|
301
|
-
for table_spec in table_specs + staging_table_specs:
|
|
302
|
-
params_ = {"table_name": table_spec["table_name"],
|
|
303
|
-
"columns": table_spec["columns"],
|
|
304
|
-
"primary_index": table_spec.get("primary_index"),
|
|
305
|
-
"unique": True if table_spec.get("primary_index") else False,
|
|
306
|
-
"schema_name": self.__repo,
|
|
307
|
-
"set_table": False
|
|
308
|
-
}
|
|
309
|
-
if "foreign_keys" in table_spec:
|
|
310
|
-
params_["foreign_key_constraint"] = table_spec.get("foreign_keys")
|
|
311
|
-
|
|
312
|
-
_create_table(**params_)
|
|
313
|
-
|
|
314
|
-
for trigger_spec in triggers_specs:
|
|
315
|
-
execute_sql(trigger_spec.format(schema_name=self.__repo))
|
|
523
|
+
# Create the object tables.
|
|
524
|
+
for table_spec, table_name in EFS_TABLES.items():
|
|
525
|
+
execute_sql(table_spec.format(self.__repo, table_name))
|
|
526
|
+
# Create the Triggers.
|
|
527
|
+
for trigger_spec, trg_name in EFS_TRIGGERS.items():
|
|
528
|
+
alter_name = trg_name.split('_trg')[0]
|
|
529
|
+
insert_name = self.__repo+'.'+alter_name+'_staging'
|
|
530
|
+
execute_sql(trigger_spec.format(self.__repo, trg_name,
|
|
531
|
+
alter_name, insert_name))
|
|
532
|
+
|
|
533
|
+
# Create feature versions view.
|
|
534
|
+
sql = EFS_FEATURE_VERSION.format(self.__repo,
|
|
535
|
+
EFS_DB_COMPONENTS['feature_version'],
|
|
536
|
+
self.__repo,
|
|
537
|
+
self.__table_names['feature_process']
|
|
538
|
+
)
|
|
539
|
+
execute_sql(sql)
|
|
316
540
|
|
|
317
541
|
# After the setup is done, populate the version.
|
|
318
|
-
insert_model = "insert into {}.{} values (?, ?);".format(self.__repo,
|
|
319
|
-
execute_sql(insert_model, (
|
|
542
|
+
insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, self.__table_names['version'])
|
|
543
|
+
execute_sql(insert_model, (EFS_VERSION_, datetime.datetime.now()))
|
|
320
544
|
|
|
321
|
-
|
|
545
|
+
# Create the data domain in _efs_data_domain table.
|
|
546
|
+
self.__add_data_domain()
|
|
547
|
+
|
|
548
|
+
if self.__repo_exists and all_tables_exist:
|
|
322
549
|
print("EFS is already setup for the repo {}.".format(self.__repo))
|
|
323
550
|
|
|
551
|
+
# Set the repo_exists to True
|
|
552
|
+
self.__repo_exists = True
|
|
553
|
+
return True
|
|
554
|
+
|
|
324
555
|
@property
|
|
325
556
|
def grant(self):
|
|
326
557
|
"""
|
|
@@ -340,8 +571,10 @@ class FeatureStore:
|
|
|
340
571
|
|
|
341
572
|
EXAMPLES:
|
|
342
573
|
>>> from teradataml import FeatureStore
|
|
343
|
-
# Create FeatureStore for repo '
|
|
344
|
-
>>> fs = FeatureStore("
|
|
574
|
+
# Create FeatureStore for repo 'vfs_v2'.
|
|
575
|
+
>>> fs = FeatureStore("vfs_v2")
|
|
576
|
+
Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
577
|
+
|
|
345
578
|
# Setup FeatureStore for this repository.
|
|
346
579
|
>>> fs.setup()
|
|
347
580
|
True
|
|
@@ -359,9 +592,8 @@ class FeatureStore:
|
|
|
359
592
|
True
|
|
360
593
|
|
|
361
594
|
"""
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
return Grant(list(table_names.values()))
|
|
595
|
+
return Grant(objects=AccessQueries,
|
|
596
|
+
database=self.__repo)
|
|
365
597
|
|
|
366
598
|
@property
|
|
367
599
|
def revoke(self):
|
|
@@ -384,6 +616,8 @@ class FeatureStore:
|
|
|
384
616
|
>>> from teradataml import FeatureStore
|
|
385
617
|
# Create FeatureStore for repo 'vfs_v1'.
|
|
386
618
|
>>> fs = FeatureStore("vfs_v1")
|
|
619
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
620
|
+
|
|
387
621
|
# Setup FeatureStore for this repository.
|
|
388
622
|
>>> fs.setup()
|
|
389
623
|
True
|
|
@@ -400,9 +634,8 @@ class FeatureStore:
|
|
|
400
634
|
>>> fs.revoke.read_write('BoB')
|
|
401
635
|
True
|
|
402
636
|
"""
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
return Revoke(list(table_names.values()))
|
|
637
|
+
return Revoke(objects=AccessQueries,
|
|
638
|
+
database=self.__repo)
|
|
406
639
|
|
|
407
640
|
def repair(self):
|
|
408
641
|
"""
|
|
@@ -422,134 +655,110 @@ class FeatureStore:
|
|
|
422
655
|
bool
|
|
423
656
|
|
|
424
657
|
RAISES:
|
|
425
|
-
|
|
658
|
+
None
|
|
426
659
|
|
|
427
660
|
EXAMPLES:
|
|
428
|
-
# Repair FeatureStore repo 'vfs_v1'.
|
|
661
|
+
# Example 1: Repair FeatureStore repo 'vfs_v1'.
|
|
662
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
429
663
|
>>> from teradataml import FeatureStore
|
|
430
664
|
>>> fs = FeatureStore("vfs_v1")
|
|
431
|
-
|
|
432
|
-
True
|
|
433
|
-
>>>
|
|
434
|
-
"""
|
|
435
|
-
|
|
436
|
-
# Repair Features, Entities and DataSources first. Then FeatureGroup and then Group Features.
|
|
437
|
-
group_features_ = [EFS_GROUP_FEATURES_STAGING_SPEC, EFS_GROUP_FEATURES_SPEC, EFS_GROUP_FEATURES_TRG, "GroupFeatures"]
|
|
438
|
-
feature_group_ = [EFS_FEATURE_GROUP_STAGING_SPEC, EFS_FEATURE_GROUP_SPEC, EFS_FEATURE_GROUP_TRG, "FeatureGroup"]
|
|
439
|
-
featuers_ = [EFS_FEATURES_STAGING_SPEC, EFS_FEATURES_SPEC, EFS_FEATURES_TRG, "Feature"]
|
|
440
|
-
entities_ = [EFS_ENTITY_STAGING_SPEC, EFS_ENTITY_SPEC, EFS_ENTITY_TRG, "Entity"]
|
|
441
|
-
entities_xref_ = [EFS_ENTITY_XREF_STAGING_SPEC, EFS_ENTITY_XREF_SPEC, EFS_ENTITY_XREF_TRG, "EntityXref"]
|
|
442
|
-
data_sources_ = [EFS_DATA_SOURCE_STAGING_SPEC, EFS_DATA_SOURCE_SPEC, EFS_DATA_SOURCE_TRG, "DataSource"]
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
for staging_table_, table_, trigger, obj_name in (group_features_, feature_group_, featuers_, entities_, entities_xref_, data_sources_):
|
|
446
|
-
status = []
|
|
447
|
-
print("Repairing objects related to {}.".format(obj_name))
|
|
448
|
-
|
|
449
|
-
status.append(self.__try_create_table(staging_table_))
|
|
450
|
-
status.append(self.__try_create_table(table_))
|
|
451
|
-
status.append(self.__try_create_trigger(trigger, "{}_trg".format(table_["table_name"])))
|
|
452
|
-
|
|
453
|
-
# Let user know about status.
|
|
454
|
-
# If any of the status is Bad, then repair is failed.
|
|
455
|
-
# Else, If any of the status is Repaired, then sucessfully repaired.
|
|
456
|
-
# Else no need to repair the object.
|
|
457
|
-
if self.__bad_status in status:
|
|
458
|
-
print("Unable to repair objects related to {}.".format(obj_name))
|
|
459
|
-
else:
|
|
460
|
-
if self.__repaired_status in status:
|
|
461
|
-
print("Successfully repaired objects related to {}.".format(obj_name))
|
|
462
|
-
else:
|
|
463
|
-
print("{} objects are good and do not need any repair.".format(obj_name))
|
|
464
|
-
|
|
465
|
-
# Repair the version table.
|
|
466
|
-
status = self.__try_create_table(EFS_VERSION_SPEC)
|
|
467
|
-
if status == self.__repaired_status:
|
|
468
|
-
# After the setup is done, populate the version.
|
|
469
|
-
insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
|
|
470
|
-
execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
|
|
471
|
-
|
|
472
|
-
return True
|
|
473
|
-
|
|
474
|
-
def __try_create_table(self, table_spec):
|
|
475
|
-
"""
|
|
476
|
-
DESCRIPTION:
|
|
477
|
-
Internal function to create a table from table spec.
|
|
478
|
-
|
|
479
|
-
PARAMETERS:
|
|
480
|
-
table_spec:
|
|
481
|
-
Required Argument.
|
|
482
|
-
Specifies the spec for the corresponding table.
|
|
483
|
-
Types: dict
|
|
665
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
484
666
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
Note:
|
|
488
|
-
Method can return three different values of strings.
|
|
489
|
-
* Good - When table to create already exists.
|
|
490
|
-
* Repaired - When is created.
|
|
491
|
-
* Bad - When table not exists and method unable to create table.
|
|
667
|
+
# Setup FeatureStore for this repository.
|
|
668
|
+
>>> fs.setup()
|
|
492
669
|
|
|
493
|
-
|
|
494
|
-
|
|
670
|
+
# Drop the data_source_staging table to simulate the missing object.
|
|
671
|
+
>>> from teradataml import db_drop_table
|
|
672
|
+
>>> db_drop_table(schema_name='vfs_v1', table_name=EFS_DB_COMPONENTS['data_source_staging'])
|
|
495
673
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
try:
|
|
500
|
-
_create_table(table_spec["table_name"],
|
|
501
|
-
columns=table_spec["columns"],
|
|
502
|
-
primary_index=table_spec.get("primary_index"),
|
|
503
|
-
unique=True if table_spec.get("primary_index") else False,
|
|
504
|
-
schema_name=self.__repo,
|
|
505
|
-
set_table=False)
|
|
506
|
-
return self.__repaired_status
|
|
507
|
-
except Exception as e:
|
|
508
|
-
if "Table '{}' already exists".format(table_spec["table_name"]) in str(e):
|
|
509
|
-
return self.__good_status
|
|
510
|
-
else:
|
|
511
|
-
print(str(e))
|
|
512
|
-
return self.__bad_status
|
|
674
|
+
# Verify the missing object by creating FeatureStore again.
|
|
675
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
676
|
+
Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.
|
|
513
677
|
|
|
514
|
-
|
|
678
|
+
>>> fs.repair()
|
|
679
|
+
Successfully repaired the following objects: _efs_data_source_staging
|
|
680
|
+
True
|
|
515
681
|
"""
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
682
|
+
# Check whether the repo exists or not.
|
|
683
|
+
if not self.__repo_exists:
|
|
684
|
+
print("Repo '{}' does not exist. Run FeatureStore.setup() " \
|
|
685
|
+
"to create the repo and setup FeatureStore.".format(self.__repo))
|
|
686
|
+
return False
|
|
687
|
+
|
|
688
|
+
# Get all existing EFS tables in the repo
|
|
689
|
+
existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
|
|
690
|
+
existing_tables = set(existing_tabs['TableName'].tolist())
|
|
691
|
+
|
|
692
|
+
# Get non-existing tables in the order of EFS_TABLES.values()
|
|
693
|
+
non_existing_tables = {
|
|
694
|
+
table_spec: table_name
|
|
695
|
+
for table_spec, table_name in EFS_TABLES.items()
|
|
696
|
+
if table_name not in existing_tables
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
# Get all existing EFS triggers in the repo
|
|
700
|
+
sql = SQLBundle()._get_sql_query(SQLConstants.SQL_LIST_TRIGGERS).format(self.__repo, '_efs%')
|
|
701
|
+
existing_triggers = {row[0] for row in execute_sql(sql).fetchall()}
|
|
702
|
+
|
|
703
|
+
# Get non-existing triggers in the order of EFS_TRIGGERS.values()
|
|
704
|
+
non_existing_triggers = {
|
|
705
|
+
trigger_spec: trigger_name
|
|
706
|
+
for trigger_spec, trigger_name in EFS_TRIGGERS.items()
|
|
707
|
+
if trigger_name not in existing_triggers
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
# Check if feature_version view exists (it shows up in existing_tables from db_list_tables)
|
|
711
|
+
feature_version_exists = self.__table_names['feature_version'] in existing_tables
|
|
712
|
+
|
|
713
|
+
# Return False only if all tables, triggers, and views exist
|
|
714
|
+
if not non_existing_tables and not non_existing_triggers and feature_version_exists:
|
|
715
|
+
print("repo '{}' is ready to use and do not need any repair.".format(self.__repo))
|
|
716
|
+
return False
|
|
537
717
|
|
|
538
|
-
|
|
539
|
-
|
|
718
|
+
failed_creation = []
|
|
719
|
+
created = []
|
|
720
|
+
# Iterating over EFS_TABLES based on the non-existing tables
|
|
721
|
+
for table_spec, table_name in non_existing_tables.items():
|
|
722
|
+
try:
|
|
723
|
+
execute_sql(table_spec.format(self.__repo, table_name))
|
|
724
|
+
created.append(table_name)
|
|
725
|
+
except Exception as e:
|
|
726
|
+
# If any table creation fails, then add it to the failed list
|
|
727
|
+
failed_creation.append((f"Table '{table_name}'", str(e)))
|
|
728
|
+
|
|
729
|
+
# Iterating over EFS_TRIGGERS based on the non-existing triggers
|
|
730
|
+
for trigger_spec, trigger_name in non_existing_triggers.items():
|
|
731
|
+
alter_name = trigger_name.split('_trg')[0]
|
|
732
|
+
insert_name = self.__repo + '.' + alter_name + '_staging'
|
|
733
|
+
try:
|
|
734
|
+
execute_sql(trigger_spec.format(self.__repo, trigger_name,
|
|
735
|
+
alter_name, insert_name))
|
|
736
|
+
created.append(trigger_name)
|
|
737
|
+
except Exception as e:
|
|
738
|
+
# If any trigger creation fails, then add it to the failed list
|
|
739
|
+
failed_creation.append((f"Trigger '{trigger_name}'", str(e)))
|
|
740
|
+
|
|
741
|
+
# Create feature versions view if it doesn't exist
|
|
742
|
+
if not feature_version_exists:
|
|
743
|
+
try:
|
|
744
|
+
sql = EFS_FEATURE_VERSION.format(self.__repo,
|
|
745
|
+
EFS_DB_COMPONENTS['feature_version'],
|
|
746
|
+
self.__repo,
|
|
747
|
+
self.__table_names['feature_process'])
|
|
748
|
+
execute_sql(sql)
|
|
749
|
+
created.append(EFS_DB_COMPONENTS['feature_version'])
|
|
750
|
+
except Exception as e:
|
|
751
|
+
failed_creation.append((f"View '{EFS_DB_COMPONENTS['feature_version']}'", str(e)))
|
|
752
|
+
|
|
753
|
+
# If any of the table or trigger creation fails, then return False
|
|
754
|
+
if failed_creation:
|
|
755
|
+
print("The following objects could not be repaired:")
|
|
756
|
+
for obj, reason in failed_creation:
|
|
757
|
+
print(f" - {obj}: {reason}")
|
|
758
|
+
return False
|
|
540
759
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
"""
|
|
544
|
-
try:
|
|
545
|
-
execute_sql(trigger_spec.format(schema_name=self.__repo))
|
|
546
|
-
return self.__repaired_status
|
|
547
|
-
except Exception as e:
|
|
548
|
-
if "Trigger '{}' already exists".format(trigger_name) in str(e):
|
|
549
|
-
return self.__good_status
|
|
550
|
-
else:
|
|
551
|
-
print("Unable to create trigger '{}'. Error - {}".format(trigger_name, str(e)))
|
|
552
|
-
return self.__bad_status
|
|
760
|
+
print("Successfully repaired the following objects: {}".format(", ".join(created)))
|
|
761
|
+
return True
|
|
553
762
|
|
|
554
763
|
def list_features(self, archived=False) -> DataFrame:
|
|
555
764
|
"""
|
|
@@ -573,38 +782,45 @@ class FeatureStore:
|
|
|
573
782
|
|
|
574
783
|
EXAMPLES:
|
|
575
784
|
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
576
|
-
>>> load_example_data('dataframe', 'sales')
|
|
577
|
-
# Create FeatureStore for repo 'vfs_v1'.
|
|
578
|
-
>>> fs = FeatureStore("vfs_v1")
|
|
579
785
|
# Create teradataml DataFrame.
|
|
786
|
+
>>> load_example_data("dataframe", "sales")
|
|
580
787
|
>>> df = DataFrame("sales")
|
|
788
|
+
|
|
789
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
790
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
791
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
792
|
+
# Setup FeatureStore for this repository.
|
|
793
|
+
>>> fs.setup()
|
|
794
|
+
True
|
|
795
|
+
|
|
581
796
|
# Create a FeatureGroup from teradataml DataFrame.
|
|
582
797
|
>>> fg = FeatureGroup.from_DataFrame(name='sales',
|
|
583
798
|
... entity_columns='accounts',
|
|
584
799
|
... df=df,
|
|
585
|
-
...
|
|
800
|
+
... timestamp_column='datetime')
|
|
586
801
|
# Apply the FeatureGroup to FeatureStore.
|
|
587
802
|
>>> fs.apply(fg)
|
|
588
803
|
True
|
|
589
804
|
|
|
590
805
|
# Example 1: List all the effective Features in the repo 'vfs_v1'.
|
|
591
806
|
>>> fs.list_features()
|
|
592
|
-
|
|
593
|
-
name
|
|
594
|
-
|
|
595
|
-
Jan
|
|
596
|
-
|
|
597
|
-
Feb
|
|
598
|
-
>>>
|
|
807
|
+
id column_name description tags data_type feature_type status creation_time modified_time group_name
|
|
808
|
+
name data_domain
|
|
809
|
+
Apr ALICE 4 Apr None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:31.262501 None sales
|
|
810
|
+
Jan ALICE 2 Jan None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:30.056273 None sales
|
|
811
|
+
Mar ALICE 3 Mar None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:30.678060 None sales
|
|
812
|
+
Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 03:17:29.403242 None sales
|
|
599
813
|
|
|
600
814
|
# Example 2: List all the archived Features in the repo 'vfs_v1'.
|
|
601
815
|
# Note: Feature can only be archived when it is not associated with any Group.
|
|
602
816
|
# Let's remove Feature 'Feb' from FeatureGroup.
|
|
603
|
-
>>> fg.
|
|
817
|
+
>>> fg.remove_feature(fs.get_feature('Feb'))
|
|
604
818
|
True
|
|
819
|
+
|
|
605
820
|
# Apply the modified FeatureGroup to FeatureStore.
|
|
606
821
|
>>> fs.apply(fg)
|
|
607
822
|
True
|
|
823
|
+
|
|
608
824
|
# Archive Feature 'Feb'.
|
|
609
825
|
>>> fs.archive_feature('Feb')
|
|
610
826
|
Feature 'Feb' is archived.
|
|
@@ -612,8 +828,8 @@ class FeatureStore:
|
|
|
612
828
|
|
|
613
829
|
# List all the archived Features in the repo 'vfs_v1'.
|
|
614
830
|
>>> fs.list_features(archived=True)
|
|
615
|
-
|
|
616
|
-
0 Feb Feb None
|
|
831
|
+
id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
|
|
832
|
+
0 1 Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 03:17:29.403242 None 2025-07-28 03:19:58.950000 sales
|
|
617
833
|
>>>
|
|
618
834
|
"""
|
|
619
835
|
return self.__get_archived_features_df() if archived else self.__get_features_df()
|
|
@@ -640,25 +856,31 @@ class FeatureStore:
|
|
|
640
856
|
|
|
641
857
|
EXAMPLES:
|
|
642
858
|
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
643
|
-
>>> load_example_data('dataframe', 'sales')
|
|
644
859
|
# Create FeatureStore for repo 'vfs_v1'.
|
|
645
860
|
>>> fs = FeatureStore("vfs_v1")
|
|
861
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
862
|
+
# Setup FeatureStore for this repository.
|
|
863
|
+
>>> fs.setup()
|
|
864
|
+
True
|
|
865
|
+
|
|
646
866
|
# Create teradataml DataFrame.
|
|
867
|
+
>>> load_example_data("dataframe", "sales")
|
|
647
868
|
>>> df = DataFrame("sales")
|
|
869
|
+
|
|
648
870
|
# Create a FeatureGroup from teradataml DataFrame.
|
|
649
871
|
>>> fg = FeatureGroup.from_DataFrame(name='sales',
|
|
650
872
|
... entity_columns='accounts',
|
|
651
873
|
... df=df,
|
|
652
|
-
...
|
|
874
|
+
... timestamp_column='datetime')
|
|
653
875
|
# Apply the FeatureGroup to FeatureStore.
|
|
654
876
|
>>> fs.apply(fg)
|
|
655
877
|
True
|
|
656
878
|
|
|
657
879
|
# Example 1: List all the effective Entities in the repo 'vfs_v1'.
|
|
658
880
|
>>> fs.list_entities()
|
|
659
|
-
|
|
660
|
-
name
|
|
661
|
-
sales accounts
|
|
881
|
+
description creation_time modified_time entity_column
|
|
882
|
+
name data_domain
|
|
883
|
+
sales ALICE None 2025-07-28 03:17:31.558796 2025-07-28 03:19:41.233953 accounts
|
|
662
884
|
>>>
|
|
663
885
|
|
|
664
886
|
# Example 2: List all the archived Entities in the repo 'vfs_v1'.
|
|
@@ -668,10 +890,13 @@ class FeatureStore:
|
|
|
668
890
|
>>> entity = Entity('store_sales', columns=df.accounts)
|
|
669
891
|
# Update new entity to FeatureGroup.
|
|
670
892
|
>>> fg.apply(entity)
|
|
893
|
+
True
|
|
894
|
+
|
|
671
895
|
# Update FeatureGroup to FeatureStore. This will update Entity
|
|
672
896
|
# from 'sales' to 'store_sales' for FeatureGroup 'sales'.
|
|
673
897
|
>>> fs.apply(fg)
|
|
674
898
|
True
|
|
899
|
+
|
|
675
900
|
# Let's archive Entity 'sales' since it is not part of any FeatureGroup.
|
|
676
901
|
>>> fs.archive_entity('sales')
|
|
677
902
|
Entity 'sales' is archived.
|
|
@@ -680,8 +905,9 @@ class FeatureStore:
|
|
|
680
905
|
|
|
681
906
|
# List the archived entities.
|
|
682
907
|
>>> fs.list_entities(archived=True)
|
|
683
|
-
|
|
684
|
-
|
|
908
|
+
description creation_time modified_time entity_column
|
|
909
|
+
name data_domain
|
|
910
|
+
store_sales ALICE None 2025-07-28 03:23:40.322424 None accounts
|
|
685
911
|
>>>
|
|
686
912
|
"""
|
|
687
913
|
return self.__get_archived_entity_df() if archived else self.__get_entity_df()
|
|
@@ -708,11 +934,17 @@ class FeatureStore:
|
|
|
708
934
|
|
|
709
935
|
EXAMPLES:
|
|
710
936
|
>>> from teradataml import DataSource, FeatureStore, load_example_data
|
|
711
|
-
>>> load_example_data("dataframe", "admissions_train")
|
|
712
937
|
# Create teradataml DataFrame.
|
|
713
|
-
>>>
|
|
938
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
939
|
+
>>> admissions = DataFrame("admissions_train")
|
|
940
|
+
|
|
714
941
|
# Create FeatureStore for repo 'vfs_v1'.
|
|
715
942
|
>>> fs = FeatureStore("vfs_v1")
|
|
943
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
944
|
+
# Setup FeatureStore for this repository.
|
|
945
|
+
>>> fs.setup()
|
|
946
|
+
True
|
|
947
|
+
|
|
716
948
|
# Create DataSource using teradataml DataFrame.
|
|
717
949
|
>>> ds = DataSource(name='admissions', source=admissions)
|
|
718
950
|
# Apply the DataSource to FeatureStore.
|
|
@@ -721,21 +953,20 @@ class FeatureStore:
|
|
|
721
953
|
|
|
722
954
|
# Example 1: List all the effective DataSources in the repo 'vfs_v1'.
|
|
723
955
|
>>> fs.list_data_sources()
|
|
724
|
-
|
|
725
|
-
name
|
|
726
|
-
admissions
|
|
727
|
-
>>>
|
|
956
|
+
description timestamp_column source creation_time modified_time
|
|
957
|
+
name data_domain
|
|
958
|
+
admissions ALICE None None select * from "admissions_train" 2025-07-28 03:26:53.507807 None
|
|
728
959
|
|
|
729
960
|
# Example 2: List all the archived DataSources in the repo 'vfs_v1'.
|
|
730
961
|
# Let's first archive the DataSource.
|
|
731
962
|
>>> fs.archive_data_source('admissions')
|
|
732
963
|
DataSource 'admissions' is archived.
|
|
733
964
|
True
|
|
965
|
+
|
|
734
966
|
# List archived DataSources.
|
|
735
967
|
>>> fs.list_data_sources(archived=True)
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
admissions None None select * from "admissions_train" 2024-09-30 12:05:39.220000
|
|
968
|
+
name data_domain description timestamp_column source creation_time modified_time archived_time
|
|
969
|
+
0 admissions ALICE None None select * from "admissions_train" 2025-07-28 03:26:53.507807 None 2025-07-28 03:28:17.160000
|
|
739
970
|
>>>
|
|
740
971
|
"""
|
|
741
972
|
return self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
|
|
@@ -762,11 +993,17 @@ class FeatureStore:
|
|
|
762
993
|
|
|
763
994
|
EXAMPLES:
|
|
764
995
|
>>> from teradataml import FeatureGroup, FeatureStore, load_example_data
|
|
765
|
-
>>> load_example_data("dataframe", "admissions_train")
|
|
766
996
|
# Create teradataml DataFrame.
|
|
997
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
767
998
|
>>> admissions=DataFrame("admissions_train")
|
|
999
|
+
|
|
768
1000
|
# Create FeatureStore for repo 'vfs_v1'.
|
|
769
1001
|
>>> fs = FeatureStore("vfs_v1")
|
|
1002
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1003
|
+
# Setup FeatureStore for this repository.
|
|
1004
|
+
>>> fs.setup()
|
|
1005
|
+
True
|
|
1006
|
+
|
|
770
1007
|
# Create a FeatureGroup from DataFrame.
|
|
771
1008
|
>>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
|
|
772
1009
|
# Apply FeatureGroup to FeatureStore.
|
|
@@ -775,60 +1012,285 @@ class FeatureStore:
|
|
|
775
1012
|
|
|
776
1013
|
# Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
|
|
777
1014
|
>>> fs.list_feature_groups()
|
|
778
|
-
|
|
779
|
-
name
|
|
780
|
-
admissions
|
|
781
|
-
>>>
|
|
1015
|
+
description data_source_name entity_name creation_time modified_time
|
|
1016
|
+
name data_domain
|
|
1017
|
+
admissions ALICE None admissions admissions 2025-07-28 03:30:04.115331 None
|
|
782
1018
|
|
|
783
1019
|
# Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
|
|
784
1020
|
# Let's first archive the FeatureGroup.
|
|
785
1021
|
>>> fs.archive_feature_group("admissions")
|
|
786
1022
|
True
|
|
787
|
-
|
|
1023
|
+
|
|
788
1024
|
# List archived FeatureGroups.
|
|
789
1025
|
>>> fs.list_feature_groups(archived=True)
|
|
790
|
-
name description data_source_name entity_name archived_time
|
|
791
|
-
0 admissions None admissions admissions
|
|
1026
|
+
name data_domain description data_source_name entity_name creation_time modified_time archived_time
|
|
1027
|
+
0 admissions ALICE None admissions admissions 2025-07-28 03:30:04.115331 None 2025-07-28 03:31:04.550000
|
|
792
1028
|
>>>
|
|
793
1029
|
"""
|
|
794
1030
|
return self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
|
|
795
1031
|
|
|
796
|
-
def
|
|
1032
|
+
def list_data_domains(self) -> DataFrame:
|
|
797
1033
|
"""
|
|
798
1034
|
DESCRIPTION:
|
|
799
|
-
|
|
1035
|
+
Lists all the data domains.
|
|
800
1036
|
|
|
801
1037
|
PARAMETERS:
|
|
802
|
-
|
|
803
|
-
Required Argument.
|
|
804
|
-
Specifies the name of the feature to get.
|
|
805
|
-
Types: str
|
|
1038
|
+
None
|
|
806
1039
|
|
|
807
1040
|
RETURNS:
|
|
808
|
-
|
|
1041
|
+
teradataml DataFrame
|
|
809
1042
|
|
|
810
1043
|
RAISES:
|
|
811
|
-
|
|
1044
|
+
None
|
|
812
1045
|
|
|
813
1046
|
EXAMPLES:
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
1047
|
+
# Example 1: List all the data domains in the repo 'vfs_v1'.
|
|
1048
|
+
>>> from teradataml import FeatureStore
|
|
1049
|
+
# Create FeatureStore for repo 'vfs_v1' with data_domain 'd1'.
|
|
1050
|
+
>>> fs = FeatureStore("vfs_v1", data_domain='d1')
|
|
1051
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1052
|
+
|
|
1053
|
+
# List all the data domains in the repo 'vfs_v1'.
|
|
1054
|
+
>>> fs.list_data_domains()
|
|
1055
|
+
name created_time
|
|
1056
|
+
0 d1 2025-04-30 11:21:40.123456
|
|
1057
|
+
"""
|
|
1058
|
+
return self.__get_data_domain_df()
|
|
1059
|
+
|
|
1060
|
+
def list_feature_processes(self, archived=False) -> DataFrame:
|
|
1061
|
+
"""
|
|
1062
|
+
DESCRIPTION:
|
|
1063
|
+
Lists all the feature processes.
|
|
1064
|
+
|
|
1065
|
+
PARAMETERS:
|
|
1066
|
+
archived:
|
|
1067
|
+
Optional Argument.
|
|
1068
|
+
Specifies whether to retrieve archived feature processes or not.
|
|
1069
|
+
When set to True, archived feature processes in FeatureStore are listed.
|
|
1070
|
+
Otherwise, all feature processes are listed.
|
|
1071
|
+
Default Value: False
|
|
1072
|
+
Types: bool
|
|
1073
|
+
|
|
1074
|
+
RETURNS:
|
|
1075
|
+
teradataml DataFrame
|
|
1076
|
+
|
|
1077
|
+
RAISES:
|
|
1078
|
+
None
|
|
1079
|
+
|
|
1080
|
+
EXAMPLES:
|
|
1081
|
+
# Example 1: List all the feature processes in the repo 'vfs_v1'.
|
|
1082
|
+
>>> from teradataml import FeatureStore
|
|
1083
|
+
|
|
1084
|
+
# Create FeatureStore 'vfs_v1' or use existing one.
|
|
1085
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1086
|
+
FeatureStore is ready to use.
|
|
1087
|
+
|
|
1088
|
+
# Load the sales data.
|
|
1089
|
+
>>> load_example_data("dataframe", "sales")
|
|
1090
|
+
>>> df = DataFrame("sales")
|
|
1091
|
+
|
|
1092
|
+
# Create a feature process.
|
|
1093
|
+
>>> from teradataml import FeatureProcess
|
|
1094
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1095
|
+
... data_domain='sales',
|
|
1096
|
+
... object=df,
|
|
1097
|
+
... entity="accounts",
|
|
1098
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
1099
|
+
>>> fp.run()
|
|
1100
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
1101
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
1102
|
+
|
|
1103
|
+
# List all the feature processes in the repo 'vfs_v1'.
|
|
1104
|
+
>>> fs.list_feature_processes()
|
|
1105
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
1106
|
+
process_id
|
|
1107
|
+
5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 9999-12-31 23:59:59.999999+00:
|
|
1108
|
+
|
|
1109
|
+
# Example 2: List all the archived feature processes in the repo 'vfs_v1'.
|
|
1110
|
+
|
|
1111
|
+
# Let's check the archived feature processes before archiving feature process.
|
|
1112
|
+
>>> fs.list_feature_processes(archived=True)
|
|
1113
|
+
process_id start_time end_time status filter as_of_start as_of_end failure_reason
|
|
1114
|
+
|
|
1115
|
+
# Archive the feature process by passing the process_id.
|
|
1116
|
+
>>> fs.archive_feature_process('5747082b-4acb-11f0-a2d7-f020ffe7fe09')
|
|
1117
|
+
Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
|
|
1118
|
+
Feature 'Feb' is archived from metadata.
|
|
1119
|
+
Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1120
|
+
Feature 'Jan' is archived from metadata.
|
|
1121
|
+
Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1122
|
+
Feature 'Mar' is archived from metadata.
|
|
1123
|
+
Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1124
|
+
Feature 'Apr' is archived from metadata.
|
|
1125
|
+
FeatureProcess with process id '5747082b-4acb-11f0-a2d7-f020ffe7fe09' is archived.
|
|
1126
|
+
True
|
|
1127
|
+
|
|
1128
|
+
# List all the archived feature processes in the repo 'vfs_v1'.
|
|
1129
|
+
>>> fs.list_feature_processes(archived=True)
|
|
1130
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
1131
|
+
process_id
|
|
1132
|
+
5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 2025-06-16 16:04:32.260000+00:
|
|
1133
|
+
|
|
1134
|
+
"""
|
|
1135
|
+
validate_params = []
|
|
1136
|
+
validate_params.append(["archived", archived, True, bool, True])
|
|
1137
|
+
# Validate argument types
|
|
1138
|
+
_Validators._validate_function_arguments(validate_params)
|
|
1139
|
+
|
|
1140
|
+
f_process_df = self.__get_without_valid_period_df(self.__get_feature_process_df())
|
|
1141
|
+
f_process_df = f_process_df[f_process_df.data_domain == self.__data_domain]
|
|
1142
|
+
|
|
1143
|
+
if archived:
|
|
1144
|
+
# Filter out the active feature process. Only archived features are returned.
|
|
1145
|
+
f_process_df = f_process_df[(Col("valid_end") <= Col('current_timestamp'))]
|
|
1146
|
+
|
|
1147
|
+
return f_process_df
|
|
1148
|
+
|
|
1149
|
+
+    def list_feature_runs(self):
+        """
+        DESCRIPTION:
+            Lists all the feature runs in the FeatureStore.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the feature runs in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create a FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1")
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='test_domain',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Mar', 'Apr'])
+            >>> fp.run(filters=[df.accounts=='Alpha Co', "accounts='Jones LLC'"])
+            Process '11b62599-692f-11f0-ad19-f020ffe7fe09' started.
+            Ingesting the features for filter 'accounts = 'Alpha Co'' to catalog.
+            Ingesting the features for filter 'accounts='Jones LLC'' to catalog.
+            Process '11b62599-692f-11f0-ad19-f020ffe7fe09' completed.
+            True
+
+            # List all the feature runs in the repo 'vfs_v1'.
+            >>> fs.list_feature_runs()
+                    process_id data_domain start_time end_time status filter as_of_start as_of_end failure_reason
+            run_id
+            1 11b62599-692f-11f0-ad19-f020ffe7fe09 test_domain 2025-07-25 08:12:13.001968 2025-07-25 08:12:13.001968 completed accounts = 'Alpha Co', accounts='Jones LLC' None None None
+        """
+        return self.__get_feature_runs_df()
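The runs DataFrame is keyed by run_id and carries status and failure_reason columns, per the example output above. A small sketch of narrowing to failed runs; the literal status value 'failed' is an assumption, since only 'completed' appears in the docstring:

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="test_domain")
    runs = fs.list_feature_runs()
    # Inspect failure_reason for runs that did not complete.
    print(runs[runs.status == "failed"])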
+
+    def list_dataset_catalogs(self) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the dataset catalogs.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the dataset catalogs in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1", data_domain='sales')
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+            # Create a dataset catalog.
+            >>> from teradataml import DatasetCatalog
+            >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
+            >>> dataset = dc.build_dataset(entity='accounts',
+            ...                            selected_features={
+            ...                                'Jan': '5747082b-4acb-11f0-a2d7-f020ffe7fe09',
+            ...                                'Feb': '5747082b-4acb-11f0-a2d7-f020ffe7fe09'},
+            ...                            view_name='ds_jan_feb',
+            ...                            description='Dataset with Jan and Feb features')
+
+            # List all the dataset catalogs in the repo 'vfs_v1'.
+            >>> fs.list_dataset_catalogs()
+                    data_domain name entity_name database_name description valid_start valid_end
+            id
+            4f763a7b-8920-448c-87af-432e7d36c9cb sales ds_jan_feb accounts vfs_v1 Dataset with Jan and Feb features 2025-06-16 16:15:17.577637+00: 9999-12-31 23:59:59.999999+00:
+        """
+        return self.__get_without_valid_period_df(self.__get_dataset_catalog_df())
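Each catalog row names a view inside the repo database (database_name above), so a listed dataset can be opened directly. A minimal sketch, assuming the 'ds_jan_feb' view from the example exists:

    from teradataml import DataFrame, in_schema

    # Dataset views live in the repo database itself.
    dataset_df = DataFrame(in_schema("vfs_v1", "ds_jan_feb"))
    print(dataset_df)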
+
+    def get_feature(self, name):
+        """
+        DESCRIPTION:
+            Retrieve the feature.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the feature to get.
+                Types: str
+
+        RETURNS:
+            Feature.
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, FeatureStore, load_example_data
+            # Create DataFrame on sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+            >>> df
+                          Feb    Jan    Mar    Apr   datetime
+            accounts
             Orange Inc  210.0    NaN    NaN  250.0 04/01/2017
             Jones LLC   200.0  150.0  140.0  180.0 04/01/2017
             Blue Inc     90.0   50.0   95.0  101.0 04/01/2017
             Alpha Co    210.0  200.0  215.0  250.0 04/01/2017
             Yellow Inc   90.0    NaN    NaN    NaN 04/01/2017
-
+
+            # Create a FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create Feature for column 'Mar' with name 'sales_mar'.
             >>> feature = Feature('sales_mar', column=df.Mar)
+
             # Apply the Feature to FeatureStore.
-            >>> fs = FeatureStore("vfs_v1")
             >>> fs.apply(feature)
             True

@@ -836,7 +1298,6 @@ class FeatureStore:
             >>> feature = fs.get_feature('sales_mar')
             >>> feature
             Feature(name=sales_mar)
-            >>>
         """
         argument_validation_params = []
         argument_validation_params.append(["name", name, False, (str), True])
@@ -844,16 +1305,24 @@ class FeatureStore:
         # Validate argument types
         _Validators._validate_function_arguments(argument_validation_params)

-
-        df =
+        # Check if the feature exists in the current data domain.
+        df = self.__get_features_wog_df()
+        df = df[(df['name'] == name) &
+                (df['data_domain'] == self.__data_domain)]

-        #
+        # If no records found, check if the feature exists in any domain.
         if df.shape[0] == 0:
-
-
-            msg_code
+            res = _FSUtils._get_data_domains(self.__repo, name, 'feature')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
+                                                 self.__data_domain)
             raise TeradataMlException(error_msg, msg_code)
-
+
         return Feature._from_df(df)
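The lookup above is scoped to the store's current data domain, and the raised error distinguishes 'does not exist' from 'exists in another domain'. A sketch of handling both cases; the exception import path follows teradataml's usual location, but treat the rest as a placeholder setup:

    from teradataml import FeatureStore
    from teradataml.common.exceptions import TeradataMlException

    fs = FeatureStore("vfs_v1", data_domain="sales")
    try:
        print(fs.get_feature("sales_mar"))
    except TeradataMlException as err:
        # The message names the other data domain(s) when the feature lives elsewhere.
        print(err)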
 
     def get_group_features(self, group_name):
@@ -875,11 +1344,10 @@ class FeatureStore:

         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureStore, load_example_data
-
-            >>> load_example_data("dataframe", "sales")
+
             # Create DataFrame on sales data.
+            >>> load_example_data("dataframe", "sales")
             >>> df = DataFrame("sales")
-            >>> df
             >>> df
                           Feb    Jan    Mar    Apr   datetime
             accounts
@@ -888,12 +1356,18 @@ class FeatureStore:
             Blue Inc     90.0   50.0   95.0  101.0 04/01/2017
             Alpha Co    210.0  200.0  215.0  250.0 04/01/2017
             Yellow Inc   90.0    NaN    NaN    NaN 04/01/2017
-
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create FeatureGroup with name 'sales' from DataFrame.
             >>> fg = FeatureGroup.from_DataFrame(
-            ...     name="sales", df=df, entity_columns="accounts",
+            ...     name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
             # Apply the FeatureGroup to FeatureStore.
-            >>> fs = FeatureStore("vfs_v1")
             >>> fs.apply(fg)
             True

@@ -911,13 +1385,21 @@ class FeatureStore:

         # Select active features.
         features_df = self.__get_features_df()
-        features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) &
+        features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) &
+                                   (features_df.group_name == group_name) &
+                                   (features_df.data_domain == self.__data_domain))]

         # Check if a feature with that group name exists or not. If not, raise error.
         if features_df.shape[0] == 0:
-
-
-            msg_code
+            res = _FSUtils._get_data_domains(self.__repo, group_name, 'group_features')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
+                                                 self.__data_domain)
             raise TeradataMlException(error_msg, msg_code)

         return Feature._from_df(features_df)
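Since the selection above drops INACTIVE rows and filters on the current data domain, the result shrinks as features are deactivated. A short usage sketch (names follow the docstring example):

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1")
    # Returns the active Feature objects registered under group 'sales'.
    print(fs.get_group_features("sales"))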
@@ -941,9 +1423,8 @@ class FeatureStore:

         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureStore, load_example_data
-            # Load the sales data to Vantage.
-            >>> load_example_data("dataframe", "sales")
             # Create DataFrame on sales data.
+            >>> load_example_data("dataframe", "sales")
             >>> df = DataFrame("sales")
             >>> df
                           Feb    Jan    Mar    Apr   datetime
@@ -953,12 +1434,18 @@ class FeatureStore:
             Blue Inc     90.0   50.0   95.0  101.0 04/01/2017
             Alpha Co    210.0  200.0  215.0  250.0 04/01/2017
             Yellow Inc   90.0    NaN    NaN    NaN 04/01/2017
-
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create FeatureGroup with name 'sales' from DataFrame.
             >>> fg = FeatureGroup.from_DataFrame(
-            ...     name="sales", df=df, entity_columns="accounts",
+            ...     name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
             # Apply the FeatureGroup to FeatureStore.
-            >>> fs = FeatureStore("vfs_v1")
             >>> fs.apply(fg)
             True

@@ -975,20 +1462,28 @@ class FeatureStore:
         _Validators._validate_function_arguments(argument_validation_params)

         df = self.list_feature_groups()
-        df = df[df
+        df = df[(df['name'] == name) &
+                (df['data_domain'] == self.__data_domain)]

-        # Check if a feature with that name exists or not. If not, raise error.
+        # Check if a feature group with that name exists or not. If not, raise error.
         if df.shape[0] == 0:
-
-
-            msg_code
+            res = _FSUtils._get_data_domains(self.__repo, name, 'feature_group')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
+                                                 self.__data_domain)
             raise TeradataMlException(error_msg, msg_code)

         return FeatureGroup._from_df(df,
                                      self.__repo,
                                      self.__get_features_df(),
                                      self.__get_entity_df(),
-                                     self.__get_data_source_df()
+                                     self.__get_data_source_df(),
+                                     data_domain=self.__data_domain
                                      )
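With data_domain now threaded into FeatureGroup._from_df, the same group name can exist independently per domain. A sketch of fetching a group in a specific domain (placeholder names):

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    fg = fs.get_feature_group("sales")
    print(fg)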
 
     def get_entity(self, name):
@@ -1010,9 +1505,8 @@ class FeatureStore:

         EXAMPLES:
             >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
-            # Load the admissions data to Vantage.
-            >>> load_example_data("dataframe", "admissions_train")
             # Create DataFrame on admissions data.
+            >>> load_example_data("dataframe", "admissions_train")
             >>> df = DataFrame("admissions_train")
             >>> df
                masters   gpa     stats programming  admitted
@@ -1027,20 +1521,24 @@ class FeatureStore:
             26     yes  3.57  Advanced    Advanced         1
             19     yes  1.98  Advanced    Advanced         0
             13      no  4.00  Advanced      Novice         1
-
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create Entity for column 'id' with name 'admissions_id'.
             >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
             # Apply the Entity to FeatureStore 'vfs_v1'.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(entity)
             True
-            >>>

             # Get the Entity 'admissions_id' from repo 'vfs_v1'
             >>> entity = fs.get_entity('admissions_id')
             >>> entity
             Entity(name=admissions_id)
-            >>>
         """
         argument_validation_params = []
         argument_validation_params.append(["name", name, False, (str), True])
@@ -1049,14 +1547,22 @@ class FeatureStore:
         _Validators._validate_function_arguments(argument_validation_params)

         df = self.__get_entity_df()
-        df = df[df
+        df = df[(df['name'] == name) &
+                (df['data_domain'] == self.__data_domain)]

         # Check if entity with that name exists or not. If not, raise error.
         if df.shape[0] == 0:
-
-
-            msg_code
+            res = _FSUtils._get_data_domains(self.__repo, name, 'entity')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
+                                                 self.__data_domain)
             raise TeradataMlException(error_msg, msg_code)
+
         return Entity._from_df(df)
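Entity lookups follow the same domain-scoped pattern. A brief register-then-fetch sketch; the composite two-column key is purely illustrative (the 'columns' argument also accepts a list, as the get_data example later in this file shows):

    from teradataml import Entity, FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    # 'store_id' and 'sku_id' are hypothetical column names.
    fs.apply(Entity(name="store_sku", columns=["store_id", "sku_id"]))
    print(fs.get_entity("store_sku"))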
 
     def get_data_source(self, name):
@@ -1078,9 +1584,8 @@ class FeatureStore:

         EXAMPLES:
             >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
-            # Load the admissions data to Vantage.
-            >>> load_example_data("dataframe", "admissions_train")
             # Create DataFrame on admissions data.
+            >>> load_example_data("dataframe", "admissions_train")
             >>> df = DataFrame("admissions_train")
             >>> df
                masters   gpa     stats programming  admitted
@@ -1095,20 +1600,24 @@ class FeatureStore:
             26     yes  3.57  Advanced    Advanced         1
             19     yes  1.98  Advanced    Advanced         0
             13      no  4.00  Advanced      Novice         1
-
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create DataSource using DataFrame 'df' with name 'admissions'.
             >>> ds = DataSource('admissions', source=df)
             # Apply the DataSource to FeatureStore 'vfs_v1'.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(ds)
             True
-            >>>

             # Get the DataSource 'admissions' from repo 'vfs_v1'
             >>> ds = fs.get_data_source('admissions')
             >>> ds
             DataSource(name=admissions)
-            >>>
         """
         argument_validation_params = []
         argument_validation_params.append(["name", name, False, (str), True])
@@ -1117,102 +1626,265 @@ class FeatureStore:
         _Validators._validate_function_arguments(argument_validation_params)

         df = self.__get_data_source_df()
-        df = df[df
+        df = df[(df['name'] == name) &
+                (df['data_domain'] == self.__data_domain)]

-        # Check if a
+        # Check if a data source with that name exists or not. If not, raise error.
         if df.shape[0] == 0:
-
-
-            msg_code
+            res = _FSUtils._get_data_domains(self.__repo, name, 'data_source')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
+                                                 self.__data_domain)
             raise TeradataMlException(error_msg, msg_code)

         return DataSource._from_df(df)
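DataSource retrieval mirrors the other getters. A minimal round trip, assuming the 'sales' table loaded in the docstring examples ('sales_src' is a made-up name):

    from teradataml import DataFrame, DataSource, FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    fs.apply(DataSource("sales_src", source=DataFrame("sales")))
    print(fs.get_data_source("sales_src"))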
-
-    def
+
+    def get_feature_process(self, object, entity=None, features=None, description=None):
         """
         DESCRIPTION:
-
-            not available for any further processing. Set the status as 'active' with
-            "set_features_active()" method.
+            Retrieves the FeatureProcess object.

         PARAMETERS:
-
+            object:
                 Required Argument.
-                Specifies the
-
+                Specifies the source to ingest feature values. It can be one of the following:
+                    * teradataml DataFrame
+                    * Feature group
+                    * Process id
+                Notes:
+                     * If "object" is of type teradataml DataFrame, then "entity"
+                       and "features" should be provided.
+                     * If "object" is of type str, then it is considered as
+                       the process id of an existing FeatureProcess and reruns the
+                       process. Entity and features are taken from the existing
+                       feature process. Hence, the arguments "entity" and "features"
+                       are ignored.
+                     * If "object" is of type FeatureGroup, then entity and features
+                       are taken from the FeatureGroup. Hence, the arguments "entity"
+                       and "features" are ignored.
+                Types: DataFrame or FeatureGroup or str
+
+            entity:
+                Optional Argument.
+                Specifies the Entity for the DataFrame.
+                Notes:
+                     * Ignored when "object" is of type FeatureGroup or str.
+                     * If a string or list of strings is provided, then "object" should
+                       have these columns in it.
+                     * If an Entity object is provided, then the associated columns in the
+                       Entity object should be present in the DataFrame.
+                Types: Entity or str or list of str
+
+            features:
+                Optional Argument.
+                Specifies the list of features to be considered in the feature process.
+                Feature ingestion takes place only for these features.
+                Note:
+                     * Ignored when "object" is of type FeatureGroup or str.
+                Types: Feature or list of Feature or str or list of str

         RETURNS:
-
+            FeatureProcess

         RAISES:
-
+            None.

         EXAMPLES:
-            >>> from teradataml import
-            # Load the admissions data to Vantage.
-            >>> load_example_data("dataframe", "admissions_train")
-            # Create DataFrame on admissions data.
-            >>> df = DataFrame("admissions_train")
-            >>> df
-               masters   gpa     stats programming  admitted
-            id
-            34     yes  3.85  Advanced    Beginner         0
-            32     yes  3.46  Advanced    Beginner         0
-            11      no  3.13  Advanced    Advanced         1
-            40     yes  3.95    Novice    Beginner         0
-            38     yes  2.65  Advanced    Beginner         1
-            36      no  3.00  Advanced      Novice         0
-            7      yes  2.33    Novice      Novice         1
-            26     yes  3.57  Advanced    Advanced         1
-            19     yes  1.98  Advanced    Advanced         0
-            13      no  4.00  Advanced      Novice         1
-            >>>
-            # Create FeatureGroup from DataFrame df.
-            >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
-            # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
+            >>> from teradataml import FeatureStore
             >>> fs = FeatureStore('vfs_v1')
-
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
             True
-            # Get FeatureGroup 'admissions' from FeatureStore.
-            >>> fg = fs.get_feature_group('admissions')
-            >>> fg
-            FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))

-            #
-            >>>
-
-
-
-            >>>
-
-
+            # Load the admissions data to Vantage.
+            >>> from teradataml import DataFrame, load_example_data
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> admission_df = DataFrame("admissions_train")
+
+            >>> fp = FeatureProcess(repo='vfs_v1',
+            ...                     data_domain='d1',
+            ...                     object=admission_df,
+            ...                     entity='id',
+            ...                     features=['stats', 'programming', 'admitted'])
+            >>> fp.run()
+            Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' started.
+            Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' completed.
+
+            >>> fs.get_feature_process(object='0d365f08-66b0-11f0-88ff-b0dcef8381ea')
+            FeatureProcess(repo=vfs_v1, data_domain=d1, process_id=0d365f08-66b0-11f0-88ff-b0dcef8381ea)
         """
-        return self.
-
-
+        return FeatureProcess(repo=self.__repo,
+                              data_domain=self.__data_domain,
+                              object=object,
+                              entity=entity,
+                              features=features,
+                              description=description
+                              )
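Because a plain string 'object' is treated as a process id, this getter doubles as a rerun handle for stored processes. Sketch (the id is a placeholder):

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="d1")
    # Rebuild the FeatureProcess from its stored id, then rerun the ingestion.
    fp = fs.get_feature_process(object="0d365f08-66b0-11f0-88ff-b0dcef8381ea")
    fp.run()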
+
+    def get_feature_catalog(self):
         """
         DESCRIPTION:
-
-            "set_features_inactive()" method. Note that, inactive features are
-            not available for any further processing.
+            Retrieves FeatureCatalog based on the feature store's repo and data domain.

         PARAMETERS:
-
-                Required Argument.
-                Specifies the name(s) of the feature(s).
-                Types: str OR list of str
+            None.

         RETURNS:
-
+            FeatureCatalog

         RAISES:
-
+            None.

         EXAMPLES:
-            >>> from teradataml import
-            #
-            >>>
-
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore('vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Load the sales data to Vantage.
+            >>> from teradataml import load_example_data
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+            # Get FeatureCatalog from FeatureStore.
+            >>> fs.get_feature_catalog()
+            FeatureCatalog(repo=vfs_v1, data_domain=sales)
+        """
+        return FeatureCatalog(repo=self.__repo,
+                              data_domain=self.__data_domain)
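The catalog handle is the documented way to enumerate ingested feature versions; the get_data notes below point at FeatureCatalog.list_feature_versions() for exactly that. A sketch, assuming the zero-argument call implied there:

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    catalog = fs.get_feature_catalog()
    print(catalog.list_feature_versions())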
+
+    def get_data_domain(self):
+        """
+        DESCRIPTION:
+            Retrieves DataDomain based on the feature store's repo and data domain.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            DataDomain
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore('vfs_v1', data_domain='sales')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+
+            # Get DataDomain from FeatureStore.
+            >>> fs.get_data_domain()
+            DataDomain(repo=vfs_v1, data_domain=sales)
+        """
+        return DataDomain(repo=self.__repo,
+                          data_domain=self.__data_domain)
+
+    def get_dataset_catalog(self):
+        """
+        DESCRIPTION:
+            Retrieves DatasetCatalog based on the feature store's repo and data domain.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            DatasetCatalog
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore('vfs_v1', data_domain='sales')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+
+            # Load the sales data to Vantage.
+            >>> from teradataml import load_example_data
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+            True
+
+            # Build the dataset.
+            >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
+            >>> dataset = dc.build_dataset(entity='accounts',
+            ...                            selected_features={
+            ...                                'Jan': fp.process_id,
+            ...                                'Feb': fp.process_id},
+            ...                            view_name='ds_jan_feb',
+            ...                            description='Dataset with Jan and Feb features')
+
+            # Get DatasetCatalog from FeatureStore.
+            >>> fs.get_dataset_catalog()
+            DatasetCatalog(repo=vfs_v1, data_domain=sales)
+        """
+        return DatasetCatalog(repo=self.__repo,
+                              data_domain=self.__data_domain)
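A built dataset stays addressable by its view name, which pairs naturally with get_data further below. A small round trip reusing the docstring's names:

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    dc = fs.get_dataset_catalog()
    # 'ds_jan_feb' was registered via dc.build_dataset(...) in the example above.
    print(fs.get_data(dataset_name="ds_jan_feb"))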
+
+    def set_features_inactive(self, names):
+        """
+        DESCRIPTION:
+            Mark the feature status as 'inactive'. Note that inactive features are
+            not available for any further processing. Set the status back to 'active'
+            with the "set_features_active()" method.
+
+        PARAMETERS:
+            names:
+                Required Argument.
+                Specifies the name(s) of the feature(s).
+                Types: str OR list of str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
+            # Create DataFrame on admissions data.
+            >>> load_example_data("dataframe", "admissions_train")
             >>> df = DataFrame("admissions_train")
             >>> df
                masters   gpa     stats programming  admitted
@@ -1227,25 +1899,103 @@ class FeatureStore:
             26     yes  3.57  Advanced    Advanced         1
             19     yes  1.98  Advanced    Advanced         0
             13      no  4.00  Advanced      Novice         1
-
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create FeatureGroup from DataFrame df.
             >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
             # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(fg)
             True
+
             # Get FeatureGroup 'admissions' from FeatureStore.
             >>> fg = fs.get_feature_group('admissions')
             >>> fg
             FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
+
+            # Example 1: Set the Feature 'programming' inactive.
             # Set the Feature 'programming' inactive.
             >>> fs.set_features_inactive('programming')
             True
+
+            # Get FeatureGroup again after setting feature inactive.
+            >>> fg = fs.get_feature_group('admissions')
+            >>> fg
+            FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
+
+        """
+        return self.__set_active_inactive_features(names, active=False)
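Deactivation and reactivation are symmetric toggles over the feature status column. Sketch (names from the example above):

    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1")
    fs.set_features_inactive("programming")   # hidden from further processing
    fs.set_features_active("programming")     # restored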
+
+    def set_features_active(self, names):
+        """
+        DESCRIPTION:
+            Mark the feature status as 'active'. Set the status to 'inactive' with
+            the "set_features_inactive()" method. Note that inactive features are
+            not available for any further processing.
+
+        PARAMETERS:
+            names:
+                Required Argument.
+                Specifies the name(s) of the feature(s).
+                Types: str OR list of str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
+            # Create DataFrame on admissions data.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            34     yes  3.85  Advanced    Beginner         0
+            32     yes  3.46  Advanced    Beginner         0
+            11      no  3.13  Advanced    Advanced         1
+            40     yes  3.95    Novice    Beginner         0
+            38     yes  2.65  Advanced    Beginner         1
+            36      no  3.00  Advanced      Novice         0
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            19     yes  1.98  Advanced    Advanced         0
+            13      no  4.00  Advanced      Novice         1
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Create FeatureGroup from DataFrame df.
+            >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
+            # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
+            >>> fs.apply(fg)
+            True
+
+            # Get FeatureGroup 'admissions' from FeatureStore.
+            >>> fg = fs.get_feature_group('admissions')
+            >>> fg
+            FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
+
+            # Example 1: Set the Feature 'programming' inactive.
+            # Set the Feature 'programming' inactive.
+            >>> fs.set_features_inactive('programming')
+            True
+
             # Get FeatureGroup again after setting feature inactive.
             >>> fg = fs.get_feature_group('admissions')
             >>> fg
             FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
-            >>>

             # Mark Feature 'programming' from 'inactive' to 'active'.
             >>> fs.set_features_active('programming')
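The hunk below adds a guard to __set_active_inactive_features: names backed by feature-catalog entries cannot be deactivated, so the requested names are first split into catalog-backed and free names. A standalone sketch of just that partition step (pure Python; the sample lists are made up):

    metadata_features = ["Jan", "Feb"]   # features with catalog entries
    names = ["Jan", "Mar"]               # user-requested names

    catalog_features = [n for n in names if n in metadata_features]
    non_catalog_features = [n for n in names if n not in metadata_features]

    print(catalog_features)      # ['Jan']  -> blocked, reported to the user
    print(non_catalog_features)  # ['Mar']  -> proceeds to the status update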
@@ -1293,17 +2043,53 @@ class FeatureStore:

         status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name

-
+        is_set = True
+        if status == FeatureStatus.INACTIVE.name:
+            # Get the joined df of '_efs_features' and '_efs_features_metadata'.
+            feature_info_df = self.__get_feature_info_df()
+            metadata_features = [feature.name for feature in feature_info_df.itertuples()]
+
+            # Partition the user-provided feature names into those present
+            # in the catalog and those not present in the catalog.
+            catalog_features = []
+            non_catalog_features = []
+            for name in names:
+                if name in metadata_features:
+                    catalog_features.append(name)
+                else:
+                    non_catalog_features.append(name)
+
+            # If all user-provided names are present in the catalog.
+            if len(catalog_features) == len(names):
+                print("Feature(s) '{}' entries exist in feature catalog, cannot be set "
+                      "to inactive.".format(", ".join(catalog_features)))
+                return False
+            # If some of the user-provided features are present in the catalog.
+            elif len(catalog_features) > 0:
+                print("Feature(s) '{}' entries exist in feature catalog, cannot be set "
+                      "to inactive.".format(", ".join(catalog_features)))
+                is_set = is_set and False
+
+            # Keep only the feature names which are not present in the catalog.
+            names = non_catalog_features
+
+        _update_data(table_name=self.__table_names['feature'],
                      schema_name=self.__repo,
                      update_columns_values={"status": status},
                      update_conditions={"name": names}
                      )
-
+
+        return is_set

     def apply(self, object):
         """
         DESCRIPTION:
             Register objects to repository.
+            Note:
+                * If the object is an Entity or FeatureGroup and the same entity or feature group is already
+                  registered in the repository, it is not updated.
+                * If the entity or feature group is associated with any feature process, an error is raised
+                  while modifying these objects.

         PARAMETERS:
             object:
@@ -1318,16 +2104,24 @@ class FeatureStore:
         TeradataMLException

         EXAMPLES:
+            >>> from teradataml import FeatureStore, DataFrame, load_example_data
+            # Create DataFrame on sales data.
             >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")

+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
             # and register with repo 'vfs_v1'.
             >>> # Create Feature.
             >>> from teradataml import Feature
             >>> feature = Feature('sales:Feb', df.Feb)
             >>> # Register the above Feature with repo.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(feature)
             True
             >>>
@@ -1338,7 +2132,6 @@ class FeatureStore:
             >>> from teradataml import Entity
             >>> entity = Entity('sales:accounts', df.accounts)
             >>> # Register the above Entity with repo.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(entity)
             True
             >>>
@@ -1349,7 +2142,6 @@ class FeatureStore:
             >>> from teradataml import DataSource
             >>> ds = DataSource('Sales_Data', df)
             >>> # Register the above DataSource with repo.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(ds)
             True
             >>>
@@ -1364,29 +2156,73 @@ class FeatureStore:
             ...                                entity=entity,
             ...                                data_source=data_source)
             >>> # Register the above FeatureGroup with repo.
-            >>> fs = FeatureStore('vfs_v1')
             >>> fs.apply(fg)
             True
-            >>>
         """
         argument_validation_params = []
         argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])

         # Validate argument types
         _Validators._validate_function_arguments(argument_validation_params)
-        return object.publish(self.__repo)
+        return object.publish(self.__repo, self.__data_domain)
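Since publish() now receives the data domain, apply() registers objects into the store's current domain rather than a repo-wide namespace. A compact sketch (placeholder names):

    from teradataml import DataFrame, Feature, FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    df = DataFrame("sales")
    # Registered under data domain 'sales' in repo 'vfs_v1'.
    fs.apply(Feature("sales:Feb", df.Feb))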
 
-    def
+    def get_data(self, process_id=None, entity=None, features=None,
+                 dataset_name=None, as_of=None, include_historic_records=False):
         """
         DESCRIPTION:
-            Returns teradataml DataFrame
+            Returns teradataml DataFrame which has entities and feature values.
+            The method generates the dataset from one of the following:
+                * process_id
+                * entity and features
+                * dataset_name

         PARAMETERS:
-
-
-
+            process_id:
+                Optional Argument.
+                Either "process_id", "entity" and "features", or "dataset_name" is mandatory.
+                Specifies the process id of an existing feature process.
+                Types: str
+
+            entity:
+                Optional Argument.
+                Specifies the name of the Entity or an Entity object
+                to be considered in the dataset.
+                Types: str or Entity
+
+            features:
+                Optional Argument.
+                Specifies the names of Features and the corresponding feature version
+                to be included in the dataset.
+                Notes:
+                     * Key is the name of the feature and value is the version of the
+                       feature.
+                     * Look at FeatureCatalog.list_feature_versions() to get the list of
+                       features and their versions.
+                Types: dict
+
+            dataset_name:
+                Optional Argument.
+                Specifies the dataset name.
                 Types: str

+            as_of:
+                Optional Argument.
+                Specifies the time to retrieve the Feature Values instead of
+                retrieving the latest values.
+                Notes:
+                     * Applicable only when "process_id" is passed to the function.
+                     * Ignored when "dataset_name" is passed.
+                Types: str or datetime.datetime
+
+            include_historic_records:
+                Optional Argument.
+                Specifies whether to include historic data in the dataset.
+                Note:
+                    * If "as_of" is specified, then the "include_historic_records" argument is ignored.
+                Default Value: False
+                Types: bool
+
         RETURNS:
             teradataml DataFrame.
@@ -1395,11 +2231,9 @@ class FeatureStore:

         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureStore, load_example_data
-            # Load the sales data to Vantage.
-            >>> load_example_data("dataframe", "sales")
             # Create DataFrame on sales data.
+            >>> load_example_data("dataframe", "sales")
             >>> df = DataFrame("sales")
-            >>> df
             >>> df
                           Feb    Jan    Mar    Apr   datetime
             accounts
@@ -1408,99 +2242,415 @@ class FeatureStore:
             Blue Inc     90.0   50.0   95.0  101.0 04/01/2017
             Alpha Co    210.0  200.0  215.0  250.0 04/01/2017
             Yellow Inc   90.0    NaN    NaN    NaN 04/01/2017
-
-
-            >>>
-
-
-
-
+
+            >>> repo = 'vfs_v1'
+            >>> data_domain = 'sales'
+            >>> fs = FeatureStore(repo=repo, data_domain=data_domain)
+            FeatureStore is ready to use.
+
+            # Example 1: Get the data from process_id.
+            >>> fp = FeatureProcess(repo=repo,
+            ...                     data_domain=data_domain,
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb'])
+            >>> fp.run()
+            Process '1e9e8d64-6851-11f0-99c5-a30631e77953' started.
+            Process '1e9e8d64-6851-11f0-99c5-a30631e77953' completed.
             True

-
-
-
-
+            >>> fs.get_data(process_id=fp.process_id)
+                 accounts    Feb    Jan
+            0    Alpha Co  210.0  200.0
+            1    Blue Inc   90.0   50.0
+            2   Jones LLC  200.0  150.0
+            3  Orange Inc  210.0    NaN
+            4  Yellow Inc   90.0    NaN
+            5     Red Inc  200.0  150.0
+
+            # Example 2: Get the data from entity and features.
+            >>> fs.get_data(entity='accounts', features={'Jan': fp.process_id})
+                 accounts    Jan
+            0    Alpha Co  200.0
+            1    Blue Inc   50.0
+            2   Jones LLC  150.0
+            3  Orange Inc    NaN
+            4  Yellow Inc    NaN
+            5     Red Inc  150.0
+
+            # Example 3: Get the data from dataset name.
+            >>> dc = DatasetCatalog(repo=repo, data_domain=data_domain)
+            >>> dc.build_dataset(entity='accounts',
+            ...                  selected_features={'Jan': fp.process_id,
+            ...                                     'Feb': fp.process_id},
+            ...                  view_name='test_get_data',
+            ...                  description='Dataset with Jan and Feb')
+            >>> fs.get_data(dataset_name='test_get_data')
+                 accounts    Feb    Jan
+            0    Alpha Co  210.0  200.0
+            1    Blue Inc   90.0   50.0
+            2   Jones LLC  200.0  150.0
+            3  Orange Inc  210.0    NaN
+            4  Yellow Inc   90.0    NaN
+            5     Red Inc  200.0  150.0
+
+
+            # Example 4: Get the data from Entity and Features, where entity
+            #            object and feature objects passed to the entity and
+            #            features arguments.
+            >>> # Create features.
+            >>> feature1 = Feature('sales:Mar',
+            ...                    df.Mar,
+            ...                    feature_type=FeatureType.CATEGORICAL)
+
+            >>> feature2 = Feature('sales:Apr',
+            ...                    df.Apr,
+            ...                    feature_type=FeatureType.CONTINUOUS)
+
+            >>> # Create entity.
+            >>> entity = Entity(name='accounts_entity', columns=['accounts'])
+
+            >>> fp1 = FeatureProcess(repo=repo,
+            ...                      data_domain=data_domain,
+            ...                      object=df,
+            ...                      entity=entity,
+            ...                      features=[feature1, feature2])
+            >>> fp1.run()
+            Process '5522c034-684d-11f0-99c5-a30631e77953' started.
+            Process '5522c034-684d-11f0-99c5-a30631e77953' completed.
+            True
+
+            >>> fs.get_data(entity=entity, features={feature1.name: fp1.process_id,
+            ...                                      feature2.name: fp1.process_id})
+                 accounts  sales:Mar  sales:Apr
+            0    Alpha Co      215.0      250.0
+            1    Blue Inc       95.0      101.0
+            2   Jones LLC      140.0      180.0
+            3  Orange Inc        NaN      250.0
+            4  Yellow Inc        NaN        NaN
+            5     Red Inc      140.0        NaN
+
+            # Example 5: Get the data for the time passed by the user via the as_of argument.
+            >>> import time
+            >>> from datetime import datetime as dt, date as d
+
+            # Retrieve the record where accounts == 'Blue Inc'.
+            >>> df_test = df[df['accounts'] == 'Blue Inc']
+            >>> df_test
+                        Feb   Jan   Mar    Apr   datetime
             accounts
-
-
-
-
-
-            >>>
-
-
-
-
-
-
+            Blue Inc   90.0  50.0  95.0  101.0 04/01/2017
+
+            # The example updates the data. Hence, creating a new table to avoid modifying the existing table's data.
+            >>> df_test.to_sql('sales_test', if_exists='replace')
+            >>> test_df = DataFrame('sales_test')
+            >>> test_df
+               accounts   Feb  Jan  Mar  Apr  datetime
+            0  Blue Inc  90.0   50   95  101  17/01/04
+
+            >>> # Create a feature process.
+            >>> fp = FeatureProcess(repo=repo,
+            ...                     data_domain=data_domain,
+            ...                     object=test_df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb'])
+
+            >>> # Run the feature process.
+            >>> fp.run()
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
+            True

-
-
+            >>> # Running the same process more than once to demonstrate how user can
+            >>> # retrieve specific version of Features using argument 'as_of'.
+            >>> # Wait for 20 seconds. Then update the data. Then run again.
+            >>> time.sleep(20)
+            >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
+            TeradataCursor uRowsHandle=269 bClosed=False
+
+            >>> # Run the feature process again.
+            >>> fp.run()
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
+            True

-
-
-
-
-        required_columns = entity_columns + [feature_group.data_source.timestamp_col_name] + columns
-        return df.select(required_columns)
+            >>> # Then again wait for 20 seconds. Then update the data. Then run again.
+            >>> time.sleep(20)
+            >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
+            TeradataCursor uRowsHandle=397 bClosed=False

-
+            >>> # Run the feature process again.
+            >>> fp.run()
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
+            Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
+            True
+
+            # Retrieve specific version of Features at '2025-08-15 12:37:23'.
+            >>> as_of_time = dt(2025, 8, 15, 12, 37, 23)
+
+            >>> # time passed to as_of in datetime.datetime format.
+            >>> fs.get_data(process_id=fp.process_id,
+            ...             as_of=as_of_time)
+               accounts    Feb  Jan
+            0  Blue Inc  900.0  500
+
+            >>> # time passed to as_of in string format.
+            >>> fs.get_data(process_id=fp.process_id,
+            ...             as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
+               accounts    Feb  Jan
+            0  Blue Inc  900.0  500
+
+            # Example 6: Get the data for the time passed by the user via the as_of argument
+            #            by sourcing entity and features.
+            >>> # time passed to as_of in datetime.datetime format.
+            >>> fs.get_data(entity='accounts',
+            ...             features={'Feb': fp.process_id,
+            ...                       'Jan': fp.process_id},
+            ...             as_of=as_of_time)
+               accounts    Feb  Jan
+            0  Blue Inc  900.0  500
+
+            >>> # time passed to as_of in string format.
+            >>> fs.get_data(entity='accounts',
+            ...             features={'Feb': fp.process_id,
+            ...                       'Jan': fp.process_id},
+            ...             as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
+               accounts    Feb  Jan
+            0  Blue Inc  900.0  500
+
+            # Example 7: Get the latest data for the given process_id.
+            >>> fs.get_data(process_id=fp.process_id, include_historic_records=False)
+               accounts     Feb   Jan
+            0  Blue Inc  9000.0  5000
+
+            # Example 8: Get the historic data for the given process_id.
+            >>> fs.get_data(process_id=fp.process_id, include_historic_records=True)
+               accounts     Feb   Jan
+            0  Blue Inc  9000.0  5000
+            1  Blue Inc    90.0    50
+            2  Blue Inc    90.0  5000
+            3  Blue Inc   900.0   500
+            4  Blue Inc   900.0  5000
+            5  Blue Inc   900.0    50
+            6  Blue Inc    90.0   500
+            7  Blue Inc  9000.0    50
+            8  Blue Inc  9000.0   500
+
+            # Example 9: Get the latest data for the given feature.
+            >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=False)
+               accounts     Feb
+            0  Blue Inc  9000.0
+
+            # Example 10: Get the historic data for the given feature.
+            >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=True)
+               accounts     Feb
+            0  Blue Inc   900.0
+            1  Blue Inc    90.0
+            2  Blue Inc  9000.0
+
+        """
+        # Validate argument types.
+        args = []
+        args.append(["process_id", process_id, True, (str), True])
+        args.append(["entity", entity, True, (Entity, str), True])
+        args.append(["features", features, True, (dict), True])
+        args.append(["dataset_name", dataset_name, True, (str), True])
+        args.append(["as_of", as_of, True, (str, dt), True])
+        args.append(["include_historic_records", include_historic_records, True, (bool)])
+
+        _Validators._validate_function_arguments(args)
+
+        # Validate mutually exclusive arguments.
+        _Validators._validate_mutually_exclusive_argument_groups({"process_id": process_id},
+                                                                 {"dataset_name": dataset_name},
+                                                                 {"entity": entity, "features": features})
+
+        # Validate whether entity and features are mutually inclusive.
+        _Validators._validate_mutually_inclusive_arguments(entity, "entity",
+                                                           features, "features")
+
+        # Validate at least one argument is passed.
+        _Validators._validate_any_argument_passed({"process_id": process_id,
+                                                   "entity' and 'features": entity,
+                                                   "dataset_name": dataset_name})
+
+        # If the user passes a view, return a DataFrame on it directly.
+        if dataset_name:
+            return DataFrame(in_schema(self.__repo, dataset_name))
+
+        if process_id:
+            entity, features = (
+                self.__get_entity_and_features_from_process_id(process_id))
+
+        # Generate the view name.
+        view_name = UtilFuncs._generate_temp_table_name(databasename=self.__repo)
+
+        # When as_of is not None, get all the data instead of only the latest.
+        if as_of:
+            include_historic_records = True
+
+        # Create the DatasetCatalog and build the dataset on top of it.
+        dc = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
+        dataset = dc._build_dataset(
+            entity, features,
+            include_historic_records=include_historic_records,
+            include_time_series=True if as_of else False,
+            view_name=view_name,
+            temporary=True)
+
+        if as_of:
+            return self.__filter_dataset_by_as_of(dataset, entity, list(features.keys()), as_of)
+        return dataset
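When as_of is given, the method widens the build to historic records and then filters by each feature's validity window (see __filter_dataset_by_as_of below). Usage sketch reusing the docstring's process id as a placeholder:

    from datetime import datetime
    from teradataml import FeatureStore

    fs = FeatureStore("vfs_v1", data_domain="sales")
    snapshot = fs.get_data(process_id="6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea",
                           as_of=datetime(2025, 8, 15, 12, 37, 23))
    print(snapshot)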
+
+    def __get_entity_and_features_from_process_id(self, process_id):
         """
         DESCRIPTION:
-            Internal function to get
-
+            Internal function to get entity_columns, feature_columns, and
+            selected_features using process_id.

         PARAMETERS:
-
+            process_id:
                 Required Argument.
-                Specifies the
+                Specifies the process id of FeatureProcess.
                 Types: str

-            type_:
-                Required Argument.
-                Specifies the type of the objects stored in feature store.
-                Permitted Values:
-                    * feature
-                    * data_source
-                    * entity
-                Types: str
-
         RETURNS:
-
+            entity_id, selected_features

         RAISES:
             None

         EXAMPLES:
-            >>>
+            >>> fs.__get_entity_and_features_from_process_id('123-acd')
         """
-
-
-
-
-
-
-
-
-
+        feature_ver = self.__get_feature_version()
+        feature_ver = feature_ver[feature_ver["feature_version"] == process_id]
+
+        # Check if a feature with that process id exists or not. If not, raise error.
+        if feature_ver.shape[0] == 0:
+            res = _FSUtils._get_data_domains(self.__repo, process_id, 'feature_version')
+            if res:
+                msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
+                error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
+                                                 self.__data_domain, res)
+            else:
+                msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
+                error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
+                                                 self.__data_domain)
+            raise TeradataMlException(error_msg, msg_code)

-
+        selected_features = {}
+        for f_ver in feature_ver.itertuples():
+            entity_id = f_ver.entity_id
+            selected_features[f_ver.feature_name] = process_id
+        return entity_id, selected_features
+
def __filter_dataset_by_as_of(self, dataset, entity_column, features_column_list, as_of):
|
|
1490
2549
|
"""
|
|
1491
2550
|
DESCRIPTION:
|
|
1492
|
-
Internal function to
|
|
1493
|
-
|
|
2551
|
+
Internal function to filter the dataset using as_of and
|
|
2552
|
+
return only required columns.
|
|
1494
2553
|
|
|
1495
2554
|
PARAMETERS:
|
|
1496
|
-
|
|
2555
|
+
dataset:
|
|
1497
2556
|
Required Argument.
|
|
1498
|
-
Specifies the
|
|
2557
|
+
Specifies the teradataml DataFrame.
|
|
2558
|
+
Types: teradataml DataFrame
|
|
2559
|
+
|
|
2560
|
+
entity_column:
|
|
2561
|
+
Required Argument.
|
|
2562
|
+
Specifies the column name of entity.
|
|
1499
2563
|
Types: str
|
|
1500
2564
|
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
2565
|
+
features_column_list:
|
|
2566
|
+
Required Argument.
|
|
2567
|
+
Specifies the list of feature columns list.
|
|
2568
|
+
Types: list of str
|
|
2569
|
+
|
|
2570
|
+
as_of:
|
|
2571
|
+
Required Argument.
|
|
2572
|
+
Specifies the time to retrieve the Feature Values instead of
|
|
2573
|
+
retrieving the latest values.
|
|
2574
|
+
Notes:
|
|
2575
|
+
* Applicable only when "process_id" is passed to the function.
|
|
2576
|
+
* Ignored when "dataset_name" is passed.
|
|
2577
|
+
Types: str or datetime.datetime
|
|
2578
|
+
|
|
2579
|
+
RETURNS:
|
|
2580
|
+
teradataml DataFrame
|
|
2581
|
+
|
|
2582
|
+
RAISES:
|
|
2583
|
+
None
|
|
2584
|
+
|
|
2585
|
+
EXAMPLES:
|
|
2586
|
+
>>> load_examples_data("dataframe", "sales")
|
|
2587
|
+
>>> df = DataFrame("sales")
|
|
2588
|
+
>>> fs.__filter_dataset_by_as_of(df, "accounts", ["Jan", "Feb"], datetime.datetime(2025, 1, 1))
|
|
2589
|
+
|
|
2590
|
+
"""
|
|
2591
|
+
conditions = [
|
|
2592
|
+
(dataset[f"{f}_start_time"] <= as_of) & (as_of <= dataset[f"{f}_end_time"])
|
|
2593
|
+
for f in features_column_list
|
|
2594
|
+
]
|
|
2595
|
+
combined_condition = reduce(operator.and_, conditions)
|
|
2596
|
+
required_columns = UtilFuncs._as_list(entity_column) + features_column_list
|
|
2597
|
+
return dataset[combined_condition].select(required_columns)
|
|
2598
|
+
|
|
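The filter above is the point-in-time ("as of") lookup: each feature column f in the dataset is accompanied by f_start_time and f_end_time validity columns, one interval predicate is built per feature, and functools.reduce folds them together with operator.and_, so a row survives only if all requested features were valid at the requested instant. A self-contained pandas sketch of the same idea (pandas here is only a stand-in for the teradataml DataFrame):

    import operator
    from datetime import datetime
    from functools import reduce

    import pandas as pd

    def filter_by_as_of(df, entity_column, feature_columns, as_of):
        # One validity-interval check per feature, ANDed together.
        conditions = [
            (df[f"{f}_start_time"] <= as_of) & (as_of <= df[f"{f}_end_time"])
            for f in feature_columns
        ]
        combined = reduce(operator.and_, conditions)
        return df[combined][[entity_column, *feature_columns]]

    df = pd.DataFrame({
        "accounts": ["Blue Inc"],
        "Jan": [50.0],
        "Jan_start_time": [datetime(2024, 1, 1)],
        "Jan_end_time": [datetime(2025, 6, 1)],
    })
    print(filter_by_as_of(df, "accounts", ["Jan"], datetime(2025, 1, 1)))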
+    def __get_feature_group_names(self, name, type_):
+        """
+        DESCRIPTION:
+            Internal function to get the associated group names for
+            Feature or DataSource or Entity.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the Feature or DataSource or Entity.
+                Types: str
+
+            type_:
+                Required Argument.
+                Specifies the type of the objects stored in feature store.
+                Permitted Values:
+                    * feature
+                    * data_source
+                    * entity
+                Types: str
+
+        RETURNS:
+            list
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> self.__get_feature_group_names('admissions', 'data_source')
+        """
+        if type_ == "feature":
+            df = self.__get_features_df()
+            return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
+        elif type_ == "data_source":
+            df = self.__get_feature_group_df()
+            return [rec.name for rec in df[df.data_source_name == name].itertuples()]
+        elif type_ == "entity":
+            df = self.__get_feature_group_df()
+            return [rec.name for rec in df[df.entity_name == name].itertuples()]
+
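The if/elif chain above maps each object type to a source frame, a filter column, and the attribute to collect. A standalone sketch of the same dispatch reduced to plain dictionaries (the record shapes are assumptions for illustration only):

    # Toy records standing in for the features and feature-group DataFrames.
    features = [{"name": "sales_data_Feb", "group_name": "sales"}]
    groups = [{"name": "sales", "data_source_name": "sales", "entity_name": "sales"}]

    dispatch = {
        "feature":     (features, "name",             "group_name"),
        "data_source": (groups,   "data_source_name", "name"),
        "entity":      (groups,   "entity_name",      "name"),
    }

    def get_feature_group_names(name, type_):
        records, filter_col, result_col = dispatch[type_]
        return [r[result_col] for r in records
                if r[filter_col] == name and r[result_col] is not None]

    print(get_feature_group_names("sales", "data_source"))  # ['sales']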
+    def __remove_obj(self, name, type_, action="archive"):
+        """
+        DESCRIPTION:
+            Internal function to remove Feature or DataSource or
+            Entity from the repo.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the Feature or DataSource or Entity.
+                Types: str
+
+            type_:
+                Required Argument.
+                Specifies the type of "name".
                 Types: str
                 Permitted Values:
                     * feature

@@ -1539,6 +2689,9 @@ class FeatureStore:
         if isinstance(name, _vars[type_]["class"]):
             name = name.name
 
+        # Get the feature info DataFrame.
+        feature_info_df = self.__get_feature_info_df()
+
         # Before removing it, check if it is associated with any FeatureGroup.
         # If yes, raise error. Applicable only for Archive.
         if action == "archive":

@@ -1550,6 +2703,47 @@ class FeatureStore:
                 raise TeradataMlException(Messages.get_message(
                     MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
                     MessageCodes.FUNC_EXECUTION_FAILED)
+            # Check if the feature or entity exists in the Feature metadata table.
+            # If yes, raise an error. Applicable only for Archive.
+            info_checks = {
+                'feature': ('name', MessageCodes.EFS_FEATURE_IN_CATALOG),
+                'entity': ('entity_name', MessageCodes.EFS_ENTITY_IN_CATALOG)
+            }
+            if type_ in info_checks:
+                col, error_code = info_checks[type_]
+                validate_df = feature_info_df[feature_info_df[col].isin([name])]
+                if validate_df.shape[0] > 0:
+                    if type_ == "entity":
+                        related_features = [feature.name for feature in validate_df.itertuples()]
+                        features = ", ".join(("'{}'".format(f) for f in related_features))
+                        err_msg = Messages.get_message(error_code,
+                                                       name,
+                                                       features)
+                    else:
+                        err_msg = Messages.get_message(error_code,
+                                                       name)
+                    raise TeradataMlException(err_msg, error_code)
+
+            stg_table = _FeatureStoreDFContainer.get_df("{}_staging".format(type_), self.__repo, self.__data_domain)
+            stg_table = stg_table[stg_table.name == name]
+            if stg_table.shape[0] > 0:
+                print("{} '{}' is already archived.".format(c_name_, name))
+                return False
+
+        # Validation for the delete action - ensure the object is already archived.
+        if action == "delete":
+            # Check if the object exists in the main table (i.e., is not archived).
+            main_table_name = self.__table_names[type_]
+            main_df = _FeatureStoreDFContainer.get_df(type_, self.__repo, self.__data_domain)
+            existing_records = main_df[(main_df["name"] == name)]
+
+            if existing_records.shape[0] > 0:
+                error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
+                error_msg = Messages.get_message(error_code,
+                                                 c_name_,
+                                                 name,
+                                                 type_)
+                raise TeradataMlException(error_msg, error_code)
 
         if type_ == "entity":
             res = self._remove_entity(name, action)
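Taken together, the checks above enforce a two-phase lifecycle: an active object must be archived (moved to its staging table) before it can be deleted, archiving is refused while the object is still referenced by the feature catalog, and archiving an already-archived object is a no-op. A minimal sketch of that state machine, with Python sets standing in for the repo's main and staging tables (an illustration, not the teradataml implementation):

    def remove_obj(name, main, staging, action="archive"):
        if action == "archive":
            if name in staging:
                print(f"'{name}' is already archived.")
                return False
            main.discard(name)
            staging.add(name)          # archive = move to staging
            return True
        if action == "delete":
            if name in main:           # still active: must archive first
                raise RuntimeError(f"Archive '{name}' before deleting it.")
            staging.discard(name)      # delete = drop from staging
            return True

    main, staging = {"sales_data"}, set()
    remove_obj("sales_data", main, staging)                    # archived
    remove_obj("sales_data", main, staging, action="delete")   # deleted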
@@ -1560,7 +2754,8 @@ class FeatureStore:
 
             res = _delete_data(table_name=table_name,
                                schema_name=self.__repo,
-                               delete_conditions=(Col("name") == name)
+                               delete_conditions=(Col("name") == name) &
+                                                 (Col("data_domain") == self.__data_domain)
                                )
 
             if res == 1:

@@ -1607,13 +2802,15 @@ class FeatureStore:
         # remove it from xref table first.
         _delete_data(table_name=ent_table_xref,
                      schema_name=self.__repo,
-                     delete_conditions=(Col("entity_name") == name)
+                     delete_conditions=(Col("entity_name") == name) &
+                                       (Col("data_domain") == self.__data_domain)
                      )
 
         # remove from entity table.
         res = _delete_data(table_name=ent_table,
                            schema_name=self.__repo,
-                           delete_conditions=(Col("name") == name)
+                           delete_conditions=(Col("name") == name) &
+                                             (Col("data_domain") == self.__data_domain)
                            )
 
         return res
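The new data_domain predicate in each delete is the multi-tenancy fix these hunks are about: two data domains in the same repo may hold objects with the same name, so every delete must be scoped by (name, data_domain) rather than by name alone. A tiny standalone illustration:

    # Two domains holding a same-named object; a name-only delete would
    # remove both rows.
    rows = [
        {"name": "sales_data", "data_domain": "ALICE"},
        {"name": "sales_data", "data_domain": "BOB"},
    ]

    def delete_rows(rows, name, data_domain):
        # Keep every row that does NOT match both predicates.
        return [r for r in rows
                if not (r["name"] == name and r["data_domain"] == data_domain)]

    print(delete_rows(rows, "sales_data", "ALICE"))
    # [{'name': 'sales_data', 'data_domain': 'BOB'}]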
@@ -1623,7 +2820,7 @@ class FeatureStore:
         DESCRIPTION:
             Archives DataSource from repository. Note that archived DataSource
             is not available for any further processing. Archived DataSource can be
-            viewed using "
+            viewed using the "list_data_sources(archived=True)" method.
 
         PARAMETERS:
             data_source:

@@ -1639,30 +2836,60 @@ class FeatureStore:
             TeradataMLException, TypeError, ValueError
 
         EXAMPLES:
-            >>> from teradataml import DataSource, FeatureStore
-            # Create a DataSource using SELECT statement.
-            >>> ds = DataSource(name="sales_data", source="select * from sales")
+            >>> from teradataml import DataFrame, DataSource, FeatureStore
             # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Archive the DataSource 'sales_data' in the repo 'vfs_v1' using DataSource object.
+            # Create a DataSource using SELECT statement.
+            >>> ds = DataSource(name="sales_data", source="select * from sales")
             # Apply DataSource to FeatureStore.
             >>> fs.apply(ds)
             True
+
             # List the available DataSources.
             >>> fs.list_data_sources()
-
-            name
-            sales_data
+            description timestamp_column source creation_time modified_time
+            name data_domain
+            sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None
 
             # Archive DataSource with name "sales_data".
             >>> fs.archive_data_source("sales_data")
             DataSource 'sales_data' is archived.
             True
-
+
             # List the available DataSources after archive.
-            >>> fs.list_data_sources()
-
-
-
+            >>> fs.list_data_sources(archived=True)
+            name data_domain description timestamp_column source creation_time modified_time archived_time
+            0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
+
+            # Example 2: Archive the DataSource 'sales_data_df' in the repo 'vfs_v1' using DataSource name.
+            # Create a DataSource using teradataml DataFrame.
+            >>> from teradataml import DataFrame
+            >>> load_example_data('dataframe', ['sales'])
+            >>> df = DataFrame("sales")
+            >>> ds2 = DataSource(name="sales_data_df", source=df)
+
+            # Apply DataSource to FeatureStore.
+            >>> fs.apply(ds2)
+            True
+
+            # Archive DataSource with name "sales_data_df".
+            >>> fs.archive_data_source("sales_data_df")
+            DataSource 'sales_data_df' is archived.
+            True
+
+            # List the available DataSources after archive.
+            >>> fs.list_data_sources(archived=True)
+            name data_domain description timestamp_column source creation_time modified_time archived_time
+            0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
+            1 sales_data_df ALICE None None select * from sales 2025-07-28 04:26:10.123456 None 2025-07-28 04:26:45.456789
+
+
         """
         return self.__remove_obj(name=data_source, type_="data_source")
 

@@ -1686,16 +2913,23 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create DataSource with source as teradataml DataFrame.
             >>> ds = DataSource(name="sales_data", source=df)
-            # # Create FeatureStore for repo 'vfs_v1'.
-            >>> fs = FeatureStore("vfs_v1")
             # Apply the DataSource to FeatureStore.
             >>> fs.apply(ds)
             True
+
             # Let's first archive the DataSource.
             >>> fs.archive_data_source("sales_data")
             DataSource 'sales_data' is archived.

@@ -1705,7 +2939,12 @@ class FeatureStore:
             >>> fs.delete_data_source("sales_data")
             DataSource 'sales_data' is deleted.
             True
-
+
+            # List the available DataSources after delete.
+            >>> fs.list_data_sources()
+            Empty DataFrame
+            Columns: [description, timestamp_column, source, creation_time, modified_time]
+            Index: []
         """
         return self.__remove_obj(name=data_source, type_="data_source", action="delete")
 
@@ -1714,7 +2953,7 @@ class FeatureStore:
         DESCRIPTION:
             Archives Feature from repository. Note that archived Feature
             is not available for any further processing. Archived Feature can be
-            viewed using "
+            viewed using the "list_features(archived=True)" method.
 
         PARAMETERS:
             feature:

@@ -1731,36 +2970,62 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, Feature, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Archive the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
             # Create Feature for Column 'Feb'.
             >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
-            # Create FeatureStore for the repo 'staging_repo'.
-            >>> fs = FeatureStore("staging_repo")
             # Apply the Feature to FeatureStore.
             >>> fs.apply(feature)
             True
+
             # List the available Features.
             >>> fs.list_features()
-
-            name
-            sales_data_Feb Feb None
+            id column_name description tags data_type feature_type status creation_time modified_time group_name
+            name data_domain
+            sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None None
 
             # Archive Feature with name "sales_data_Feb".
             >>> fs.archive_feature(feature=feature)
             Feature 'sales_data_Feb' is archived.
             True
-
-
-
-
-
-
+
+            # List the available archived Features.
+            >>> fs.list_features(archived=True)
+            id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
+            0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
+
+            # Example 2: Archive the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
+            # Create Feature for Column 'Jan'.
+            >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
+            # Apply the Feature to FeatureStore.
+            >>> fs.apply(feature2)
+            True
+
+            # Archive Feature with name "sales_data_Jan".
+            >>> fs.archive_feature(feature="sales_data_Jan")
+            Feature 'sales_data_Jan' is archived.
+            True
+
+            # List the available archived Features.
+            >>> fs.list_features(archived=True)
+            id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
+            0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
+            1 2 sales_data_Jan ALICE Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:42:01.641026 None 2025-07-28 04:43:35.600000 None
+
         """
         return self.__remove_obj(name=feature, type_="feature")
 
-    def delete(self):
+    def delete(self, force=False):
         """
         DESCRIPTION:
             Removes the FeatureStore and its components from repository.

@@ -1777,7 +3042,15 @@ class FeatureStore:
             to not use this function.
 
         PARAMETERS:
-
+            force:
+                Optional Argument.
+                Specifies whether to forcefully delete the feature store or not.
+                When set to True, the delete() method proceeds to drop objects
+                even if a previous step errored. Otherwise, the delete() method
+                raises the exception at the first error and does not proceed to
+                remove other objects.
+                Defaults: False
+                Types: bool
 
         RETURNS:
             bool.

@@ -1789,23 +3062,36 @@ class FeatureStore:
             # Setup FeatureStore for repo 'vfs_v1'.
             >>> from teradataml import FeatureStore
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore.
             >>> fs.setup()
             True
-
+
+            # Delete the FeatureStore and all its components.
             >>> fs.delete()
+            The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
+            True
+
+            # Forcefully delete the FeatureStore and all its components.
+            >>> fs.delete(force=True)
+            The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
             True
-            >>>
         """
+        _args = []
+        _args.append(["force", force, True, (bool)])
+        # Validate argument types.
+        _Validators._validate_function_arguments(_args)
+
         confirmation = input("The function removes Feature Store and drops the "
                              "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
 
         if confirmation in ["Y", "y"]:
-            return self.__drop_feature_store_objects(
+            return self.__drop_feature_store_objects(force=force)
 
         return False
 
|
-
|
|
1808
|
-
def __drop_feature_store_objects(repo_name):
|
|
3094
|
+
def __drop_feature_store_objects(self, force=False):
|
|
1809
3095
|
"""
|
|
1810
3096
|
DESCRIPTION:
|
|
1811
3097
|
Removes the FeatureStore and it's components from repository.
|
|
@@ -1816,37 +3102,77 @@ class FeatureStore:
|
|
|
1816
3102
|
Specifies the name of the repository.
|
|
1817
3103
|
Types: str
|
|
1818
3104
|
|
|
3105
|
+
force:
|
|
3106
|
+
Optional Argument.
|
|
3107
|
+
Specifies whether to forcefully delete feature store or not.
|
|
3108
|
+
When set to True, delete() method proceeds to drop objects
|
|
3109
|
+
even if previous step is errored. Otherwise, delete() method
|
|
3110
|
+
raises the exception at the first error and do not proceed to
|
|
3111
|
+
remove other objects.
|
|
3112
|
+
Defaults: False.
|
|
3113
|
+
Types: bool
|
|
3114
|
+
|
|
1819
3115
|
RETURNS:
|
|
1820
3116
|
bool
|
|
1821
3117
|
"""
|
|
1822
3118
|
# Drop all the tables and staging tables.
|
|
1823
3119
|
tables_ = [
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
3120
|
+
self.__table_names["group_features"],
|
|
3121
|
+
self.__table_names["feature_group"],
|
|
3122
|
+
self.__table_names['feature'],
|
|
3123
|
+
self.__table_names['entity_xref'],
|
|
3124
|
+
self.__table_names['entity'],
|
|
3125
|
+
self.__table_names['data_source'],
|
|
3126
|
+
self.__table_names['feature_process'],
|
|
3127
|
+
self.__table_names['feature_runs'],
|
|
3128
|
+
self.__table_names['feature_metadata'],
|
|
3129
|
+
self.__table_names['dataset_catalog'],
|
|
3130
|
+
self.__table_names['dataset_features'],
|
|
3131
|
+
self.__table_names['data_domain'],
|
|
3132
|
+
self.__table_names['version']
|
|
1830
3133
|
]
|
|
1831
3134
|
|
|
1832
3135
|
tables_stg_ = [
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
3136
|
+
self.__table_names['feature_staging'],
|
|
3137
|
+
self.__table_names["entity_staging"],
|
|
3138
|
+
self.__table_names["entity_staging_xref"],
|
|
3139
|
+
self.__table_names["data_source_staging"],
|
|
3140
|
+
self.__table_names["feature_group_staging"],
|
|
3141
|
+
self.__table_names["group_features_staging"]
|
|
1839
3142
|
]
|
|
1840
3143
|
|
|
1841
3144
|
# Drop all the triggers first. So that tables can be dropped.
|
|
1842
|
-
|
|
1843
|
-
for trigger in
|
|
1844
|
-
execute_sql("drop trigger {}.{}".format(
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
3145
|
+
ignr_errors = 'all' if force else None
|
|
3146
|
+
for trigger in EFS_TRIGGERS.values():
|
|
3147
|
+
execute_sql("drop trigger {}.{}".format(self.__repo, trigger),
|
|
3148
|
+
ignore_errors=ignr_errors)
|
|
3149
|
+
|
|
3150
|
+
# Drop the views first.
|
|
3151
|
+
views_ = [EFS_DB_COMPONENTS['feature_version']]
|
|
3152
|
+
for view in views_:
|
|
3153
|
+
db_drop_view(view, schema_name=self.__repo, suppress_error=force)
|
|
3154
|
+
|
|
3155
|
+
# Drop datesets.
|
|
3156
|
+
# Used EFS_DB_COMPONENTS['dataset_catalog'] because it contains all the datasets.
|
|
3157
|
+
# The get_df methods are filtered by data_domain, hence they don't show all datasets.
|
|
3158
|
+
for dataset in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['dataset_catalog'])).itertuples():
|
|
3159
|
+
db_drop_view(dataset.name, schema_name=self.__repo, suppress_error=force)
|
|
3160
|
+
|
|
3161
|
+
# Drop all the Feature tables.
|
|
3162
|
+
dropped_tab = set()
|
|
3163
|
+
# Used EFS_DB_COMPONENTS['feature_metadata'] because it contains all the features.
|
|
3164
|
+
# The get_df methods are filtered by data_domain, hence they don't show all features.
|
|
3165
|
+
for rec in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['feature_metadata'])).itertuples():
|
|
3166
|
+
# Avoid dropping the same table again.
|
|
3167
|
+
dropped_tab.add(rec.table_name)
|
|
3168
|
+
|
|
3169
|
+
for table in dropped_tab:
|
|
3170
|
+
db_drop_table(table, schema_name=self.__repo, suppress_error=force)
|
|
3171
|
+
|
|
3172
|
+
for table in (tables_ + tables_stg_):
|
|
3173
|
+
db_drop_table(table, schema_name=self.__repo, suppress_error=force)
|
|
3174
|
+
|
|
3175
|
+
execute_sql(f"DROP DATABASE {self.__repo}")
|
|
1850
3176
|
|
|
1851
3177
|
return True
|
|
1852
3178
|
|
|
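The drop order in __drop_feature_store_objects() is dependency-driven: triggers first (they would block table drops), then views and per-dataset views, then the per-feature data tables collected from the metadata, then the metadata and staging tables, and finally the database itself; with force=True every individual drop suppresses its errors. A sketch of the same ordered teardown with a hypothetical drop helper (the real code uses execute_sql, db_drop_view and db_drop_table):

    def drop_all(repo, triggers, views, tables, force=False):
        def attempt(stmt):
            try:
                print(stmt)            # stand-in for executing SQL
            except Exception:
                if not force:
                    raise
        for t in triggers:             # 1. triggers block table drops
            attempt(f"DROP TRIGGER {repo}.{t}")
        for v in views:                # 2. views depend on tables
            attempt(f"DROP VIEW {repo}.{v}")
        for tab in tables:             # 3. data, metadata and staging tables
            attempt(f"DROP TABLE {repo}.{tab}")
        attempt(f"DROP DATABASE {repo}")  # 4. finally the repo itself
        return True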
@@ -1870,16 +3196,30 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, Feature, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Delete the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
             # Create Feature for Column 'Feb'.
             >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
-            # Create a feature store with name "staging_repo".
-            >>> fs = FeatureStore("staging_repo")
             # Add the feature created above in the feature store.
             >>> fs.apply(feature)
             True
+
+            # List the available Features.
+            >>> fs.list_features()
+            id column_name description tags data_type feature_type status creation_time modified_time group_name
+            name data_domain
+            sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:49:55.827391 None None
+
             # Let's first archive the Feature.
             >>> fs.archive_feature(feature=feature)
             Feature 'sales_data_Feb' is archived.

@@ -1889,7 +3229,35 @@ class FeatureStore:
             >>> fs.delete_feature(feature=feature)
             Feature 'sales_data_Feb' is deleted.
             True
-
+
+            # List the available Features after delete.
+            >>> fs.list_features()
+            Empty DataFrame
+            Columns: [id, column_name, description, tags, data_type, feature_type, status, creation_time, modified_time, group_name]
+            Index: []
+
+            # Example 2: Delete the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
+            # Create Feature for Column 'Jan'.
+            >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
+            # Add the feature created above in the feature store.
+            >>> fs.apply(feature2)
+            True
+
+            # List the available Features.
+            >>> fs.list_features()
+            id column_name description tags data_type feature_type status creation_time modified_time group_name
+            name data_domain
+            sales_data_Jan ALICE 2 Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:50:55.827391 None None
+
+            # Let's first archive the Feature using feature name.
+            >>> fs.archive_feature(feature="sales_data_Jan")
+            Feature 'sales_data_Jan' is archived.
+            True
+
+            # Delete Feature with name "sales_data_Jan".
+            >>> fs.delete_feature(feature="sales_data_Jan")
+            Feature 'sales_data_Jan' is deleted.
+            True
         """
         return self.__remove_obj(name=feature, type_="feature", action="delete")
 
@@ -1898,7 +3266,7 @@ class FeatureStore:
         DESCRIPTION:
             Archives Entity from repository. Note that archived Entity
             is not available for any further processing. Archived Entity can be
-            viewed using "
+            viewed using the "list_entities(archived=True)" method.
 
         PARAMETERS:
             entity:

@@ -1915,31 +3283,58 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, Entity, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Archive the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
             # Create Entity using teradataml DataFrame Column.
             >>> entity = Entity(name="sales_data", columns=df.accounts)
-            # Create FeatureStore for repo 'staging_repo'.
-            >>> fs = FeatureStore("staging_repo")
             # Apply the entity to FeatureStore.
             >>> fs.apply(entity)
             True
+
             # List all the available entities.
             >>> fs.list_entities()
-
-            name
-            sales_data accounts
+            description creation_time modified_time entity_column
+            name data_domain
+            sales_data ALICE None 2025-07-28 04:54:34.687139 None accounts
 
             # Archive Entity with name "sales_data".
             >>> fs.archive_entity(entity=entity.name)
             Entity 'sales_data' is archived.
             True
+
             # List the entities after archive.
-            >>> fs.list_entities()
-
-
-
+            >>> fs.list_entities(archived=True)
+            name data_domain description creation_time modified_time archived_time entity_column
+            0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
+
+            # Example 2: Archive the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
+            # Create Entity using teradataml DataFrame Column.
+            >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
+            # Apply the entity to FeatureStore.
+            >>> fs.apply(entity2)
+            True
+
+            # Archive Entity with Entity object.
+            >>> fs.archive_entity(entity=entity2)
+            Entity 'sales_data_df' is archived.
+            True
+
+            # List the entities after archive.
+            >>> fs.list_entities(archived=True)
+            name data_domain description creation_time modified_time archived_time entity_column
+            0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
+            1 sales_data_df ALICE None 2025-07-28 04:56:01.123456 None 2025-07-28 04:57:35.456789 accounts
+
         """
         return self.__remove_obj(name=entity, type_="entity")
 

@@ -1963,16 +3358,30 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, Entity, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Delete the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
             # Create Entity using teradataml DataFrame Column.
             >>> entity = Entity(name="sales_data", columns=df.accounts)
-            # Create FeatureStore for repo 'staging_repo'.
-            >>> fs = FeatureStore("staging_repo")
             # Apply the entity to FeatureStore.
             >>> fs.apply(entity)
             True
+
+            # List all the available entities.
+            >>> fs.list_entities()
+            description creation_time modified_time entity_column
+            name data_domain
+            sales_data ALICE None 2025-07-28 04:58:01.123456 None accounts
+
             # Let's first archive the entity.
             >>> fs.archive_entity(entity=entity.name)
             Entity 'sales_data' is archived.

@@ -1982,7 +3391,35 @@ class FeatureStore:
             >>> fs.delete_entity(entity=entity.name)
             Entity 'sales_data' is deleted.
             True
-
+
+            # List the entities after delete.
+            >>> fs.list_entities()
+            Empty DataFrame
+            Columns: [description, creation_time, modified_time, entity_column]
+            Index: []
+
+            # Example 2: Delete the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
+            # Create Entity using teradataml DataFrame Column.
+            >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
+            # Apply the entity to FeatureStore.
+            >>> fs.apply(entity2)
+            True
+
+            # List all the available entities.
+            >>> fs.list_entities()
+            description creation_time modified_time entity_column
+            name data_domain
+            sales_data_df ALICE None 2025-07-28 04:59:14.325456 None accounts
+
+            # Let's first archive the entity.
+            >>> fs.archive_entity(entity=entity2)
+            Entity 'sales_data_df' is archived.
+            True
+
+            # Delete Entity with Entity object.
+            >>> fs.delete_entity(entity=entity2)
+            Entity 'sales_data_df' is deleted.
+            True
         """
         return self.__remove_obj(name=entity, type_="entity", action="delete")
 

@@ -1993,7 +3430,7 @@ class FeatureStore:
         col_expr = Col("name") == features[0]
         for feature in features[1:]:
             col_expr = ((col_expr) | (Col("name") == feature))
-
+        col_expr = col_expr & (Col("data_domain") == self.__data_domain)
         return col_expr
 
     def archive_feature_group(self, feature_group):
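__get_features_where_clause() folds one equality per feature name into an OR chain and now ANDs the data_domain scope on top. The same fold written with functools.reduce, emitting a plain SQL string instead of teradataml Col expressions, just to make the shape of the predicate visible:

    from functools import reduce

    def features_where_clause(features, data_domain):
        ors = reduce(lambda acc, f: f"{acc} OR name = '{f}'",
                     features[1:], f"name = '{features[0]}'")
        return f"({ors}) AND data_domain = '{data_domain}'"

    print(features_where_clause(["Jan", "Feb"], "ALICE"))
    # (name = 'Jan' OR name = 'Feb') AND data_domain = 'ALICE'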
@@ -2001,7 +3438,7 @@ class FeatureStore:
         DESCRIPTION:
             Archives FeatureGroup from repository. Note that archived FeatureGroup
             is not available for any further processing. Archived FeatureGroup can be
-            viewed using "
+            viewed using the "list_feature_groups(archived=True)" method.
         Note:
             The function archives the associated Features, Entity and DataSource
             if they are not associated with any other FeatureGroups.

@@ -2021,32 +3458,57 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain="d1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Archive the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
             # Create FeatureGroup from teradataml DataFrame.
-            >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df,
-            # Create FeatureStore for the repo 'staging_repo'.
-            >>> fs = FeatureStore("staging_repo")
+            >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
             # Apply FeatureGroup to FeatureStore.
             >>> fs.apply(fg)
             True
+
             # List all the available FeatureGroups.
             >>> fs.list_feature_groups()
-
-            name
-            sales
+            description data_source_name entity_name creation_time modified_time
+            name data_domain
+            sales d1 None sales sales 2025-07-28 05:00:19.780453 None
 
             # Archive FeatureGroup with name "sales".
             >>> fs.archive_feature_group(feature_group='sales')
             FeatureGroup 'sales' is archived.
             True
-
+
             # List all the available FeatureGroups after archive.
-            >>> fs.list_feature_groups()
-
-
-
+            >>> fs.list_feature_groups(archived=True)
+            name data_domain description data_source_name entity_name creation_time modified_time archived_time
+            0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
+
+            # Example 2: Archive the FeatureGroup 'sales_df' in the repo 'vfs_v1' using FeatureGroup object.
+            # Create FeatureGroup from teradataml DataFrame.
+            >>> fg2 = FeatureGroup.from_DataFrame(name="sales_df", entity_columns="accounts", df=df, timestamp_column="datetime")
+            # Apply FeatureGroup to FeatureStore.
+            >>> fs.apply(fg2)
+            True
+
+            # Archive FeatureGroup with FeatureGroup object.
+            >>> fs.archive_feature_group(feature_group=fg2)
+            FeatureGroup 'sales_df' is archived.
+            True
+
+            # List all the available FeatureGroups after archive.
+            >>> fs.list_feature_groups(archived=True)
+            name data_domain description data_source_name entity_name creation_time modified_time archived_time
+            0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
+            1 sales_df d1 None sales sales 2025-07-28 05:02:01.123456 None 2025-07-28 05:03:35.456789
         """
         argument_validation_params = []
         argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])

@@ -2056,6 +3518,29 @@ class FeatureStore:
 
         feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
 
+        stg_table = _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
+        stg_table = stg_table[stg_table.name == feature_group_name]
+        if stg_table.shape[0] > 0:
+            print("{} '{}' is already archived.".format('FeatureGroup', feature_group_name))
+            return False
+
+        # Check if the FeatureGroup is related to any FeatureProcess.
+        feature_process_df = self.list_feature_processes()
+        related_processes = feature_process_df[(feature_process_df['data_source'] == feature_group_name)]
+
+        if related_processes.shape[0] > 0:
+            process_ids = [fp.process_id for fp in related_processes.itertuples()]
+            related_process_ids = "feature process(es) {}".format(process_ids)
+            err_code = MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS
+            err_msg = Messages.get_message(err_code,
+                                           'FeatureGroup',
+                                           feature_group_name,
+                                           related_process_ids,
+                                           "feature process(es)",
+                                           "FeatureStore.archive_feature_process() and FeatureStore.delete_feature_process()",
+                                           )
+            raise TeradataMlException(err_msg, err_code)
+
         fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
 
         fg_df = self.list_feature_groups()
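archive_feature_group() now refuses two cases up front: the group is already in the staging table (already archived), or some FeatureProcess still names the group as its data source, in which case the caller must archive or delete those processes first. A standalone sketch of that guard, with toy stand-ins for the staging table and the feature-process list:

    processes = [{"process_id": "2a014f2d", "data_source": "sales"}]

    def archive_feature_group(name, archived, processes):
        if name in archived:
            print(f"FeatureGroup '{name}' is already archived.")
            return False
        related = [p["process_id"] for p in processes
                   if p["data_source"] == name]
        if related:
            raise RuntimeError(
                f"FeatureGroup '{name}' is used by feature process(es) "
                f"{related}; archive or delete them first.")
        archived.add(name)
        return True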
@@ -2132,40 +3617,45 @@ class FeatureStore:
         # Remove data for FeatureGroup from Xref table.
         # This allows to remove data from other tables.
         res = _delete_data(schema_name=self.__repo,
-                           table_name=
-                           delete_conditions=(Col("group_name") == group_name)
+                           table_name=self.__table_names["group_features"],
+                           delete_conditions=(Col("group_name") == group_name) &
+                                             (Col("group_data_domain") == self.__data_domain)
                            )
 
         # Remove FeatureGroup.
         res = _delete_data(schema_name=self.__repo,
-                           table_name=
-                           delete_conditions=(Col("name") == group_name)
+                           table_name=self.__table_names["feature_group"],
+                           delete_conditions=(Col("name") == group_name) &
+                                             (Col("data_domain") == self.__data_domain)
                            )
 
         # Remove Features.
         if feature_names:
             _delete_data(schema_name=self.__repo,
-                         table_name=
+                         table_name=self.__table_names["feature"],
                          delete_conditions=self.__get_features_where_clause(feature_names)
                          )
 
         # Remove entities.
         if entity_name:
             _delete_data(schema_name=self.__repo,
-                         table_name=
-                         delete_conditions=(Col("entity_name") == entity_name)
+                         table_name=self.__table_names["entity_xref"],
+                         delete_conditions=(Col("entity_name") == entity_name) &
+                                           (Col("data_domain") == self.__data_domain)
                          )
 
             _delete_data(schema_name=self.__repo,
-                         table_name=
-                         delete_conditions=(Col("name") == entity_name)
+                         table_name=self.__table_names["entity"],
+                         delete_conditions=(Col("name") == entity_name) &
+                                           (Col("data_domain") == self.__data_domain)
                          )
 
         # Remove DataSource.
         if data_source_name:
             _delete_data(schema_name=self.__repo,
-                         table_name=
-                         delete_conditions=(Col("name") == data_source_name)
+                         table_name=self.__table_names["data_source"],
+                         delete_conditions=(Col("name") == data_source_name) &
+                                           (Col("data_domain") == self.__data_domain)
                          )
 
         return res

@@ -2195,17 +3685,31 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
-            >>> load_example_data('dataframe', ['sales'])
             # Create teradataml DataFrame.
+            >>> load_example_data('dataframe', ['sales'])
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain="d1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
             # Create FeatureGroup from teradataml DataFrame.
-            >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df,
-            # Create FeatureStore for the repo 'staging_repo'.
-            >>> fs = FeatureStore("staging_repo")
+            >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
             # Apply FeatureGroup to FeatureStore.
             >>> fs.apply(fg)
             True
-
+
+            # List all the available FeatureGroups.
+            >>> fs.list_feature_groups()
+            description data_source_name entity_name creation_time modified_time
+            name data_domain
+            sales d1 None sales sales 2025-07-28 05:00:19.780453 None
+
+            # Archive FeatureGroup with name "sales".
             >>> fs.archive_feature_group(feature_group='sales')
             FeatureGroup 'sales' is archived.
             True

@@ -2214,7 +3718,29 @@ class FeatureStore:
             >>> fs.delete_feature_group(feature_group='sales')
             FeatureGroup 'sales' is deleted.
             True
-
+
+            # List all the available FeatureGroups after delete.
+            >>> fs.list_feature_groups()
+            Empty DataFrame
+            Columns: [description, data_source_name, entity_name, creation_time, modified_time]
+            Index: []
+
+            # Example 2: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup object.
+            # Create FeatureGroup from teradataml DataFrame.
+            >>> fg2 = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
+            # Apply FeatureGroup to FeatureStore.
+            >>> fs.apply(fg2)
+            True
+
+            # Archive FeatureGroup with FeatureGroup object.
+            >>> fs.archive_feature_group(feature_group=fg2)
+            FeatureGroup 'sales' is archived.
+            True
+
+            # Delete FeatureGroup with FeatureGroup object.
+            >>> fs.delete_feature_group(feature_group=fg2)
+            FeatureGroup 'sales' is deleted.
+            True
         """
         argument_validation_params = []
         argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])

@@ -2224,95 +3750,839 @@ class FeatureStore:
 
         fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
 
+        # Validation for the delete action - ensure the FeatureGroup is already archived.
+        main_fg_df = self.__get_feature_group_df()
+        existing_records = main_fg_df[main_fg_df["name"] == fg_name]
+
+        if existing_records.shape[0] > 0:
+            error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
+            error_msg = Messages.get_message(
+                error_code,
+                'FeatureGroup',
+                fg_name,
+                'feature_group')
+            raise TeradataMlException(error_msg, error_code)
+
         # Remove data for FeatureGroup.
         _delete_data(table_name=self.__table_names["group_features_staging"],
                      schema_name=self.__repo,
-                     delete_conditions=(Col("group_name") == fg_name)
+                     delete_conditions=(Col("group_name") == fg_name) &
+                                       (Col("group_data_domain") == self.__data_domain)
                      )
 
         res = _delete_data(table_name=self.__table_names["feature_group_staging"],
                            schema_name=self.__repo,
-                           delete_conditions=(Col("name") == fg_name)
+                           delete_conditions=(Col("name") == fg_name) &
+                                             (Col("data_domain") == self.__data_domain)
                            )
 
         if res == 1:
             print("FeatureGroup '{}' is deleted.".format(fg_name))
             return True
 
-        print("FeatureGroup '{}' not exist to delete.".format(fg_name))
+        print("FeatureGroup '{}' does not exist to delete.".format(fg_name))
         return False
 
-
+    @property
+    def version(self):
         """
         DESCRIPTION:
-
-            OR DataSource DataFrame OR FeatureGroup DataFrame.
+            Get the FeatureStore version.
 
         PARAMETERS:
-
-            Required Argument.
-            Specifies the type of DataFrame to return.
-            Allowed Values:
-                * feature
-                * feature_group
-                * entity
-                * data_source
-                * group_features
+            None
 
         RETURNS:
-
+            str
 
         RAISES:
             None
 
         EXAMPLES:
-
-
-
-
-
-            # For feature or feature_staging, join it with xref table
-            # so group name appears while listing features.
-            map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
-            if obj_type in map_:
-                features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
-                features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
-                    ["feature_name", "group_name"])
-                df = features.join(features_xref, on="name==feature_name", how='left')
-                self.__df_container[obj_type] = df.select(features.columns+["group_name"])
-            # For entity, join with xref table.
-            elif obj_type == "entity" or obj_type == "entity_staging":
-                ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
-                xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
-                    ['entity_name', 'entity_column'])
-                df = ent_df.join(xref_df, on="name==entity_name", how="inner")
-                self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
-            else:
-                self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+            # Example 1: Get the version of the FeatureStore for
+            # the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('vfs_v1')
+            FeatureStore is ready to use.
 
-
+            # Get the version of FeatureStore.
+            >>> fs.version
+            '2.0.0'
+        """
+        if self.__version is None:
+            self.__version = self.__get_version()
+        return self.__version
 
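The version property computes the value once and memoizes it in self.__version. functools.cached_property is the stdlib spelling of the same lazy-once pattern, sketched here as an alternative (the Repo class and the hard-coded value are illustrative only):

    from functools import cached_property

    class Repo:
        @cached_property
        def version(self):
            print("querying the version table once...")
            return "2.0.0"

    r = Repo()
    r.version   # prints, then caches
    r.version   # served from the cache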
2295
|
-
def
|
|
3816
|
+
def list_feature_catalogs(self) -> DataFrame:
|
|
2296
3817
|
"""
|
|
2297
3818
|
DESCRIPTION:
|
|
2298
|
-
|
|
3819
|
+
Lists all the feature catalogs.
|
|
2299
3820
|
|
|
2300
3821
|
PARAMETERS:
|
|
2301
3822
|
None
|
|
2302
3823
|
|
|
2303
3824
|
RETURNS:
|
|
2304
|
-
|
|
3825
|
+
teradataml DataFrame
|
|
2305
3826
|
|
|
2306
3827
|
RAISES:
|
|
2307
3828
|
None
|
|
2308
3829
|
|
|
2309
3830
|
EXAMPLES:
|
|
2310
|
-
# Example 1:
|
|
2311
|
-
# the repo 'vfs_v1'.
|
|
3831
|
+
# Example 1: List all the feature catalogs in the repo 'vfs_v1'.
|
|
2312
3832
|
>>> from teradataml import FeatureStore
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
|
|
3833
|
+
|
|
3834
|
+
# Create FeatureStore for the repo 'vfs_v1' or use existing one.
|
|
3835
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3836
|
+
FeatureStore is ready to use.
|
|
3837
|
+
|
|
3838
|
+
# Load the sales data.
|
|
3839
|
+
>>> load_example_data("dataframe", "sales")
|
|
3840
|
+
>>> df = DataFrame("sales")
|
|
3841
|
+
|
|
3842
|
+
# Create a feature process.
|
|
3843
|
+
>>> from teradataml import FeatureProcess
|
|
3844
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
3845
|
+
... data_domain='sales',
|
|
3846
|
+
... object=df,
|
|
3847
|
+
... entity="accounts",
|
|
3848
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
3849
|
+
>>> fp.run()
|
|
3850
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
3851
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
3852
|
+
|
|
3853
|
+
# List all the feature catalogs in the repo 'vfs_v1'.
|
|
3854
|
+
>>> fs.list_feature_catalogs()
|
|
3855
|
+
data_domain feature_id table_name valid_start valid_end
|
|
3856
|
+
entity_name
|
|
3857
|
+
accounts sales 2 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
3858
|
+
accounts sales 100001 FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
3859
|
+
accounts sales 1 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
3860
|
+
accounts sales 200001 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
3861
|
+
"""
|
|
3862
|
+
df = self.__get_without_valid_period_df(self.__get_features_metadata_df())
|
|
3863
|
+
return df[df.data_domain==self.__data_domain]
|
|
3864
|
+
|
+    def archive_feature_process(self, process_id):
+        """
+        DESCRIPTION:
+            Archives the FeatureProcess with the given process_id.
+            Notes:
+                * Archived FeatureProcess is not available for any further processing.
+                * Archived FeatureProcess can be viewed using the
+                  "FeatureStore.list_feature_processes(archived=True)" method.
+                * The same feature can be ingested by multiple processes. If a feature
+                  associated with process "process_id" is also associated with other
+                  processes, then this function only archives the feature values
+                  associated with the process "process_id". Otherwise, it archives the
+                  feature from the feature catalog. Look at
+                  "FeatureCatalog.archive_features()" for more details.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to archive from repository.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> load_example_data('dataframe', ['sales'])
+            # Create a teradataml DataFrame.
+            >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+            >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'repo'.
+            >>> fs = FeatureStore("repo", data_domain='sales')
+            Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Run FeatureProcess to ingest features.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo='repo',
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb', 'Mar', 'Apr'])
+            >>> fp.run()
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+            # List the available FeatureProcesses.
+            >>> fs.list_feature_processes()
+                                                 description data_domain  process_type   data_source entity_id       feature_names feature_ids                     valid_start                       valid_end
+            process_id
+            2a014f2d-6b71-11f0-aeda-f020ffe7fe09                   sales  denormalized  view "sales"  accounts  Apr, Feb, Jan, Mar        None  2025-07-28 05:10:34.760000+00:  9999-12-31 23:59:59.999999+00:
+
+            # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+            Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Jan' is archived from metadata.
+            Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+            Feature 'Feb' is archived from metadata.
+            Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Mar' is archived from metadata.
+            Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Apr' is archived from metadata.
+            FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+            True
         """
-
+        argument_validation_params = []
+        argument_validation_params.append(["process_id", process_id, True, str, True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        features = self.__validate_feature_process(process_id)
+        if features is False:
+            return False
+
+        feature_details = FeatureCatalog._get_feature_details(
+            self.__repo, self.__data_domain, features)
+
+        # Get the shared features.
+        shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+        # Remove the features from the feature metadata table.
+        return self.__remove_feature_process(
+            process_id, features, feature_details, shared_features)
+
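As a quick illustration of the archive workflow just defined (a sketch, assuming the `fs` and completed FeatureProcess `fp` from the docstring examples):

    >>> if fs.archive_feature_process(fp.process_id):
    ...     archived = fs.list_feature_processes(archived=True)   # archived processes stay viewable here

Per the Notes above, archiving removes the process from further use but keeps it listed under `archived=True`.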
+    def delete_feature_process(self, process_id):
+        """
+        DESCRIPTION:
+            Deletes the archived feature process from feature store with the given process_id.
+            Notes:
+                * One feature can be ingested by multiple processes. If a feature
+                  associated with process "process_id" is also ingested by other
+                  processes, then "delete_feature_process()" only deletes the feature
+                  values associated with the process "process_id". Otherwise, it deletes
+                  the feature from the feature catalog. Look at
+                  "FeatureCatalog.delete_features()" for more details.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to delete from repository.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> load_example_data('dataframe', ['sales'])
+            # Create a teradataml DataFrame.
+            >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+            >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'repo'.
+            >>> fs = FeatureStore("repo", data_domain='sales')
+            Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Run FeatureProcess to ingest features.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo='repo',
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb', 'Mar', 'Apr'])
+            >>> fp.run()
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+            # List the available FeatureProcesses.
+            >>> fs.list_feature_processes()
+                                                 description data_domain  process_type   data_source entity_id       feature_names feature_ids                     valid_start                       valid_end
+            process_id
+            2a014f2d-6b71-11f0-aeda-f020ffe7fe09                   sales  denormalized  view "sales"  accounts  Apr, Feb, Jan, Mar        None  2025-07-28 05:10:34.760000+00:  9999-12-31 23:59:59.999999+00:
+
+            # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+            Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Jan' is archived from metadata.
+            Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+            Feature 'Feb' is archived from metadata.
+            Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Mar' is archived from metadata.
+            Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Apr' is archived from metadata.
+            FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+            True
+
+            # Example: Delete the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.delete_feature_process('2a014f2d-6b71-11f0-aeda-f020ffe7fe09')
+            Feature 'Feb' deleted successfully from table 'FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4'.
+            Feature 'Jan' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            Feature 'Mar' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            Feature 'Apr' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is deleted.
+            True
+
+            # List the available FeatureProcesses after delete.
+            >>> fs.list_feature_processes()
+            Empty DataFrame
+            Columns: [description, data_domain, process_type, data_source, entity_id, feature_names, feature_ids, valid_start, valid_end]
+            Index: []
+        """
+        argument_validation_params = []
+        argument_validation_params.append(["process_id", process_id, True, str, True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # Before deleting, check whether the specified process id exists.
+        features = self.__validate_feature_process(process_id, type_='delete')
+        if features is False:
+            return False
+
+        feature_details = FeatureCatalog._get_feature_details(
+            self.__repo, self.__data_domain, features)
+
+        # Get the shared features.
+        shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+        return self.__remove_feature_process(
+            process_id, features, feature_details, shared_features, type_='delete')
+
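The delete path requires a prior archive (see `__validate_feature_process` further down, which rejects deletes of unarchived processes). A condensed sketch of the full removal workflow, assuming the `fs` and `fp` objects from the examples:

    >>> pid = fp.process_id
    >>> fs.archive_feature_process(pid)    # step 1: close the valid period
    True
    >>> fs.delete_feature_process(pid)     # step 2: physically remove the archived rows
    True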
+    @db_transaction
+    def __remove_feature_process(self,
+                                 process_id,
+                                 process_features,
+                                 feature_details,
+                                 shared_features,
+                                 type_='archive'):
+        """
+        DESCRIPTION:
+            Internal function to remove the FeatureProcess from repository.
+            It also removes the associated features from the feature table.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to remove from repository.
+                Types: str
+
+            process_features:
+                Required Argument.
+                Specifies the features ingested by the process.
+                Types: list
+
+            feature_details:
+                Required Argument.
+                Specifies the list of features to remove from repository.
+                Types: list of namedtuple
+
+            shared_features:
+                Required Argument.
+                Specifies the features that are also ingested by other processes.
+                Types: list
+
+            type_:
+                Optional Argument.
+                Specifies the type of removal. Allowed values are 'archive' and 'delete'.
+                Default value is 'archive'.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> self.__remove_feature_process("5747082b-4acb-11f0-a2d7-f020ffe7fe09",
+            ...     process_features=[namedtuple('feature_', ['name', 'id', 'table_name'])('sales_data_Feb', 1, 'FS_T_12345')],
+            ...     type_='archive')
+        """
+        temporal_clause = 'CURRENT VALIDTIME'
+        delete_condition = (Col("process_id") == process_id)
+        if type_ == 'delete':
+            temporal_clause = None
+
+        fc = FeatureCatalog(self.__repo, self.__data_domain)
+        res1 = fc._remove_features(process_features, feature_details, type_=='archive', shared_features, process_id)
+
+        # Remove it from feature process table.
+        res = _delete_data(table_name=self.__table_names["feature_process"],
+                           schema_name=self.__repo,
+                           delete_conditions=delete_condition,
+                           temporal_clause=temporal_clause
+                           )
+
+        if res >= 1:
+            print("FeatureProcess with process id '{}' is {}d.".format(process_id, type_))
+            return res1 & True
+
+        print("FeatureProcess with process id '{}' does not exist to {}.".format(process_id, type_))
+        return res1 & False
+
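The `temporal_clause` switch above is what separates the two behaviors: with `CURRENT VALIDTIME`, the delete only closes the currently valid rows (archive); with no temporal clause, all rows, history included, are removed (delete). Roughly, the two DELETE shapes look like this (illustrative SQL only; the actual statement text is internal to `_delete_data`):

    CURRENT VALIDTIME DELETE FROM "repo"."<feature_process_table>" WHERE process_id = '<id>';  -- archive
    DELETE FROM "repo"."<feature_process_table>" WHERE process_id = '<id>';                    -- delete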
+    def __validate_feature_process(self, process_id, type_='archive'):
+        """
+        DESCRIPTION:
+            Internal function to validate whether the feature process exists.
+            The function also checks whether the process is already archived.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to validate.
+                Types: str
+
+            type_:
+                Optional Argument.
+                Specifies the type of validation. Allowed values are 'archive' and 'delete'.
+                Default value is 'archive'.
+                Types: str
+
+        RETURNS:
+            list or bool.
+            False if the process does not exist or is already archived.
+            list if all validations are passed.
+
+        RAISES:
+            TeradatamlException
+
+        EXAMPLES:
+            >>> # Validate the feature process with process_id '5747082b-4acb-11f0-a2d7-f020ffe7fe09'.
+            >>> fs.__validate_feature_process(process_id='5747082b-4acb-11f0-a2d7-f020ffe7fe09')
+            (['sales_data_Feb', 'sales_data_Jan'], ['sales_data_Mar', 'sales_data_Apr'])
+        """
+        # Extract process type, data source, entity_id, feature_names from given process id.
+        sql = EFS_ARCHIVED_RECORDS.format("feature_names",
+                                          '"{}"."{}"'.format(self.__repo,
+                                                             self.__table_names["feature_process"]),
+                                          "PROCESS_ID = '{}' AND DATA_DOMAIN = '{}'".
+                                          format(process_id, self.__data_domain))
+
+        feature_names = set()
+        all_archived = True
+        any_one_not_archived = False
+        for rec in execute_sql(sql):
+            is_archived = rec[1] == 1
+            all_archived = all_archived and is_archived
+            any_one_not_archived = any_one_not_archived or (not is_archived)
+            feature_names.update([f.strip() for f in rec[0].split(",")])
+
+        # Not raising error to align with the behavior of other methods.
+        if not feature_names:
+            print("FeatureProcess with process id '{}' does not exist.".format(process_id))
+            return False
+
+        # Check whether the features are already archived.
+        if type_ == 'archive' and all_archived:
+            # In that case, every record's valid end date is earlier than the current timestamp.
+            print("FeatureProcess with process id '{}' is already archived.".format(process_id))
+            return False
+
+        # For delete, first check whether the process is archived.
+        if type_ == 'delete' and any_one_not_archived:
+            print("FeatureProcess with process id '{}' is not archived. "
+                  "First archive the process and then delete it.".format(process_id))
+            return False
+
+        # Check whether the features are associated with any dataset.
+        dataset_features_df = self.__get_dataset_features_df()
+        # Validate the feature names.
+        _Validators._validate_features_not_in_efs_dataset(
+            df=dataset_features_df[(dataset_features_df['data_domain'] == self.__data_domain)],
+            feature_names=list(feature_names),
+            action='archived')
+
+        return feature_names
+
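The two flags above encode the state machine: archiving needs at least one record that is still active (`not all_archived`), while deleting needs every record already archived (`not any_one_not_archived`). A standalone sketch of the same check over illustrative `(feature_names, is_archived)` rows:

    rows = [("Jan, Feb", 1), ("Mar, Apr", 0)]                  # illustrative query result rows
    all_archived = all(flag == 1 for _, flag in rows)          # False -> archive is allowed
    any_one_not_archived = any(flag != 1 for _, flag in rows)  # True  -> delete is rejected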
+    def remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Removes the data domain from the FeatureStore and all associated objects.
+
+            Notes:
+                * This operation permanently deletes all objects, tables, and views tied to the data domain.
+                * There is no archival or built-in recovery; all deletions are irreversible.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create a new FeatureStore or use an existing one.
+            >>> fs = FeatureStore("repo", data_domain="sales")
+            FeatureStore is ready to use.
+
+            # Remove the data domain 'sales' and all associated objects.
+            >>> fs.remove_data_domain()
+            The function will remove the data domain 'sales' and all associated objects. Are you sure you want to proceed? (Y/N): Y
+            Data domain 'sales' is removed from the FeatureStore.
+            True
+        """
+        confirmation = input("The function will remove the data domain '{}' and"
+                             " all associated objects. Are you sure you want to proceed? (Y/N): ".format(self.__data_domain))
+
+        if confirmation not in ["Y", "y"]:
+            return False
+
+        # Get the views to drop related to the data domain.
+        dataset_features_df = self.__get_dataset_features_df()
+        filtered_dataset_features_df = dataset_features_df[dataset_features_df['data_domain'] == self.__data_domain].itertuples()
+        views_to_drop = list({rec.feature_view for rec in filtered_dataset_features_df})
+
+        # Get the tables to drop related to the data domain.
+        features_metadata_df = self.__get_features_metadata_df()
+        filtered_features_metadata_df = features_metadata_df[features_metadata_df['data_domain'] == self.__data_domain].itertuples()
+        tables_to_drop = list({rec.table_name for rec in filtered_features_metadata_df})
+
+        res = db_transaction(self.__remove_data_domain)()
+
+        # Drop the views related to the data domain.
+        for view in views_to_drop:
+            try:
+                execute_sql(f"DROP VIEW {_get_quoted_object_name(schema_name=self.__repo, object_name=view)}")
+            except Exception as e:
+                print(f"Error dropping view {view}: {e}")
+        # Drop the tables related to the data domain.
+        for table in tables_to_drop:
+            try:
+                execute_sql(f"DROP TABLE {_get_quoted_object_name(schema_name=self.__repo, object_name=table)}")
+            except Exception as e:
+                print(f"Error dropping table {table}: {e}")
+
+        return True
+
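Because `remove_data_domain()` gates on an interactive `input()`, anything other than 'Y'/'y' aborts before any deletion happens. A sketch of the declined path (illustrative session):

    >>> fs.remove_data_domain()
    The function will remove the data domain 'sales' and all associated objects. Are you sure you want to proceed? (Y/N): N
    False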
+    def __remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Internal method to remove the data domain from the FeatureStore and all associated objects.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> fs.__remove_data_domain()
+        """
+        # To remove a data domain from the FeatureStore, we need to:
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        # 2. Remove data domain entries from the feature metadata.
+        # 3. Remove data domain entries from the feature processes.
+        # 4. Remove data_domain entries from feature groups, group features, and their staging tables.
+        # 5. Remove data_domain entries from features and their staging tables.
+        # 6. Remove data_domain entries from entities, entity xref, and their staging tables.
+        # 7. Remove data_domain entries from data sources and their staging tables.
+        # 8. Remove data_domain entries from data_domain table.
+
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        _delete_data(
+            table_name=self.__table_names['dataset_catalog'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['dataset_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 2. Remove data domain entries from the feature metadata.
+        _delete_data(
+            table_name=self.__table_names['feature_metadata'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 3. Remove data_domain entries from the feature processes.
+        _delete_data(
+            table_name=self.__table_names['feature_process'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 4. Remove data_domain entries from feature groups, group features, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['group_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain)
+        )
+        _delete_data(
+            table_name=self.__table_names['feature_group'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names["group_features_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain))
+
+        _delete_data(
+            table_name=self.__table_names["feature_group_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 5. Remove data_domain entries from features and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['feature'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['feature_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        # 6. Remove data_domain entries from entities, entity xref, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['entity_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        _delete_data(
+            table_name=self.__table_names['entity'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        # 7. Remove data_domain entries from data sources and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['data_source'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['data_source_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+
+        # 8. Remove data_domain entries from data_domain table.
+        _delete_data(
+            table_name=self.__table_names['data_domain'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("name") == self.__data_domain)
+        )
+
+        print(f"Data domain '{self.__data_domain}' is removed from the FeatureStore.")
+        return True
+
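Note the two-phase shape used by the public method above: the metadata deletes in `__remove_data_domain` run atomically under `db_transaction`, and only afterwards are the per-domain views and tables dropped, so a failing DROP surfaces as a printed warning instead of rolling back the metadata cleanup. Condensed (names as in the methods above):

    res = db_transaction(self.__remove_data_domain)()    # phase 1: metadata rows, one transaction
    for view in views_to_drop:                           # phase 2: best-effort DDL cleanup
        execute_sql(f"DROP VIEW {_get_quoted_object_name(schema_name=self.__repo, object_name=view)}")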
+    def mind_map(self, feature_process=None):
+        """
+        DESCRIPTION:
+            Returns a visual mind map of the FeatureStore, showing data sources,
+            feature processes, feature catalog, and dataset catalog, with dependencies
+            illustrated by curves.
+            Note:
+                Works only in Jupyter Notebook or similar environments that support HTML rendering.
+
+        PARAMETERS:
+            feature_process:
+                Optional Argument.
+                Specifies the feature process to filter the mind map. When specified,
+                only the feature process and its related data sources, features, and datasets
+                are displayed.
+                Notes:
+                    * For datasets too, mind_map() displays only the features associated
+                      with the specified feature process. For example, if a Dataset is
+                      associated with Feature1 and Feature2, where Feature1 is ingested by
+                      FeatureProcess1 and Feature2 is ingested by FeatureProcess2, then
+                      mind_map() displays the Dataset with Feature1 only if "feature_process"
+                      is set to FeatureProcess1.
+                    * If "feature_process" is not specified, then mind_map() displays all the
+                      feature processes, data sources, features, and datasets in the FeatureStore.
+                Types: str OR list of str
+
+        RETURNS:
+            None (displays HTML visualization)
+
+        RAISES:
+            TypeError
+
+        EXAMPLES:
+            # Set up the data and feature processes used by the examples below.
+            >>> from teradataml import DataFrame, FeatureStore
+            >>> load_example_data("dataframe", ["sales", "admissions"])
+            # Create DataFrames.
+            >>> sales_df = DataFrame("sales")
+            >>> admissions_df = DataFrame("admissions")
+
+            # Create a FeatureStore for the repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain='Analytics')
+            FeatureStore is ready to use.
+
+            # Create a feature process to ingest the sales DataFrame.
+            >>> fp1 = fs.get_feature_process(object=sales_df,
+            ...                              features=['Jan', 'Feb', 'Mar', 'Apr'],
+            ...                              entity='accounts')
+            >>> fp1.run()
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' started.
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' completed.
+            True
+
+            # Create a feature process to ingest the admissions DataFrame.
+            >>> fp2 = fs.get_feature_process(object=admissions_df,
+            ...                              features=['masters', 'gpa', 'stats', 'programming', 'admitted'],
+            ...                              entity='id')
+            >>> fp2.run()
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' started.
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' completed.
+
+            # Example 1: Display the mind map of the FeatureStore.
+            >>> fs.mind_map()
+
+            # Example 2: Display the mind map of the FeatureStore for the sales feature process.
+            >>> fs.mind_map(feature_process=fp1.process_id)
+
+            # Example 3: Display the mind map of the FeatureStore for admissions features.
+            >>> fs.mind_map(feature_process=fp2.process_id)
+
+            # Example 4: Display the mind map of the FeatureStore for both sales and admissions
+            #            feature processes.
+            >>> fs.mind_map(feature_process=[fp1.process_id, fp2.process_id])
+        """
+        # Validate arguments.
+        argument_validation_params = []
+        argument_validation_params.append(["feature_process", feature_process, True, (str, list), True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # 1. Declare Python variables for the mind map.
+        data_sources_ = set()
+        feature_processes_ = set()
+        features_ = set()
+        datasets_ = set()
+        data_source_map = {}
+        feature_process_map = {}
+        dataset_feature_map = {}
+
+        sql = """
+            select distinct process_id, oreplace(data_source, '"', '') as data_source, feature_names from "{}".{}
+            where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['feature_process'], self.__data_domain)
+
+        # If user provides feature process, filter the SQL query.
+        if feature_process:
+            feature_process = UtilFuncs._as_list(feature_process)
+            feature_process_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            sql += " and process_id in ({})".format(feature_process_str)
+
+        recs = execute_sql(sql)
+        for rec in recs:
+            process_id, data_source, feature_names = rec
+            data_sources_.add(data_source)
+            feature_processes_.add(process_id)
+            feature_names = [f.strip() for f in feature_names.split(',')]
+            features_.update(feature_names)
+
+            # Populate the maps.
+            if data_source not in data_source_map:
+                data_source_map[data_source] = []
+            data_source_map[data_source].append(process_id)
+
+            if process_id not in feature_process_map:
+                feature_process_map[process_id] = []
+            feature_process_map[process_id].extend(feature_names)
+
+        # The feature process map can have duplicates.
+        feature_process_map = {k: list(set(v)) for k, v in feature_process_map.items()}
+
+        data_sources = [{"id": ds, "label": ds} for ds in data_sources_]
+        feature_processes = [{"id": fp, "label": fp} for fp in feature_processes_]
+        features = [{"id": f, "label": f} for f in features_]
+
+        # Create datasets and dataset_feature_map.
+        ds_sql = """
+            select feature_view, feature_name from
+            "{}".{}
+            where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['dataset_features'], self.__data_domain)
+
+        # If user provides a specific feature process, then show only those features in datasets.
+        if feature_process:
+            fp_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            ds_sql += " and feature_version IN ({})".format(fp_str)
+
+        recs = execute_sql(ds_sql)
+        for rec in recs:
+            feature_view, feature_name = rec
+            datasets_.add(feature_view)
+            if feature_view not in dataset_feature_map:
+                dataset_feature_map[feature_view] = []
+            dataset_feature_map[feature_view].append(feature_name)
+
+        datasets = [{"id": ds, "label": ds} for ds in datasets_]
+
+        # 2. Add a unique suffix to all ids in the variables.
+        from time import time as epoch_seconds
+        suffix = f"_fs_{str(epoch_seconds()).replace('.', '_')}"
+
+        def add_suffix_to_list(lst):
+            return [dict(obj, id=obj["id"] + suffix) for obj in lst]
+
+        def add_suffix_to_dict_keys_and_values(dct):
+            return {k + suffix: [v + suffix for v in vs] for k, vs in dct.items()}
+
+        data_sources_js = add_suffix_to_list(data_sources)
+        feature_processes_js = add_suffix_to_list([obj for obj in feature_processes if not obj.get("invisible")])
+        # Keep invisible objects for completeness in features, but filter for display if needed.
+        features_js = add_suffix_to_list(features)
+        datasets_js = add_suffix_to_list(datasets)
+        data_source_map_js = add_suffix_to_dict_keys_and_values(data_source_map)
+        feature_process_map_js = add_suffix_to_dict_keys_and_values(feature_process_map)
+        dataset_feature_map_js = add_suffix_to_dict_keys_and_values(dataset_feature_map)
+
+        # 3. Prepare JS variable strings.
+        import json
+        js_data_sources = json.dumps(data_sources_js)
+        js_feature_processes = json.dumps(feature_processes_js)
+        js_features = json.dumps(features_js)
+        js_datasets = json.dumps(datasets_js)
+        js_data_source_map = json.dumps(data_source_map_js)
+        js_feature_process_map = json.dumps(feature_process_map_js)
+        js_dataset_feature_map = json.dumps(dataset_feature_map_js)
+
+        # 4. Get the current GMT timestamp for display.
+        from datetime import datetime, timezone
+        gmt_now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S GMT')
+
+        # 5. Inject the JS variables, timestamp, and feature store name into the template.
+        html_ = _TD_FS_MindMap_Template\
+            .replace("__DATA_SOURCES__", js_data_sources) \
+            .replace("__FEATURE_PROCESSES__", js_feature_processes) \
+            .replace("__FEATURES__", js_features) \
+            .replace("__DATASETS__", js_datasets) \
+            .replace("__DATA_SOURCE_MAP__", js_data_source_map) \
+            .replace("__FEATURE_PROCESS_MAP__", js_feature_process_map) \
+            .replace("__DATASET_FEATURE_MAP__", js_dataset_feature_map) \
+            .replace("__MINDMAP_TIMESTAMP__", gmt_now) \
+            .replace("__REPO__", self.__repo)\
+            .replace("__DATA_DOMAIN__", self.__data_domain)
+
+        # 6. Add the unique suffix to all element IDs in the HTML/JS.
+        html_ = html_.replace("_fs_i", suffix)
+
+        from IPython.display import display, HTML
+        display(HTML(html_))
+
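For orientation, the three maps assembled by `mind_map()` have this shape before the per-render suffix is appended (illustrative values loosely based on the docstring's sales example; "sales_view" is a hypothetical dataset view name):

    data_source_map     = {"sales": ["2a014f2d-..."]}                      # data source -> process ids
    feature_process_map = {"2a014f2d-...": ["Jan", "Feb", "Mar", "Apr"]}   # process id -> feature names
    dataset_feature_map = {"sales_view": ["Jan", "Feb"]}                   # dataset view -> feature names

The epoch-based suffix then namespaces every DOM id, so repeated `mind_map()` calls in one notebook render independent HTML widgets.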